| @@ -134,11 +134,7 @@ build_graphengine() | |||
| mk_dir "${BUILD_PATH}" | |||
| cd "${BUILD_PATH}" | |||
| if [[ "X$MINDSPORE_MODE" = "Xoff" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}" | |||
| else | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_D=ON -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH}" | |||
| fi | |||
| CMAKE_ARGS="-DBUILD_PATH=$BUILD_PATH" | |||
| if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_COV=ON" | |||
| @@ -156,7 +152,13 @@ build_graphengine() | |||
| if [[ "X$ENABLE_GITEE" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GITEE=ON" | |||
| fi | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}" | |||
| if [[ "X$MINDSPORE_MODE" = "Xoff" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}" | |||
| else | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_D=ON -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH}" | |||
| fi | |||
| echo "${CMAKE_ARGS}" | |||
| cmake ${CMAKE_ARGS} .. | |||
| if [ $? -ne 0 ] | |||
| @@ -233,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
| # fi | |||
| # if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
| echo "Generating coverage statistics, please wait..." | |||
| cd ${BASEPATH} | |||
| rm -rf ${BASEPATH}/cov | |||
| mkdir ${BASEPATH}/cov | |||
| lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||
| lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||
| cd ${BASEPATH}/cov | |||
| genhtml coverage.info | |||
| echo "Generating coverage statistics, please wait..." | |||
| cd ${BASEPATH} | |||
| rm -rf ${BASEPATH}/cov | |||
| mkdir ${BASEPATH}/cov | |||
| lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||
| lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||
| cd ${BASEPATH}/cov | |||
| genhtml coverage.info | |||
| fi | |||
| # generate output package in tar form, including ut/st libraries/executables | |||
| @@ -35,6 +35,7 @@ protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST}) | |||
| if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | |||
| ############ libge_proto_common.a ############ | |||
| add_library(ge_proto_common STATIC | |||
| ${PROTO_HEADER_HDRS} | |||
| ${PROTO_SRCS} | |||
| ) | |||
| @@ -55,6 +56,7 @@ target_link_libraries(ge_proto_common PRIVATE | |||
| ############ libge_proto_client.a ############ | |||
| add_library(ge_proto_client STATIC | |||
| ${PROTO_HEADER_HDRS} | |||
| ${PROTO_CLIENT_SRCS} | |||
| ) | |||
| @@ -127,38 +129,38 @@ set(TRAIN_SRC_LIST | |||
| "graph/label/partitioned_call_label_maker.cc" | |||
| "graph/label/while_label_maker.cc" | |||
| "graph/load/graph_loader.cc" | |||
| "graph/load/new_model_manager/cpu_queue_schedule.cc" | |||
| "graph/load/new_model_manager/data_dumper.cc" | |||
| "graph/load/new_model_manager/data_inputer.cc" | |||
| "graph/load/new_model_manager/davinci_model.cc" | |||
| "graph/load/new_model_manager/davinci_model_parser.cc" | |||
| "graph/load/new_model_manager/model_manager.cc" | |||
| "graph/load/new_model_manager/model_utils.cc" | |||
| "graph/load/new_model_manager/aipp_utils.cc" | |||
| "graph/load/new_model_manager/task_info/end_graph_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/model_exit_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/event_record_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/event_wait_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/hccl_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/kernel_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_set_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_active_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "graph/load/new_model_manager/task_info/task_info.cc" | |||
| "graph/load/new_model_manager/tbe_handle_store.cc" | |||
| "graph/load/new_model_manager/zero_copy_task.cc" | |||
| "graph/load/new_model_manager/zero_copy_offset.cc" | |||
| "graph/load/model_manager/cpu_queue_schedule.cc" | |||
| "graph/load/model_manager/data_dumper.cc" | |||
| "graph/load/model_manager/data_inputer.cc" | |||
| "graph/load/model_manager/davinci_model.cc" | |||
| "graph/load/model_manager/davinci_model_parser.cc" | |||
| "graph/load/model_manager/model_manager.cc" | |||
| "graph/load/model_manager/model_utils.cc" | |||
| "graph/load/model_manager/aipp_utils.cc" | |||
| "graph/load/model_manager/task_info/end_graph_task_info.cc" | |||
| "graph/load/model_manager/task_info/model_exit_task_info.cc" | |||
| "graph/load/model_manager/task_info/event_record_task_info.cc" | |||
| "graph/load/model_manager/task_info/event_wait_task_info.cc" | |||
| "graph/load/model_manager/task_info/fusion_start_task_info.cc" | |||
| "graph/load/model_manager/task_info/fusion_stop_task_info.cc" | |||
| "graph/load/model_manager/task_info/hccl_task_info.cc" | |||
| "graph/load/model_manager/task_info/kernel_ex_task_info.cc" | |||
| "graph/load/model_manager/task_info/kernel_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_set_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "graph/load/model_manager/task_info/memcpy_async_task_info.cc" | |||
| "graph/load/model_manager/task_info/profiler_trace_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_active_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_switch_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_switchn_task_info.cc" | |||
| "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "graph/load/model_manager/task_info/task_info.cc" | |||
| "graph/load/model_manager/tbe_handle_store.cc" | |||
| "graph/load/model_manager/zero_copy_task.cc" | |||
| "graph/load/model_manager/zero_copy_offset.cc" | |||
| "graph/manager/graph_context.cc" | |||
| "graph/manager/graph_manager.cc" | |||
| "graph/manager/graph_manager_utils.cc" | |||
| @@ -200,6 +202,7 @@ set(TRAIN_SRC_LIST | |||
| "graph/passes/compile_nodes_pass.cc" | |||
| "graph/passes/constant_folding_pass.cc" | |||
| "graph/passes/constant_fuse_same_pass.cc" | |||
| "graph/passes/fuse_data_nodes_with_common_input_pass.cc" | |||
| "graph/passes/remove_same_const_pass.cc" | |||
| "graph/passes/useless_control_out_remove_pass.cc" | |||
| "graph/passes/control_trigger_pass.cc" | |||
| @@ -372,6 +375,7 @@ set(TRAIN_SRC_LIST | |||
| "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/data_kernel.cc" | |||
| "hybrid/node_executor/controlop/control_op_executor.cc" | |||
| "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | |||
| "hybrid/node_executor/hccl/hccl_node_executor.cc" | |||
| @@ -482,6 +486,7 @@ set(INFER_SRC_LIST | |||
| "graph/passes/net_output_pass.cc" | |||
| "graph/passes/replace_transshape_pass.cc" | |||
| "graph/passes/constant_fuse_same_pass.cc" | |||
| "graph/passes/fuse_data_nodes_with_common_input_pass.cc" | |||
| "graph/passes/print_op_pass.cc" | |||
| "graph/passes/no_use_reshape_remove_pass.cc" | |||
| "graph/passes/iterator_op_pass.cc" | |||
| @@ -601,37 +606,37 @@ set(INFER_SRC_LIST | |||
| "graph/manager/util/rt_context_util.cc" | |||
| "graph/manager/util/variable_accelerate_ctrl.cc" | |||
| "graph/manager/util/debug.cc" | |||
| "graph/load/new_model_manager/model_manager.cc" | |||
| "graph/load/new_model_manager/data_inputer.cc" | |||
| "graph/load/new_model_manager/davinci_model.cc" | |||
| "graph/load/new_model_manager/davinci_model_parser.cc" | |||
| "graph/load/new_model_manager/model_utils.cc" | |||
| "graph/load/new_model_manager/aipp_utils.cc" | |||
| "graph/load/new_model_manager/tbe_handle_store.cc" | |||
| "graph/load/new_model_manager/cpu_queue_schedule.cc" | |||
| "graph/load/new_model_manager/zero_copy_task.cc" | |||
| "graph/load/new_model_manager/zero_copy_offset.cc" | |||
| "graph/load/new_model_manager/data_dumper.cc" | |||
| "graph/load/new_model_manager/task_info/task_info.cc" | |||
| "graph/load/new_model_manager/task_info/event_record_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/event_wait_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/kernel_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_set_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_active_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/end_graph_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/model_exit_task_info.cc" | |||
| "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "graph/load/model_manager/model_manager.cc" | |||
| "graph/load/model_manager/data_inputer.cc" | |||
| "graph/load/model_manager/davinci_model.cc" | |||
| "graph/load/model_manager/davinci_model_parser.cc" | |||
| "graph/load/model_manager/model_utils.cc" | |||
| "graph/load/model_manager/aipp_utils.cc" | |||
| "graph/load/model_manager/tbe_handle_store.cc" | |||
| "graph/load/model_manager/cpu_queue_schedule.cc" | |||
| "graph/load/model_manager/zero_copy_task.cc" | |||
| "graph/load/model_manager/zero_copy_offset.cc" | |||
| "graph/load/model_manager/data_dumper.cc" | |||
| "graph/load/model_manager/task_info/task_info.cc" | |||
| "graph/load/model_manager/task_info/event_record_task_info.cc" | |||
| "graph/load/model_manager/task_info/event_wait_task_info.cc" | |||
| "graph/load/model_manager/task_info/fusion_start_task_info.cc" | |||
| "graph/load/model_manager/task_info/fusion_stop_task_info.cc" | |||
| "graph/load/model_manager/task_info/kernel_ex_task_info.cc" | |||
| "graph/load/model_manager/task_info/kernel_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_set_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "graph/load/model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "graph/load/model_manager/task_info/memcpy_async_task_info.cc" | |||
| "graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "graph/load/model_manager/task_info/profiler_trace_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_active_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_switch_task_info.cc" | |||
| "graph/load/model_manager/task_info/stream_switchn_task_info.cc" | |||
| "graph/load/model_manager/task_info/end_graph_task_info.cc" | |||
| "graph/load/model_manager/task_info/model_exit_task_info.cc" | |||
| "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "single_op/task/op_task.cc" | |||
| "single_op/task/build_task_utils.cc" | |||
| "single_op/task/tbe_task_builder.cc" | |||
| @@ -187,6 +187,8 @@ target_compile_options(ge_common PRIVATE | |||
| -fvisibility=hidden | |||
| -O2 | |||
| -Werror | |||
| -Wno-deprecated-declarations | |||
| -fno-common | |||
| ) | |||
| target_include_directories(ge_common PRIVATE | |||
| @@ -28,7 +28,7 @@ | |||
| #include "framework/common/util.h" | |||
| #include "graph/detail/attributes_holder.h" | |||
| #include "graph/detail/model_serialize_imp.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/model.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| @@ -23,7 +23,7 @@ | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/omg/version.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| @@ -21,7 +21,7 @@ | |||
| #include "framework/common/string_util.h" | |||
| #include "graph/ge_context.h" | |||
| #include "runtime/base.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace { | |||
| const char *const kTrainingTrace = "training_trace"; | |||
| @@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
| uint32_t stream_id = task.stream_id; | |||
| std::string shape_type = task.shape_type; | |||
| int64_t cur_iter_num = task.cur_iter_num; | |||
| uint32_t task_type = task.task_type; | |||
| data = model_name.append(" ") | |||
| .append(op_name).append(" ") | |||
| .append(std::to_string(block_dim)).append(" ") | |||
| @@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
| .append(std::to_string(stream_id)).append(" ") | |||
| .append(std::to_string(model_id)).append(" ") | |||
| .append(shape_type).append(" ") | |||
| .append(std::to_string(cur_iter_num)).append("\n"); | |||
| .append(std::to_string(cur_iter_num)).append(" ") | |||
| .append(std::to_string(task_type)).append("\n"); | |||
| ReporterData reporter_data{}; | |||
| reporter_data.deviceId = device_id; | |||
| @@ -388,6 +388,7 @@ REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); | |||
| REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); | |||
| REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); | |||
| REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); | |||
| REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite"); | |||
| REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); | |||
| REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); | |||
| @@ -32,37 +32,37 @@ set(SRC_LIST | |||
| "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" | |||
| "../model/ge_model.cc" | |||
| "../model/ge_root_model.cc" | |||
| "../graph/load/new_model_manager/davinci_model.cc" | |||
| "../graph/load/new_model_manager/davinci_model_parser.cc" | |||
| "../graph/load/new_model_manager/model_manager.cc" | |||
| "../graph/load/new_model_manager/tbe_handle_store.cc" | |||
| "../graph/load/new_model_manager/cpu_queue_schedule.cc" | |||
| "../graph/load/new_model_manager/model_utils.cc" | |||
| "../graph/load/new_model_manager/aipp_utils.cc" | |||
| "../graph/load/new_model_manager/data_inputer.cc" | |||
| "../graph/load/new_model_manager/data_dumper.cc" | |||
| "../graph/load/new_model_manager/zero_copy_task.cc" | |||
| "../graph/load/new_model_manager/zero_copy_offset.cc" | |||
| "../graph/load/new_model_manager/task_info/task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/event_record_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/event_wait_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/kernel_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/label_set_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/stream_active_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/end_graph_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/model_exit_task_info.cc" | |||
| "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "../graph/load/model_manager/davinci_model.cc" | |||
| "../graph/load/model_manager/davinci_model_parser.cc" | |||
| "../graph/load/model_manager/model_manager.cc" | |||
| "../graph/load/model_manager/tbe_handle_store.cc" | |||
| "../graph/load/model_manager/cpu_queue_schedule.cc" | |||
| "../graph/load/model_manager/model_utils.cc" | |||
| "../graph/load/model_manager/aipp_utils.cc" | |||
| "../graph/load/model_manager/data_inputer.cc" | |||
| "../graph/load/model_manager/data_dumper.cc" | |||
| "../graph/load/model_manager/zero_copy_task.cc" | |||
| "../graph/load/model_manager/zero_copy_offset.cc" | |||
| "../graph/load/model_manager/task_info/task_info.cc" | |||
| "../graph/load/model_manager/task_info/event_record_task_info.cc" | |||
| "../graph/load/model_manager/task_info/event_wait_task_info.cc" | |||
| "../graph/load/model_manager/task_info/fusion_start_task_info.cc" | |||
| "../graph/load/model_manager/task_info/fusion_stop_task_info.cc" | |||
| "../graph/load/model_manager/task_info/kernel_ex_task_info.cc" | |||
| "../graph/load/model_manager/task_info/kernel_task_info.cc" | |||
| "../graph/load/model_manager/task_info/label_set_task_info.cc" | |||
| "../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" | |||
| "../graph/load/model_manager/task_info/label_goto_ex_task_info.cc" | |||
| "../graph/load/model_manager/task_info/memcpy_async_task_info.cc" | |||
| "../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" | |||
| "../graph/load/model_manager/task_info/profiler_trace_task_info.cc" | |||
| "../graph/load/model_manager/task_info/stream_active_task_info.cc" | |||
| "../graph/load/model_manager/task_info/stream_switch_task_info.cc" | |||
| "../graph/load/model_manager/task_info/stream_switchn_task_info.cc" | |||
| "../graph/load/model_manager/task_info/end_graph_task_info.cc" | |||
| "../graph/load/model_manager/task_info/model_exit_task_info.cc" | |||
| "../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
| "../graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | |||
| "../graph/common/local_context.cc" | |||
| "../opskernel_manager/ops_kernel_builder_manager.cc" | |||
| "../single_op/single_op_manager.cc" | |||
| @@ -104,6 +104,7 @@ set(SRC_LIST | |||
| "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||
| "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||
| "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||
| "../hybrid/node_executor/host_cpu/kernel/data_kernel.cc" | |||
| "../hybrid/node_executor/controlop/control_op_executor.cc" | |||
| "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | |||
| "../hybrid/node_executor/rts/rts_node_executor.cc" | |||
| @@ -29,15 +29,15 @@ | |||
| #include "framework/common/util.h" | |||
| #include "graph/execute/graph_execute.h" | |||
| #include "graph/load/graph_loader.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #include "graph/model.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "mmpa/mmpa_api.h" | |||
| #include "single_op/single_op_manager.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | |||
| using std::string; | |||
| @@ -22,37 +22,37 @@ local_ge_executor_src_files := \ | |||
| ../graph/manager/util/debug.cc \ | |||
| ../model/ge_model.cc \ | |||
| ../model/ge_root_model.cc \ | |||
| ../graph/load/new_model_manager/davinci_model.cc \ | |||
| ../graph/load/new_model_manager/davinci_model_parser.cc \ | |||
| ../graph/load/new_model_manager/model_manager.cc \ | |||
| ../graph/load/new_model_manager/tbe_handle_store.cc \ | |||
| ../graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||
| ../graph/load/new_model_manager/model_utils.cc \ | |||
| ../graph/load/new_model_manager/aipp_utils.cc \ | |||
| ../graph/load/new_model_manager/data_inputer.cc \ | |||
| ../graph/load/new_model_manager/data_dumper.cc \ | |||
| ../graph/load/new_model_manager/zero_copy_task.cc \ | |||
| ../graph/load/new_model_manager/zero_copy_offset.cc \ | |||
| ../graph/load/new_model_manager/task_info/task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/model_exit_task_info.cc \ | |||
| ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| ../graph/load/model_manager/davinci_model.cc \ | |||
| ../graph/load/model_manager/davinci_model_parser.cc \ | |||
| ../graph/load/model_manager/model_manager.cc \ | |||
| ../graph/load/model_manager/tbe_handle_store.cc \ | |||
| ../graph/load/model_manager/cpu_queue_schedule.cc \ | |||
| ../graph/load/model_manager/model_utils.cc \ | |||
| ../graph/load/model_manager/aipp_utils.cc \ | |||
| ../graph/load/model_manager/data_inputer.cc \ | |||
| ../graph/load/model_manager/data_dumper.cc \ | |||
| ../graph/load/model_manager/zero_copy_task.cc \ | |||
| ../graph/load/model_manager/zero_copy_offset.cc \ | |||
| ../graph/load/model_manager/task_info/task_info.cc \ | |||
| ../graph/load/model_manager/task_info/event_record_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/event_wait_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/fusion_start_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/fusion_stop_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/kernel_ex_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/kernel_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/label_set_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/memcpy_async_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/profiler_trace_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/stream_active_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/stream_switch_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/stream_switchn_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/end_graph_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/model_exit_task_info.cc \ | |||
| ../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| ../graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| ../opskernel_manager/ops_kernel_builder_manager.cc \ | |||
| ../single_op/single_op_manager.cc \ | |||
| ../single_op/single_op_model.cc \ | |||
| @@ -95,6 +95,7 @@ local_ge_executor_src_files := \ | |||
| ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ | |||
| ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ | |||
| ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ | |||
| ../hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ | |||
| ../hybrid/node_executor/controlop/control_op_executor.cc \ | |||
| ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | |||
| ../hybrid/node_executor/rts/rts_node_executor.cc \ | |||
| @@ -103,6 +103,7 @@ OMG_HOST_SRC_FILES := \ | |||
| graph/passes/net_output_pass.cc \ | |||
| graph/passes/replace_transshape_pass.cc \ | |||
| graph/passes/constant_fuse_same_pass.cc \ | |||
| graph/passes/fuse_data_nodes_with_common_input_pass.cc \ | |||
| graph/passes/print_op_pass.cc \ | |||
| graph/passes/no_use_reshape_remove_pass.cc \ | |||
| graph/passes/iterator_op_pass.cc \ | |||
| @@ -227,37 +228,37 @@ OME_HOST_SRC_FILES := \ | |||
| graph/manager/util/rt_context_util.cc \ | |||
| graph/manager/util/variable_accelerate_ctrl.cc \ | |||
| graph/manager/util/debug.cc \ | |||
| graph/load/new_model_manager/model_manager.cc \ | |||
| graph/load/new_model_manager/data_inputer.cc \ | |||
| graph/load/new_model_manager/davinci_model.cc \ | |||
| graph/load/new_model_manager/davinci_model_parser.cc \ | |||
| graph/load/new_model_manager/model_utils.cc \ | |||
| graph/load/new_model_manager/aipp_utils.cc \ | |||
| graph/load/new_model_manager/tbe_handle_store.cc \ | |||
| graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||
| graph/load/new_model_manager/zero_copy_task.cc \ | |||
| graph/load/new_model_manager/zero_copy_offset.cc \ | |||
| graph/load/new_model_manager/data_dumper.cc \ | |||
| graph/load/new_model_manager/task_info/task_info.cc \ | |||
| graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/model_exit_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| graph/load/model_manager/model_manager.cc \ | |||
| graph/load/model_manager/data_inputer.cc \ | |||
| graph/load/model_manager/davinci_model.cc \ | |||
| graph/load/model_manager/davinci_model_parser.cc \ | |||
| graph/load/model_manager/model_utils.cc \ | |||
| graph/load/model_manager/aipp_utils.cc \ | |||
| graph/load/model_manager/tbe_handle_store.cc \ | |||
| graph/load/model_manager/cpu_queue_schedule.cc \ | |||
| graph/load/model_manager/zero_copy_task.cc \ | |||
| graph/load/model_manager/zero_copy_offset.cc \ | |||
| graph/load/model_manager/data_dumper.cc \ | |||
| graph/load/model_manager/task_info/task_info.cc \ | |||
| graph/load/model_manager/task_info/event_record_task_info.cc \ | |||
| graph/load/model_manager/task_info/event_wait_task_info.cc \ | |||
| graph/load/model_manager/task_info/fusion_start_task_info.cc \ | |||
| graph/load/model_manager/task_info/fusion_stop_task_info.cc \ | |||
| graph/load/model_manager/task_info/kernel_ex_task_info.cc \ | |||
| graph/load/model_manager/task_info/kernel_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_set_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| graph/load/model_manager/task_info/memcpy_async_task_info.cc \ | |||
| graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| graph/load/model_manager/task_info/profiler_trace_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_active_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_switch_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_switchn_task_info.cc \ | |||
| graph/load/model_manager/task_info/end_graph_task_info.cc \ | |||
| graph/load/model_manager/task_info/model_exit_task_info.cc \ | |||
| graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| single_op/task/op_task.cc \ | |||
| single_op/task/build_task_utils.cc \ | |||
| single_op/task/tbe_task_builder.cc \ | |||
| @@ -269,7 +270,7 @@ OME_HOST_SRC_FILES := \ | |||
| single_op/single_op_manager.cc \ | |||
| hybrid/hybrid_davinci_model_stub.cc \ | |||
| hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | |||
| # graph/load/new_model_manager/task_info/hccl_task_info.cc | |||
| # graph/load/model_manager/task_info/hccl_task_info.cc | |||
| OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) | |||
| @@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| graph/label/partitioned_call_label_maker.cc \ | |||
| graph/label/while_label_maker.cc \ | |||
| graph/load/graph_loader.cc \ | |||
| graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||
| graph/load/new_model_manager/data_dumper.cc \ | |||
| graph/load/new_model_manager/data_inputer.cc \ | |||
| graph/load/new_model_manager/davinci_model.cc \ | |||
| graph/load/new_model_manager/davinci_model_parser.cc \ | |||
| graph/load/new_model_manager/model_manager.cc \ | |||
| graph/load/new_model_manager/model_utils.cc \ | |||
| graph/load/new_model_manager/aipp_utils.cc \ | |||
| graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/model_exit_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/hccl_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| graph/load/new_model_manager/task_info/task_info.cc \ | |||
| graph/load/new_model_manager/tbe_handle_store.cc \ | |||
| graph/load/new_model_manager/zero_copy_task.cc \ | |||
| graph/load/new_model_manager/zero_copy_offset.cc \ | |||
| graph/load/model_manager/cpu_queue_schedule.cc \ | |||
| graph/load/model_manager/data_dumper.cc \ | |||
| graph/load/model_manager/data_inputer.cc \ | |||
| graph/load/model_manager/davinci_model.cc \ | |||
| graph/load/model_manager/davinci_model_parser.cc \ | |||
| graph/load/model_manager/model_manager.cc \ | |||
| graph/load/model_manager/model_utils.cc \ | |||
| graph/load/model_manager/aipp_utils.cc \ | |||
| graph/load/model_manager/task_info/end_graph_task_info.cc \ | |||
| graph/load/model_manager/task_info/model_exit_task_info.cc \ | |||
| graph/load/model_manager/task_info/event_record_task_info.cc \ | |||
| graph/load/model_manager/task_info/event_wait_task_info.cc \ | |||
| graph/load/model_manager/task_info/fusion_start_task_info.cc \ | |||
| graph/load/model_manager/task_info/fusion_stop_task_info.cc \ | |||
| graph/load/model_manager/task_info/hccl_task_info.cc \ | |||
| graph/load/model_manager/task_info/kernel_ex_task_info.cc \ | |||
| graph/load/model_manager/task_info/kernel_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_set_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
| graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ | |||
| graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
| graph/load/model_manager/task_info/memcpy_async_task_info.cc \ | |||
| graph/load/model_manager/task_info/profiler_trace_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_active_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_switch_task_info.cc \ | |||
| graph/load/model_manager/task_info/stream_switchn_task_info.cc \ | |||
| graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ | |||
| graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
| graph/load/model_manager/task_info/task_info.cc \ | |||
| graph/load/model_manager/tbe_handle_store.cc \ | |||
| graph/load/model_manager/zero_copy_task.cc \ | |||
| graph/load/model_manager/zero_copy_offset.cc \ | |||
| graph/manager/graph_context.cc \ | |||
| graph/manager/graph_manager.cc \ | |||
| graph/manager/graph_manager_utils.cc \ | |||
| @@ -127,6 +127,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| graph/passes/compile_nodes_pass.cc \ | |||
| graph/passes/constant_folding_pass.cc \ | |||
| graph/passes/constant_fuse_same_pass.cc \ | |||
| graph/passes/fuse_data_nodes_with_common_input_pass.cc \ | |||
| graph/passes/remove_same_const_pass.cc \ | |||
| graph/passes/useless_control_out_remove_pass.cc \ | |||
| graph/passes/control_trigger_pass.cc \ | |||
| @@ -299,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ | |||
| hybrid/node_executor/controlop/control_op_executor.cc \ | |||
| hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | |||
| hybrid/node_executor/hccl/hccl_node_executor.cc \ | |||
| @@ -23,6 +23,8 @@ add_library(ge_runtime SHARED ${GE_SRC_LIST}) | |||
| target_compile_options(ge_runtime PRIVATE | |||
| -Werror | |||
| -O2 | |||
| -Wno-deprecated-declarations | |||
| -fno-common | |||
| ) | |||
| target_compile_definitions(ge_runtime PRIVATE | |||
| @@ -187,8 +187,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { | |||
| Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { | |||
| if (comp_graph == nullptr) { | |||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); | |||
| return GE_GRAPH_PARAM_NULLPTR; | |||
| @@ -203,18 +202,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfo | |||
| (void)AttrUtils::GetBool(comp_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | |||
| if (is_dynamic_shape || comp_graph->GetGraphUnknownFlag()) { | |||
| GE_CHK_STATUS_RET( | |||
| BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), | |||
| BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id), | |||
| "Build for dynamic shape graph failed."); | |||
| return SUCCESS; | |||
| } | |||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id), | |||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id), | |||
| "Build for known shape graph failed."); | |||
| ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||
| Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||
| GeModelPtr &ge_model_ptr, uint64_t session_id) { | |||
| if (ge::GetContext().GetHostExecFlag()) { | |||
| GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); | |||
| @@ -222,7 +221,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v | |||
| } | |||
| GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); | |||
| Status ret = SecondPartition(comp_graph, subgraph_list); | |||
| Status ret = SecondPartition(comp_graph); | |||
| GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); | |||
| auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | |||
| @@ -470,7 +469,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | |||
| } | |||
| Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
| std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id) { | |||
| GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | |||
| @@ -517,7 +515,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
| } | |||
| } | |||
| // known shape build flow | |||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), | |||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id), | |||
| "Build for known shape graph failed."); | |||
| } | |||
| ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); | |||
| @@ -719,7 +717,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) { | |||
| Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) { | |||
| GE_TIMESTAMP_START(GraphPartition2); | |||
| auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); | |||
| if (ret != SUCCESS) { | |||
| @@ -727,10 +725,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge: | |||
| return ret; | |||
| } | |||
| GE_CHK_STATUS_RET(ret, "Graph partition Failed."); | |||
| auto graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap(); | |||
| if (graph_2_subgraphlist.find(comp_graph) != graph_2_subgraphlist.end()) { | |||
| subgraph_ptr_list = graph_2_subgraphlist[comp_graph]; | |||
| } else { | |||
| const auto &graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap(); | |||
| if (graph_2_subgraphlist.find(comp_graph) == graph_2_subgraphlist.end()) { | |||
| GELOGE(FAILED, "Find subgraph failed."); | |||
| return FAILED; | |||
| } | |||
| @@ -47,8 +47,7 @@ class GraphBuilder { | |||
| GraphBuilder(const GraphBuilder &in) = delete; | |||
| GraphBuilder &operator=(const GraphBuilder &in) = delete; | |||
| virtual ~GraphBuilder() = default; | |||
| Status Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||
| Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||
| void SetOptions(const GraphManagerOptions &options); | |||
| private: | |||
| @@ -59,12 +58,12 @@ class GraphBuilder { | |||
| Status UpdateDataInputSize(const ge::NodePtr &node_ptr); | |||
| Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); | |||
| Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); | |||
| Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list); | |||
| Status SecondPartition(ge::ComputeGraphPtr &comp_graph); | |||
| Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); | |||
| Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id = INVALID_SESSION_ID); | |||
| Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||
| Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||
| GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||
| Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id = INVALID_SESSION_ID); | |||
| @@ -24,6 +24,7 @@ | |||
| #include "graph/buffer.h" | |||
| #include "graph/ge_attr_value.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/types.h" | |||
| #include "graph/node.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "graph/utils/node_utils.h" | |||
| @@ -1401,6 +1402,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
| if (output_op_desc != nullptr) { | |||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||
| } | |||
| // fusion: other type's size not means malloc HBM memory | |||
| bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | |||
| if (l1_flag) { | |||
| @@ -1408,6 +1410,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
| op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | |||
| size = 0; | |||
| } | |||
| int32_t calc_type = 0; | |||
| bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||
| GE_IF_BOOL_EXEC((ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;); | |||
| std::string peer_name; | |||
| uint32_t peer_input_index = 0; | |||
| bool out_node_set_continuous_input = false; | |||
| @@ -60,9 +60,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr | |||
| return FAILED); | |||
| ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); | |||
| GE_CHECK_NOTNULL(tensor_desc); | |||
| rtMemType_t memory_type = RT_MEMORY_HBM; | |||
| uint32_t mem_type = 0; | |||
| if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { | |||
| memory_type = RT_MEMORY_RDMA_HBM; | |||
| } | |||
| if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { | |||
| GE_CHK_STATUS_RET( | |||
| VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); | |||
| VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type)); | |||
| GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, | |||
| GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); | |||
| GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) | |||
| @@ -70,7 +75,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr | |||
| } | |||
| uint8_t *dev_ptr = nullptr; | |||
| rtMemType_t memory_type = RT_MEMORY_HBM; | |||
| GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) | |||
| ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); | |||
| vector<int64_t> output_list = n->GetOpDesc()->GetOutputOffset(); | |||
| @@ -1013,6 +1013,24 @@ bool StreamAllocator::IsActivated(int64_t stream_id) const { | |||
| return false; | |||
| } | |||
| // Find the StreamSwitch node that leads to a loop-active node through control edges. | |||
| // Two layouts are searched: | |||
| //   Iterator loop: StreamSwitch -> StreamActive | |||
| //   FpBp loop:     StreamSwitch -> AssignAdd -> StreamActive | |||
| // For each direct control predecessor of active_node, the predecessor itself is checked first and | |||
| // then its own control predecessors; the first node of type STREAMSWITCH met in that order is returned. | |||
| // Returns nullptr when no such node exists or when active_node is null. | |||
| NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) { | |||
|   if (active_node == nullptr) { | |||
|     return nullptr; | |||
|   } | |||
|   // Iterate by const reference so the node handle is not copied on every iteration. | |||
|   for (const auto &pre_node : active_node->GetInControlNodes()) { | |||
|     if (pre_node->GetType() == STREAMSWITCH) { | |||
|       return pre_node; | |||
|     } | |||
|     // FpBp layout: one intermediate node sits between the switch and the active node. | |||
|     for (const auto &pre_pre_node : pre_node->GetInControlNodes()) { | |||
|       if (pre_pre_node->GetType() == STREAMSWITCH) { | |||
|         return pre_pre_node; | |||
|       } | |||
|     } | |||
|   } | |||
|   return nullptr; | |||
| } | |||
| Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| vector<uint32_t> loop_active_streams; | |||
| for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { | |||
| @@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| bool is_loop_active = false; | |||
| if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { | |||
| vector<string> activated_label_list; | |||
| NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node); | |||
| if (pre_switch_node == nullptr) { | |||
| GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || | |||
| activated_label_list.empty()) { | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams), | |||
| @@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| // it may cause some stream actived by iterator next step when this stream still alive. | |||
| // If above situation happen, active message will lose, cause process block in next iteration. | |||
| // In order to avoid this abnormal happen, | |||
| // add event between each last node and iterator active node in target active stream | |||
| // add event between each last node and iterator switch node | |||
| GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); | |||
| for (auto iter : stream_id_to_last_node) { | |||
| if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { | |||
| @@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| continue; | |||
| } | |||
| AddSendEventId(iter.second, event_num_); | |||
| AddRecvEventId(node, event_num_); | |||
| AddRecvEventId(pre_switch_node, event_num_); | |||
| event_num_++; | |||
| } | |||
| @@ -21,7 +21,7 @@ | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "common/model_parser/base.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "omm/csa_interact.h" | |||
| #include "runtime/dev.h" | |||
| #include "runtime/mem.h" | |||
| @@ -22,8 +22,8 @@ | |||
| #include "common/helper/model_helper.h" | |||
| #include "common/util.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "omm/csa_interact.h" | |||
| #include "runtime/dev.h" | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/aipp_utils.h" | |||
| #include "graph/load/model_manager/aipp_utils.h" | |||
| #include <string> | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/cpu_queue_schedule.h" | |||
| #include "graph/load/model_manager/cpu_queue_schedule.h" | |||
| #include "common/debug/ge_log.h" | |||
| #include "common/debug/log.h" | |||
| @@ -20,8 +20,8 @@ | |||
| #include <vector> | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/new_model_manager/zero_copy_offset.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/zero_copy_offset.h" | |||
| #include "runtime/kernel.h" | |||
| namespace ge { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/data_dumper.h" | |||
| #include "graph/load/model_manager/data_dumper.h" | |||
| #include <cstdlib> | |||
| #include <ctime> | |||
| @@ -29,7 +29,7 @@ | |||
| #include "framework/common/util.h" | |||
| #include "graph/anchor.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "graph/manager/util/debug.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/data_inputer.h" | |||
| #include "graph/load/model_manager/data_inputer.h" | |||
| #include <securec.h> | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include <graph/utils/node_utils.h> | |||
| #include <algorithm> | |||
| @@ -36,9 +36,9 @@ | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/graph.h" | |||
| #include "graph/load/new_model_manager/cpu_queue_schedule.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/new_model_manager/tbe_handle_store.h" | |||
| #include "graph/load/model_manager/cpu_queue_schedule.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/tbe_handle_store.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "graph/manager/trans_var_data_utils.h" | |||
| @@ -520,6 +520,8 @@ Status DavinciModel::DoTaskSink() { | |||
| GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); | |||
| GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed."); | |||
| GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); | |||
| GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); | |||
| @@ -716,24 +718,10 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| GE_CHK_STATUS_RET(DoTaskSink(), "Task sink failed"); | |||
| GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); | |||
| auto all_dump_model = GetDumpProperties().GetAllDumpModel(); | |||
| bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); | |||
| bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); | |||
| bool dump_l1fusion_op = (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || | |||
| findByOmName || findByModelName; | |||
| if (dump_l1fusion_op) { | |||
| // malloc 2M for dump l1fusion op | |||
| GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); | |||
| // send l1fusion dump addr to rts | |||
| GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion)); | |||
| } | |||
| /// In zero copy model, if an aicpu operator is connected to the first or last layer, then before model execution | |||
| /// the aicpu operator needs to destroy its history record and update the operator memory address. | |||
| /// The model with specified aicpu operators is only marked here; the destruction is in ModelManager::ExecuteModel(). | |||
| need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); | |||
| (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); | |||
| string fp_ceiling_mode; | |||
| if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { | |||
| @@ -2079,6 +2067,8 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO | |||
| Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) { | |||
| GELOGD("Output node size: %zu", output_op_list.size()); | |||
| vector<string> out_node_name; | |||
| (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); | |||
| for (const auto &op_desc : output_op_list) { | |||
| uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||
| for (uint32_t index = 0; index < out_size; index++) { | |||
| @@ -2092,11 +2082,11 @@ Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) | |||
| GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, | |||
| "construct output_name failed."); | |||
| // forward compatibility: if an old om has no out_node_name, return the output name in the original way | |||
| if (out_size == out_node_name_.size()) { | |||
| if (out_size == out_node_name.size()) { | |||
| // newest plan: the index is added to the name when the model is generated. | |||
| bool contains_colon = out_node_name_[index].find(":") != std::string::npos; | |||
| bool contains_colon = out_node_name[index].find(":") != std::string::npos; | |||
| output_name = | |||
| contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); | |||
| contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); | |||
| } else { | |||
| output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + | |||
| std::to_string(src_index[index]); | |||
| @@ -3075,6 +3065,64 @@ Status DavinciModel::MallocKnownArgs() { | |||
| return SUCCESS; | |||
| } | |||
| // Record profiling information for one distributed task. | |||
| // Appends a TaskDescInfo describing `op`/`task` to task_desc_info_ and stores its (task_id, stream_id) | |||
| // pair in profiler_report_op_info_ under the op name. If GetL1FusionEnableOption() returns true or the | |||
| // SKT_ENABLE environment variable parses to a non-zero value, and the task reports a super kernel task | |||
| // id other than 0xFFFFFFFF, a second entry named "super_kernel_<task_index>" is recorded as well. | |||
| // NOTE(review): SKT_ENABLE is looked up on every call (i.e. once per task) - consider hoisting the | |||
| // lookup into the caller if this ever matters. | |||
| void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||
|                                              const domi::TaskDef &task_def, size_t task_index) { | |||
|   bool flag = GetL1FusionEnableOption(); | |||
|   char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||
|   INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||
|   int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||
|   if (env_flag != 0) { | |||
|     flag = true; | |||
|   } | |||
|  | |||
|   TaskDescInfo task_desc_info; | |||
|   // Prefer the om name; fall back to the model name when no om name is set. | |||
|   task_desc_info.model_name = om_name_.empty() ? name_ : om_name_; | |||
|   task_desc_info.op_name = op->GetName(); | |||
|   task_desc_info.block_dim = task_def.kernel().block_dim(); | |||
|   task_desc_info.task_id = task->GetTaskID(); | |||
|   task_desc_info.stream_id = task->GetStreamId(); | |||
|   task_desc_info.shape_type = "static"; | |||
|   task_desc_info.cur_iter_num = 0; | |||
|  | |||
|   // task type | |||
|   task_desc_info.task_type = kTaskTypeInvalid; | |||
|   auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
|   if (model_task_type == RT_MODEL_TASK_KERNEL) { | |||
|     const domi::KernelDef &kernel_def = task_def.kernel(); | |||
|     const auto &context = kernel_def.context(); | |||
|     auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||
|     if (kernel_type == ccKernelType::TE) { | |||
|       task_desc_info.task_type = kTaskTypeAicore; | |||
|     } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||
|       task_desc_info.task_type = kTaskTypeAicpu; | |||
|     } else { | |||
|       GELOGD("Other kernel type: %u", context.kernel_type()); | |||
|     } | |||
|   } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { | |||
|     task_desc_info.task_type = kTaskTypeAicpu; | |||
|   } else { | |||
|     GELOGD("Skip task type: %d", static_cast<int>(model_task_type)); | |||
|   } | |||
|   profiler_report_op_info_[task_desc_info.op_name] = | |||
|       std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||
|   task_desc_info_.emplace_back(task_desc_info); | |||
|  | |||
|   if (flag && (task->GetSktTaskID() != 0xFFFFFFFF)) { | |||
|     // Use a distinct name (the original shadowed task_desc_info) and value-initialize the entry: | |||
|     // only op_name and task_id are assigned below, yet stream_id is read into profiler_report_op_info_. | |||
|     // Value-initialization keeps that read well-defined in case TaskDescInfo declares no default | |||
|     // member initializers. | |||
|     TaskDescInfo skt_task_desc_info{}; | |||
|     skt_task_desc_info.op_name = "super_kernel_" + to_string(task_index); | |||
|     skt_task_desc_info.task_id = task->GetSktTaskID(); | |||
|     profiler_report_op_info_[skt_task_desc_info.op_name] = | |||
|         std::pair<uint32_t, uint32_t>(skt_task_desc_info.task_id, skt_task_desc_info.stream_id); | |||
|     task_desc_info_.emplace_back(skt_task_desc_info); | |||
|   } | |||
| } | |||
| Status DavinciModel::DistributeTask() { | |||
| GELOGI("do Distribute."); | |||
| for (auto &task : cpu_task_list_) { | |||
| @@ -3086,18 +3134,11 @@ Status DavinciModel::DistributeTask() { | |||
| } | |||
| task_desc_info_.clear(); | |||
| bool flag = GetL1FusionEnableOption(); | |||
| char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||
| INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||
| int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||
| if (env_flag != 0) { | |||
| flag = true; | |||
| } | |||
| const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | |||
| for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | |||
| auto &task_def = model_task_def->task(task_index); | |||
| auto &task = task_list_.at(task_index); | |||
| GE_CHECK_NOTNULL(task); | |||
| GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | |||
| // for data dump | |||
| auto op_index = std::max(task_def.kernel().context().op_index(), | |||
| @@ -3117,33 +3158,9 @@ Status DavinciModel::DistributeTask() { | |||
| GE_IF_BOOL_EXEC(no_need_profiling, continue); | |||
| SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | |||
| // Load task info for profiling | |||
| TaskDescInfo task_desc_info; | |||
| if (!om_name_.empty()) { | |||
| task_desc_info.model_name = om_name_; | |||
| } else { | |||
| task_desc_info.model_name = name_; | |||
| } | |||
| task_desc_info.op_name = op->GetName(); | |||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | |||
| task_desc_info.task_id = task->GetTaskID(); | |||
| task_desc_info.stream_id = task->GetStreamId(); | |||
| task_desc_info.shape_type = "static"; | |||
| task_desc_info.cur_iter_num = 0; | |||
| profiler_report_op_info_[task_desc_info.op_name] = | |||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||
| task_desc_info_.emplace_back(task_desc_info); | |||
| if (flag) { | |||
| if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||
| TaskDescInfo task_desc_info; | |||
| string op_name = "super_kernel_" + to_string(task_index); | |||
| task_desc_info.op_name = op_name; | |||
| task_desc_info.task_id = task->GetSktTaskID(); | |||
| profiler_report_op_info_[task_desc_info.op_name] = | |||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||
| task_desc_info_.emplace_back(task_desc_info); | |||
| } | |||
| } | |||
| // save task info for profiling | |||
| SaveProfilingTaskDescInfo(op, task, task_def, task_index); | |||
| } | |||
| // launch dump kernel to aicpu | |||
| GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); | |||
| @@ -3951,7 +3968,6 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<str | |||
| data_dumper_.SetOmName(om_name_); | |||
| data_dumper_.SetComputeGraph(graph); | |||
| data_dumper_.SetRefInfo(saved_task_addrs_); | |||
| data_dumper_.SetL1FusionAddr(l1_fusion_addr_); | |||
| int32_t device_id = 0; | |||
| rtError_t rt_ret = rtGetDevice(&device_id); | |||
| @@ -4161,4 +4177,28 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) { | |||
| } | |||
| } | |||
| // Prepare the L1 fusion dump buffer when this model is selected for dumping. | |||
| // The model is selected if the dump properties list ge::DUMP_ALL_MODEL, the om name or the model name. | |||
| // In that case a buffer of kDumpL1FusionOpMByteSize is allocated, registered through rtDumpAddrSet and | |||
| // handed to data_dumper_. Returns SUCCESS when nothing has to be done or everything succeeded; a | |||
| // failed rtDumpAddrSet yields FAILED, and a failed allocation returns via GE_CHK_RT_RET. | |||
| Status DavinciModel::InitL1DataDumperArgs() { | |||
|   auto dump_models = GetDumpProperties().GetAllDumpModel(); | |||
|   const auto is_listed = [&dump_models](const std::string &model_name) { | |||
|     return dump_models.find(model_name) != dump_models.end(); | |||
|   }; | |||
|   const bool listed_by_om_name = is_listed(om_name_); | |||
|   const bool listed_by_model_name = is_listed(name_); | |||
|   const bool listed_as_all = is_listed(ge::DUMP_ALL_MODEL); | |||
|   if (!(listed_as_all || listed_by_om_name || listed_by_model_name)) { | |||
|     // This model is not selected for dump: no L1 fusion buffer is needed. | |||
|     return SUCCESS; | |||
|   } | |||
|  | |||
|   // Allocate the dump buffer for l1fusion ops (2M according to the original note). | |||
|   GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); | |||
|   // Hand the l1fusion dump address to rts. | |||
|   const auto rt_ret = rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion); | |||
|   if (rt_ret != RT_ERROR_NONE) { | |||
|     // l1_fusion_addr_ will be freed when DavinciModel is destructed | |||
|     GELOGE(FAILED, "Call rtDumpAddrSet failed"); | |||
|     return FAILED; | |||
|   } | |||
|   // Make the address available to the L1 data dump. | |||
|   data_dumper_.SetL1FusionAddr(l1_fusion_addr_); | |||
|   return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -32,12 +32,12 @@ | |||
| #include "common/types.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/aipp_utils.h" | |||
| #include "graph/load/new_model_manager/data_dumper.h" | |||
| #include "graph/load/new_model_manager/data_inputer.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/new_model_manager/zero_copy_offset.h" | |||
| #include "graph/load/new_model_manager/zero_copy_task.h" | |||
| #include "graph/load/model_manager/aipp_utils.h" | |||
| #include "graph/load/model_manager/data_dumper.h" | |||
| #include "graph/load/model_manager/data_inputer.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/zero_copy_offset.h" | |||
| #include "graph/load/model_manager/zero_copy_task.h" | |||
| #include "graph/model.h" | |||
| #include "graph/node.h" | |||
| #include "graph/op_desc.h" | |||
| @@ -623,6 +623,9 @@ class DavinciModel { | |||
| Status DistributeTask(); | |||
| void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||
| const domi::TaskDef &task_def, size_t task_index); | |||
| uint8_t *MallocFeatureMapMem(size_t data_size); | |||
| uint8_t *MallocWeightsMem(size_t weights_size); | |||
| @@ -837,6 +840,8 @@ class DavinciModel { | |||
| void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | |||
| Status InitL1DataDumperArgs(); | |||
| Status InitModelProfile(); | |||
| Status SinkModelProfile(); | |||
| @@ -881,7 +886,6 @@ class DavinciModel { | |||
| GeModelPtr ge_model_; // release after DavinciModel::Init | |||
| bool need_destroy_aicpu_kernel_{false}; | |||
| vector<string> out_node_name_; | |||
| map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| namespace ge { | |||
| DavinciModelParser::DavinciModelParser() {} | |||
| @@ -14,10 +14,11 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include <string> | |||
| #include "mmpa/mmpa_api.h" | |||
| #include "aicpu/aicpu_schedule/aicpu_op_type_list.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "common/l2_cache_optimize.h" | |||
| @@ -27,8 +28,8 @@ | |||
| #include "framework/common/util.h" | |||
| #include "graph/common/ge_call_wrapper.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "model/ge_root_model.h" | |||
| #include "graph/common/local_context.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| @@ -53,7 +54,6 @@ const char *const kBatchLoadBuf = "batchLoadsoFrombuf"; | |||
| const char *const kDeleteCustOp = "deleteCustOp"; | |||
| const int kTimeSpecNano = 1000000000; | |||
| const int kTimeSpecMiro = 1000000; | |||
| const int kSessionMaxBias = 100; | |||
| const int kOpNameMaxSize = 100; | |||
| struct CustAicpuSoBuf { | |||
| uint64_t kernelSoBuf; | |||
| @@ -1024,6 +1024,12 @@ Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippTyp | |||
| } | |||
| Status ModelManager::GenSessionId(uint64_t &session_id) { | |||
| const uint64_t kSessionTimeMask = 0xffffffffffff0000; | |||
| const uint64_t kSessionPidMask = 0x000000000000ff00; | |||
| const uint64_t kSessionBiasMask = 0x00000000000000ff; | |||
| const uint64_t kMaskPerOffset = 8; | |||
| std::lock_guard<std::mutex> lock(session_id_create_mutex_); | |||
| mmTimeval tv; | |||
| @@ -1031,12 +1037,14 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get current time."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| session_id = static_cast<uint64_t>(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us | |||
| uint64_t timestamp = static_cast<uint64_t>(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us | |||
| static uint32_t pid = mmGetPid(); | |||
| session_id_bias_++; | |||
| // max bais 100. | |||
| session_id_bias_ = session_id_bias_ % kSessionMaxBias; | |||
| session_id = session_id * kSessionMaxBias + session_id_bias_; | |||
| session_id = ((timestamp<<kMaskPerOffset<<kMaskPerOffset) & kSessionTimeMask) + | |||
| ((pid<<kMaskPerOffset) & kSessionPidMask) + (session_id_bias_ & kSessionBiasMask); | |||
| GELOGD("Generate new session id: %lu.", session_id); | |||
| return SUCCESS; | |||
| @@ -14,20 +14,13 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include <string> | |||
| #include "common/debug/log.h" | |||
| #include "common/op/ge_op_utils.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "runtime/base.h" | |||
| #include "runtime/kernel.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "graph/types.h" | |||
| #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ | |||
| do { \ | |||
| @@ -342,13 +335,13 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
| int64_t input_offset = v_input_offset[non_const_index]; | |||
| non_const_index++; | |||
| GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), | |||
| VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); | |||
| uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; | |||
| uint8_t *variable_addr = nullptr; | |||
| GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {}); | |||
| v_input_data_addr.push_back(variable_addr); | |||
| GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", | |||
| model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||
| continue); | |||
| int64_t mem_type; | |||
| bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | |||
| // feature maps | |||
| @@ -380,6 +373,34 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
| return v_input_data_addr; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get variable address from its logical offset and store it in var_addr: for RT_MEMORY_RDMA_HBM the offset itself is used as the address, for RT_MEMORY_HBM it is rebased as var_base + offset - logic_var_base. | |||
| /// @return SUCCESS after var_addr passes GE_CHECK_NOTNULL; PARAM_INVALID for a negative RDMA offset or an unsupported memory type. | |||
| /// NOTE(review): VALIDATE_MEM_RANGE and GE_CHECK_NOTNULL are macros whose bodies are not visible here and may return early; confirm VALIDATE_MEM_RANGE's failure path yields an error Status in this Status-returning function. | |||
| Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, | |||
| uint8_t *&var_addr) { | |||
| rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset); | |||
| switch (mem_type) { | |||
| case RT_MEMORY_RDMA_HBM: | |||
| if (offset < 0) { | |||
| GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset)); | |||
| return PARAM_INVALID; | |||
| } | |||
| var_addr = reinterpret_cast<uint8_t *>(offset); | |||
| break; | |||
| case RT_MEMORY_HBM: | |||
| VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); | |||
| var_addr = model_param.var_base + offset - model_param.logic_var_base; | |||
| break; | |||
| default: | |||
| GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type); | |||
| return PARAM_INVALID; | |||
| } | |||
| GE_CHECK_NOTNULL(var_addr); | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get output data address. | |||
| @@ -404,19 +425,26 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||
| return v_output_data_addr; | |||
| } | |||
| for (size_t i = 0; i < outputs_size; ++i) { | |||
| GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | |||
| VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); | |||
| uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; | |||
| v_output_data_addr.push_back(variable_addr); | |||
| GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | |||
| model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||
| continue); | |||
| const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | |||
| if (tensor_desc == nullptr) { | |||
| GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
| continue; | |||
| } | |||
| int32_t calc_type = 0; | |||
| bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||
| if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { | |||
| GELOGD("%s is an optional output, the address don't need to be saved.", tensor_desc->GetName().c_str()); | |||
| continue; | |||
| } | |||
| GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | |||
| uint8_t *variable_addr = nullptr; | |||
| GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); | |||
| v_output_data_addr.push_back(variable_addr); | |||
| GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | |||
| model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||
| continue); | |||
| int64_t mem_type; | |||
| bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | |||
| // feature maps | |||
| @@ -21,7 +21,7 @@ | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "common/types.h" | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| #include "graph/utils/tensor_adapter.h" | |||
| @@ -107,6 +107,15 @@ class ModelUtils { | |||
| /// @return Status | |||
| /// | |||
| static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr); | |||
| private: | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get variable address from its logical offset (RDMA-HBM offsets are used directly as addresses; HBM offsets are rebased onto var_base). | |||
| /// @param [out] var_addr Receives the resolved variable address. | |||
| /// @return SUCCESS when the address was resolved; PARAM_INVALID for a negative RDMA offset or an unsupported memory type. | |||
| static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, | |||
| uint8_t *&var_addr); | |||
| }; | |||
| } // namespace ge | |||
| @@ -14,11 +14,11 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/end_graph_task_info.h" | |||
| #include "graph/load/model_manager/task_info/end_graph_task_info.h" | |||
| #include "common/properties_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace { | |||
| const uint32_t kDumpFlag = 2; | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class EndGraphTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/event_record_task_info.h" | |||
| #include "graph/load/model_manager/task_info/event_record_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class EventRecordTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/event_wait_task_info.h" | |||
| #include "graph/load/model_manager/task_info/event_wait_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class EventWaitTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/fusion_start_task_info.h" | |||
| #include "graph/load/model_manager/task_info/fusion_start_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class FusionStartTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h" | |||
| #include "graph/load/model_manager/task_info/fusion_stop_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class FusionStopTaskInfo : public TaskInfo { | |||
| @@ -14,14 +14,14 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/hccl_task_info.h" | |||
| #include "graph/load/model_manager/task_info/hccl_task_info.h" | |||
| #include <utility> | |||
| #include "common/opskernel/ops_kernel_info_store.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| namespace ge { | |||
| std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; | |||
| @@ -23,7 +23,7 @@ | |||
| #include <vector> | |||
| #include "common/opskernel/ge_task_info.h" | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/manager/util/hcom_util.h" | |||
| namespace ge { | |||
| class HcclTaskInfo : public TaskInfo { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h" | |||
| #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" | |||
| #include <vector> | |||
| @@ -24,8 +24,8 @@ | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/fmk_error_codes.h" | |||
| #include "graph/attr_value.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| namespace ge { | |||
| Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| namespace ge { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/kernel_task_info.h" | |||
| #include "graph/load/model_manager/task_info/kernel_task_info.h" | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| @@ -25,9 +25,9 @@ | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/l2_cache_optimize.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "runtime/kernel.h" | |||
| #include "super_kernel/super_kernel.h" | |||
| #include "super_kernel/super_kernel_factory.h" | |||
| @@ -22,7 +22,7 @@ | |||
| #include <string> | |||
| #include <vector> | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| namespace ge { | |||
| class KernelTaskInfo : public TaskInfo { | |||
| @@ -14,9 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h" | |||
| #include "graph/load/model_manager/task_info/label_goto_ex_task_info.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class LabelGotoExTaskInfo : public TaskInfo { | |||
| @@ -14,9 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/label_set_task_info.h" | |||
| #include "graph/load/model_manager/task_info/label_set_task_info.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class LabelSetTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h" | |||
| #include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| constexpr uint8_t kLabelSwitchIndexNum = 1; | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class LabelSwitchByIndexTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h" | |||
| #include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace { | |||
| const uint32_t kAlignBytes = 64; | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class MemcpyAddrAsyncTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h" | |||
| #include "graph/load/model_manager/task_info/memcpy_async_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| namespace ge { | |||
| @@ -14,11 +14,11 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/model_exit_task_info.h" | |||
| #include "graph/load/model_manager/task_info/model_exit_task_info.h" | |||
| #include "common/properties_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class ModelExitTaskInfo : public TaskInfo { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h" | |||
| #include "graph/load/model_manager/task_info/profiler_trace_task_info.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class ProfilerTraceTaskInfo : public TaskInfo { | |||
| @@ -14,12 +14,12 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/stream_active_task_info.h" | |||
| #include "graph/load/model_manager/task_info/stream_active_task_info.h" | |||
| #include <vector> | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class StreamActiveTaskInfo : public TaskInfo { | |||
| @@ -14,13 +14,13 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/stream_switch_task_info.h" | |||
| #include "graph/load/model_manager/task_info/stream_switch_task_info.h" | |||
| #include <vector> | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| @@ -16,7 +16,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class StreamSwitchTaskInfo : public TaskInfo { | |||
| @@ -13,12 +13,12 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h" | |||
| #include "graph/load/model_manager/task_info/stream_switchn_task_info.h" | |||
| #include <vector> | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| namespace { | |||
| const uint8_t kStreamSwitchnInputNum = 1; | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| namespace ge { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| #include <vector> | |||
| @@ -22,8 +22,8 @@ | |||
| #include "cce/customize.h" | |||
| #include "framework/common/taskdown_common.h" | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "graph/load/new_model_manager/ts_mem_mall.h" | |||
| #include "graph/load/new_model_manager/task_info/task_info_factory.h" | |||
| #include "graph/load/model_manager/ts_mem_mall.h" | |||
| #include "graph/load/model_manager/task_info/task_info_factory.h" | |||
| #include "proto/task.pb.h" | |||
| namespace ge { | |||
| @@ -14,12 +14,12 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/zero_copy_offset.h" | |||
| #include "graph/load/model_manager/zero_copy_offset.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/new_model_manager/zero_copy_task.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/zero_copy_task.h" | |||
| namespace ge { | |||
| namespace { | |||
| @@ -25,7 +25,7 @@ | |||
| #include "external/ge/ge_api_error_codes.h" | |||
| #include "framework/common/ge_types.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/zero_copy_task.h" | |||
| #include "graph/load/model_manager/zero_copy_task.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "runtime/mem.h" | |||
| @@ -65,7 +65,7 @@ class ZeroCopyOffset { | |||
| // data_size of Data/Netoutput | |||
| int64_t GetDataSize() const { return data_size_; } | |||
| // value of *outside_addrs_ from davinci_model | |||
| std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; } | |||
| const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; } | |||
| // name of op | |||
| std::string GetOpName() const { return op_name_; } | |||
| @@ -14,11 +14,11 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/new_model_manager/zero_copy_task.h" | |||
| #include "graph/load/model_manager/zero_copy_task.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "common/ge_compiler_options.h" | |||
| namespace ge { | |||
| @@ -53,6 +53,7 @@ | |||
| #include "graph/passes/dimension_adjust_pass.h" | |||
| #include "graph/passes/dimension_compute_pass.h" | |||
| #include "graph/passes/flow_ctrl_pass.h" | |||
| #include "graph/passes/fuse_data_nodes_with_common_input_pass.h" | |||
| #include "graph/passes/identity_pass.h" | |||
| #include "graph/passes/input_output_connection_identify_pass.h" | |||
| #include "graph/passes/iterator_op_pass.h" | |||
| @@ -2104,6 +2105,24 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
| after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); | |||
| GE_CHK_STATUS_RET( | |||
| after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); | |||
| /* | |||
| * Run CSE before FuseDataNodesWithCommonInputPass to handle the following scenario seen in bertlarge: | |||
| * const | |||
| * / | \ | |||
| * cast1 cast2 cast3 | |||
| * \ | / | |||
| * case | |||
| * The node `const` is the fused const node produced by ConstantFuseSamePass, | |||
| * and the nodes `cast1`, `cast2` and `cast3` will be fused by CSE. | |||
| * To avoid hard-coding this case in FuseDataNodesWithCommonInputPass, | |||
| * CSE is run before FuseDataNodesWithCommonInputPass. | |||
| * This is a temporary solution: this CSE will be removed once the pass is changed from a graph pass to a node pass. | |||
| */ | |||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CSEBeforeFuseDataNodesWithCommonInputPass", | |||
| new (std::nothrow) CommonSubexpressionEliminationPass)); | |||
| // FuseDataNodesWithCommonInputPass: fuse same data with common input in same graph | |||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::FuseDataNodesWithCommonInputPass", | |||
| new (std::nothrow) FuseDataNodesWithCommonInputPass)); | |||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass", | |||
| new (std::nothrow) CommonSubexpressionEliminationPass)); | |||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::PermutePass", new (std::nothrow) PermutePass)) | |||
| @@ -2226,12 +2245,12 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
| GELOGE(ret, "Run passes when OptimizeStage1_3 failed, ret:%u.", ret); | |||
| return ret; | |||
| } | |||
| NamesToPass identity_remove_pass; | |||
| GE_TIMESTAMP_START(identity_remove_pass); | |||
| NamesToPass node_pass; | |||
| GE_TIMESTAMP_START(node_pass); | |||
| IdentityPass identity_force_pass(false); // after SwitchToStreamSwitchPass | |||
| identity_remove_pass.emplace_back("IdentityPass", &identity_force_pass); | |||
| ret = GEPass(compute_graph).Run(identity_remove_pass); | |||
| GE_TIMESTAMP_END(identity_remove_pass, "GraphPrepare::IdentityRemovePass"); | |||
| node_pass.emplace_back("IdentityPass", &identity_force_pass); | |||
| ret = GEPass(compute_graph).Run(node_pass); | |||
| GE_TIMESTAMP_END(node_pass, "GraphPrepare::node_pass"); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret); | |||
| return ret; | |||
| @@ -3102,9 +3121,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp | |||
| graph_name.append(std::to_string(graph_node->GetGraphId())); | |||
| compute_graph->SetName(graph_name); | |||
| } | |||
| std::vector<SubGraphInfoPtr> sub_graph_list; | |||
| auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model, | |||
| session_id); | |||
| auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "SubGraph build Failed."); | |||
| return ret; | |||
| @@ -16,17 +16,10 @@ | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include <utility> | |||
| #include "common/l2_cache_optimize.h" | |||
| #include "common/types.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "ge/ge_api_types.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #include "graph/manager/rdma_pool_allocator.h" | |||
| #include "graph/manager/trans_var_data_utils.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/utils/type_utils.h" | |||
| using std::map; | |||
| @@ -37,7 +30,7 @@ namespace ge { | |||
| VarResource::VarResource(uint64_t session_id) : session_id_(session_id) {} | |||
| VarResource::~VarResource() { | |||
| var_offset_set_.clear(); | |||
| var_offset_map_.clear(); | |||
| var_addr_mgr_map_.clear(); | |||
| cur_var_tensor_desc_map_.clear(); | |||
| var_broad_cast_info_.clear(); | |||
| @@ -91,8 +84,10 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen | |||
| std::string var_key = VarKey(var_name, tensor_desc); | |||
| GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); | |||
| if (var_addr_mgr_map_.count(var_key) == 0) { | |||
| uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + | |||
| static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||
| uint64_t logic_address = static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||
| if (memory_type != RT_MEMORY_RDMA_HBM) { | |||
| logic_address += VarManager::Instance(session_id_)->GetVarMemLogicBase(); | |||
| } | |||
| GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), | |||
| TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), | |||
| TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str()); | |||
| @@ -102,7 +97,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen | |||
| var_addr_mgr.tensor_desc = tensor_desc; | |||
| var_addr_mgr.memory_type = memory_type; | |||
| var_addr_mgr_map_[var_key] = var_addr_mgr; | |||
| var_offset_set_.insert(logic_address); | |||
| var_offset_map_[logic_address] = memory_type; | |||
| return SUCCESS; | |||
| } | |||
| @@ -211,7 +206,14 @@ ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_na | |||
| return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr); | |||
| } | |||
| bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; } | |||
| bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; } | |||
| // Returns the memory type recorded in var_offset_map_ for `offset`, | |||
| // or RT_MEMORY_RESERVED when no entry exists for that offset. | |||
| rtMemType_t VarResource::GetVarMemType(const int64_t &offset) { | |||
|   // One lookup via find() instead of count() followed by operator[] (two hash probes). | |||
|   const auto iter = var_offset_map_.find(offset); | |||
|   if (iter != var_offset_map_.end()) { | |||
|     return iter->second; | |||
|   } | |||
|   return RT_MEMORY_RESERVED; | |||
| } | |||
| VarTransRoad *VarResource::GetTransRoad(const std::string &var_name) { | |||
| auto iter = var_to_trans_road_.find(var_name); | |||
| @@ -252,7 +254,19 @@ Status VarResource::SetAllocatedGraphId(const std::string &var_name, uint32_t gr | |||
| MemResource::MemResource() : total_size_(0), var_mem_size_(0) {} | |||
| Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) { | |||
| // Factory: creates the MemResource implementation that matches `mem_type`. | |||
| // Yields nullptr for any other memory type, or when the nothrow allocation fails. | |||
| MemResource *MemResource::BuildMemResourceFromType(rtMemType_t mem_type) { | |||
|   if (mem_type == RT_MEMORY_HBM) { | |||
|     return new (std::nothrow) HbmMemResource(); | |||
|   } | |||
|   if (mem_type == RT_MEMORY_RDMA_HBM) { | |||
|     return new (std::nothrow) RdmaMemResource(); | |||
|   } | |||
|   return nullptr; | |||
| } | |||
| Status HbmMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, | |||
| size_t &mem_offset) { | |||
| size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; | |||
| uint64_t real_size = size; | |||
| total_size_ = VarManager::Instance(session_id)->GetVarMemMaxSize(); | |||
| @@ -282,6 +296,19 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin | |||
| return SUCCESS; | |||
| } | |||
| // Allocates `size` from the RDMA pool for variable `var_name`. | |||
| // On success the buffer address is written to `address`, `size` is added to var_mem_size_ | |||
| // and SUCCESS is returned; when the pool returns nullptr, MEMALLOC_FAILED is returned. | |||
| Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) { | |||
|   uint8_t *const rdma_addr = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(size); | |||
|   if (rdma_addr != nullptr) { | |||
|     address = static_cast<size_t>(reinterpret_cast<uintptr_t>(rdma_addr)); | |||
|     var_mem_size_ += size; | |||
|     GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].", | |||
|            session_id, var_name.c_str(), 0, rdma_addr, size); | |||
|     return SUCCESS; | |||
|   } | |||
|   GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %llu", var_name.c_str(), size); | |||
|   return MEMALLOC_FAILED; | |||
| } | |||
| // Accessor for the variable memory size tracked by this resource. | |||
| uint64_t MemResource::GetVarMemSize() const { | |||
|   return var_mem_size_; | |||
| } | |||
| // Overwrites the tracked variable memory size with `mem_size`. | |||
| // The signed argument is converted explicitly because var_mem_size_ is unsigned. | |||
| void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = static_cast<uint64_t>(mem_size); } | |||
| @@ -428,7 +455,7 @@ Status VarManager::UpdateVarMemSize(rtMemType_t memory_type, int64_t mem_size) { | |||
| MemResource *mem_resource = nullptr; | |||
| auto iter = mem_resource_map_.find(memory_type); | |||
| if (iter == mem_resource_map_.end()) { | |||
| mem_resource = new (std::nothrow) MemResource(); | |||
| mem_resource = MemResource::BuildMemResourceFromType(memory_type); | |||
| if (mem_resource == nullptr) { | |||
| GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); | |||
| return ge::INTERNAL_ERROR; | |||
| @@ -465,7 +492,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen | |||
| MemResource *mem_resource = nullptr; | |||
| auto it = mem_resource_map_.find(memory_type); | |||
| if (it == mem_resource_map_.end()) { | |||
| mem_resource = new (std::nothrow) MemResource(); | |||
| mem_resource = MemResource::BuildMemResourceFromType(memory_type); | |||
| if (mem_resource == nullptr) { | |||
| GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); | |||
| return ge::INTERNAL_ERROR; | |||
| @@ -629,6 +656,15 @@ bool VarManager::IsVarAddr(const int64_t &offset) { | |||
| return var_resource_->IsVarAddr(offset); | |||
| } | |||
| // Looks up, under mutex_, the memory type that var_resource_ reports for `offset`. | |||
| // Logs a warning and yields RT_MEMORY_RESERVED when var_resource_ is null. | |||
| rtMemType_t VarManager::GetVarMemType(const int64_t &offset) { | |||
|   std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
|   if (var_resource_ != nullptr) { | |||
|     return var_resource_->GetVarMemType(offset); | |||
|   } | |||
|   GELOGW("VarManager has not been init."); | |||
|   return RT_MEMORY_RESERVED; | |||
| } | |||
| ge::Status VarManager::MallocVarMemory(size_t memory_size) { | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| uint8_t *var_mem_base = nullptr; | |||
| @@ -654,12 +690,18 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) { | |||
| // Returns the base address of variable memory for `memory_type`, under mutex_. | |||
| // RT_MEMORY_RDMA_HBM is served from the RDMA pool's base address; every other type is | |||
| // looked up in the memory manager using the session id as key. | |||
| uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) { | |||
|   std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
|   if (memory_type != RT_MEMORY_RDMA_HBM) { | |||
|     string session_key = std::to_string(session_id_); | |||
|     return MemManager::Instance(memory_type)->GetMemoryAddr(session_key); | |||
|   } | |||
|   return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); | |||
| } | |||
| uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| if (memory_type == RT_MEMORY_RDMA_HBM) { | |||
| return logic_addr; | |||
| } | |||
| string mem_key = std::to_string(session_id_); | |||
| uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key); | |||
| if (mem_base == nullptr) { | |||
| @@ -158,13 +158,15 @@ class VarResource { | |||
| bool IsVarAddr(const int64_t &offset); | |||
| rtMemType_t GetVarMemType(const int64_t &offset); | |||
| std::unordered_map<std::string, ge::GeTensorDesc> GetAllVarDesc() const { return cur_var_tensor_desc_map_; } | |||
| private: | |||
| std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc); | |||
| uint64_t session_id_; | |||
| std::unordered_set<uint64_t> var_offset_set_; | |||
| std::unordered_map<uint64_t, rtMemType_t> var_offset_map_; | |||
| std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_; | |||
| std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_; | |||
| std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_; | |||
| @@ -176,19 +178,36 @@ class VarResource { | |||
| class MemResource { | |||
| public: | |||
| MemResource(); | |||
| ~MemResource() = default; | |||
| virtual ~MemResource() = default; | |||
| static MemResource *BuildMemResourceFromType(rtMemType_t mem_type); | |||
| Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset); | |||
| virtual Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) = 0; | |||
| uint64_t GetVarMemSize() const; | |||
| void UpdateVarMemSize(int64_t mem_size); | |||
| private: | |||
| protected: | |||
| uint64_t total_size_; | |||
| uint64_t var_mem_size_; | |||
| }; | |||
| // MemResource implementation created for RT_MEMORY_HBM by MemResource::BuildMemResourceFromType. | |||
| class HbmMemResource : public MemResource { | |||
| public: | |||
|   HbmMemResource() = default; | |||
|   ~HbmMemResource() override = default; | |||
|   // Assigns variable memory for `var_name`. The out-parameter is named `mem_offset` | |||
|   // to match the out-of-line definition and the base-class declaration. | |||
|   Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) override; | |||
| }; | |||
| // MemResource implementation created for RT_MEMORY_RDMA_HBM by MemResource::BuildMemResourceFromType. | |||
| class RdmaMemResource : public MemResource { | |||
| public: | |||
| RdmaMemResource() = default; | |||
| ~RdmaMemResource() override = default; | |||
| // Allocates `size` from the RDMA pool for `var_name`; on success the buffer address is written to `address`. | |||
| Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override; | |||
| }; | |||
| class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { | |||
| public: | |||
| static VarManager *Instance(uint64_t session_id); | |||
| @@ -275,6 +294,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { | |||
| bool IsVarAddr(const int64_t &offset); | |||
| rtMemType_t GetVarMemType(const int64_t &offset); | |||
| uint8_t *GetVarMemoryBase(rtMemType_t memory_type); | |||
| uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type); | |||
| @@ -53,6 +53,10 @@ class RdmaPoolAllocator { | |||
| Status GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size); | |||
| // Returns the stored RDMA base address; const because it only reads the member. | |||
| uint8_t *GetRdmaBaseAddr() const { return rdma_base_addr_; } | |||
| // Returns the stored RDMA memory size; const because it only reads the member. | |||
| size_t GetRdmaMemSize() const { return rdma_mem_size_; } | |||
| private: | |||
| void MergeBlocks(Block *dst, Block *src); | |||
| @@ -44,18 +44,46 @@ | |||
| #define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__) | |||
| #define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) | |||
| bool IsExperimental() { | |||
| const static bool kIsExperimental = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr); | |||
| return kIsExperimental; | |||
| } | |||
| namespace ge { | |||
| using Cluster = DynamicShapePartitioner::Cluster; | |||
| using ClusterPtr = std::shared_ptr<Cluster>; | |||
| // Returns true when any tensor desc in `descs` has data type DT_STRING, DT_RESOURCE or DT_STRING_REF. | |||
| template <typename DescRange> | |||
| static bool ContainsRestrictedDataType(DescRange &&descs) { | |||
|   for (const auto &desc : descs) { | |||
|     const auto type = desc.GetDataType(); | |||
|     if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { | |||
|       return true; | |||
|     } | |||
|   } | |||
|   return false; | |||
| } | |||
| // Decides whether dynamic shape partitioning may proceed for `root_graph`. | |||
| // Returns false when some node has an input or output desc of type DT_STRING, DT_RESOURCE or | |||
| // DT_STRING_REF while the environment variable EXPERIMENTAL_DYNAMIC_PARTITION is not set, | |||
| // or when a node has no op desc. Otherwise returns true; if the variable is set, one event | |||
| // is logged per affected node. | |||
| static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) { | |||
|   // The environment lookup does not depend on the node, so it is done once up front. | |||
|   const bool env_enabled = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr); | |||
|   for (const auto &node : root_graph->GetAllNodes()) { | |||
|     const auto op_desc = node->GetOpDesc(); | |||
|     // NOTE(review): this used to be GE_CHECK_NOTNULL, which presumably returns a status code; | |||
|     // in a bool function that value would convert to true. Reject explicitly instead - confirm. | |||
|     if (op_desc == nullptr) { | |||
|       GELOGE(FAILED, "op_desc is null"); | |||
|       return false; | |||
|     } | |||
|     const bool has_restricted_type = ContainsRestrictedDataType(op_desc->GetAllInputsDesc()) || | |||
|                                      ContainsRestrictedDataType(op_desc->GetAllOutputsDesc()); | |||
|     if (!has_restricted_type) { | |||
|       continue; | |||
|     } | |||
|     if (!env_enabled) { | |||
|       return false; | |||
|     } | |||
|     GEEVENT("In dynamic shape scene, model contains data type:" | |||
|             "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well " | |||
|             "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\"."); | |||
|   } | |||
|   return true; | |||
| } | |||
| Status DynamicShapePartitioner::Partition() { | |||
| REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); | |||
| if (!IsExperimental()) { | |||
| if (!IsInExperimentalMode(root_graph_)) { | |||
| GELOGD("Skip dynamic shape partition as not in experimental mode."); | |||
| REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), | |||
| "Failed set dynamic shape partitioned flag on root graph."); | |||
| @@ -185,6 +213,7 @@ std::string DynamicShapePartitioner::DebugString() const { | |||
| size_t data = 0; | |||
| size_t netoutput = 0; | |||
| size_t is_inputnode = 0; | |||
| size_t stage = 0; | |||
| std::stringstream ss; | |||
| ss << "All unknown shape nodes:" << std::endl; | |||
| for (const auto &node : unknown_shape_nodes_) { | |||
| @@ -201,10 +230,13 @@ std::string DynamicShapePartitioner::DebugString() const { | |||
| netoutput++; | |||
| } else if (cluster->IsInputNode()) { | |||
| is_inputnode++; | |||
| } else if (cluster->IsIndependent()) { | |||
| stage++; | |||
| } | |||
| } | |||
| ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known | |||
| << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode << std::endl; | |||
| << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode | |||
| << ", stage:" << stage << std::endl; | |||
| for (const auto &cluster : unique_clusters_) { | |||
| ss << " " << cluster->DebugString() << std::endl; | |||
| } | |||
| @@ -244,12 +276,15 @@ Status DynamicShapePartitioner::InitClusters() { | |||
| for (const auto &node : graph->GetDirectNode()) { | |||
| Cluster::Type type = Cluster::DATA; | |||
| bool is_input = ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) && node->GetInNodes().empty(); | |||
| REQUIRE_NOT_NULL(node->GetOpDesc(), "op_desc is null"); | |||
| if (node->GetType() == DATA) { | |||
| type = Cluster::DATA; | |||
| } else if (is_input) { | |||
| type = Cluster::INPUT_NODE; | |||
| } else if (node->GetType() == NETOUTPUT) { | |||
| type = Cluster::NETOUTPUT; | |||
| } else if ((node->GetType() == PARTITIONEDCALL) && (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL))) { | |||
| type = Cluster::STAGE; | |||
| } else if (unknown_shape_nodes_.count(node) > 0) { | |||
| type = Cluster::UNKNOWN_SHAPE; | |||
| } else { | |||
| @@ -332,6 +367,9 @@ static std::string ToString(const std::vector<ClusterPtr> &clusters) { | |||
| void DynamicShapePartitioner::MergeClustersUnknownShape() { | |||
| // Merge unknown shape clusters | |||
| for (const auto &cluster : ordered_cluster_) { | |||
| if (cluster->IsIndependent()) { | |||
| continue; | |||
| } | |||
| for (const auto &in_cluster : cluster->Inputs()) { | |||
| if (!in_cluster->IsUnknownShape()) { | |||
| continue; | |||
| @@ -351,6 +389,9 @@ void DynamicShapePartitioner::MergeClustersUnknownShape() { | |||
| void DynamicShapePartitioner::MergeClustersKnownShape() { | |||
| // Merge known shape clusters | |||
| for (const auto &cluster : ordered_cluster_) { | |||
| if (cluster->IsIndependent()) { | |||
| continue; | |||
| } | |||
| if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) { | |||
| auto in_cluster = *(cluster->Inputs().begin()); | |||
| in_cluster->Merge(cluster); | |||
| @@ -578,6 +619,7 @@ void Cluster::UpdateRank(size_t rank) { | |||
| // Type predicates: each compares type_ against one Cluster::Type enumerator. | |||
| // IsIndependent() is the predicate for STAGE clusters. | |||
| bool Cluster::IsData() const { return type_ == DATA; } | |||
| bool Cluster::IsKnownShape() const { return type_ == KNOWN_SHAPE; } | |||
| bool Cluster::IsUnknownShape() const { return type_ == UNKNOWN_SHAPE; } | |||
| bool Cluster::IsIndependent() const { return type_ == STAGE; } | |||
| bool Cluster::IsNetOutput() const { return type_ == NETOUTPUT; } | |||
| bool Cluster::IsInputNode() const { return type_ == INPUT_NODE; } | |||
| bool Cluster::IsRefVariable() const { | |||
| @@ -613,6 +655,9 @@ void Cluster::RemoveOutput(ClusterPtr out) { | |||
| out->in_clusters_.end()); | |||
| }; | |||
| void Cluster::Merge(ClusterPtr other) { | |||
| if (other->IsIndependent()) { | |||
| return; | |||
| } | |||
| nodes_.insert(nodes_.end(), other->nodes_.begin(), other->nodes_.end()); | |||
| other->in_clusters_.erase(std::remove(other->in_clusters_.begin(), other->in_clusters_.end(), shared_from_this()), | |||
| other->in_clusters_.end()); | |||
| @@ -661,7 +706,9 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) { | |||
| std::unordered_set<ClusterPtr> forward_reached_clusters; | |||
| std::unordered_set<ClusterPtr> backward_reached_clusters; | |||
| std::vector<ClusterPtr> path_clusters; | |||
| if (other->IsIndependent()) { | |||
| return path_clusters; | |||
| } | |||
| if (std::find(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()) == | |||
| other->out_clusters_.end()) { | |||
| return path_clusters; | |||
| @@ -744,7 +791,7 @@ Status Cluster::BuildFrame() { | |||
| } | |||
| } | |||
| } | |||
| if (IsData()) { | |||
| if (IsData() || IsIndependent()) { | |||
| for (const auto &anchor : node->GetAllOutDataAnchors()) { | |||
| AddFrameOutput(anchor); | |||
| } | |||
| @@ -860,7 +907,7 @@ Status Cluster::CombinePartitionFrame() { | |||
| } | |||
| Status Cluster::BuildPartitionSubgraph() { | |||
| if (IsData() || IsNetOutput()) { | |||
| if (IsData() || IsNetOutput() || IsIndependent()) { | |||
| return SUCCESS; | |||
| } | |||
| int64_t parent_node_index = 0; | |||
| @@ -32,7 +32,7 @@ class DynamicShapePartitioner { | |||
| // DATA:DATA, UNKNOWN_SHAPE:unknowshape, KNOWN_SHAPE:knowshape, NETOUTPUT:NETOUTPUT. | |||
| class Cluster : public std::enable_shared_from_this<Cluster> { | |||
| public: | |||
| enum Type { DATA, INPUT_NODE, NETOUTPUT, KNOWN_SHAPE, UNKNOWN_SHAPE }; | |||
| enum Type { DATA, INPUT_NODE, NETOUTPUT, STAGE, KNOWN_SHAPE, UNKNOWN_SHAPE }; | |||
| Cluster(size_t rank, Type type, NodePtr node, DynamicShapePartitioner *partitioner) | |||
| : id_(rank), min_(rank), max_(rank), type_(type), partitioner_(partitioner) { | |||
| nodes_.push_back(node); | |||
| @@ -45,6 +45,7 @@ class DynamicShapePartitioner { | |||
| bool IsData() const; | |||
| bool IsKnownShape() const; | |||
| bool IsUnknownShape() const; | |||
| bool IsIndependent() const; | |||
| bool IsNetOutput() const; | |||
| std::vector<std::shared_ptr<Cluster>> Inputs() const; | |||
| std::vector<std::shared_ptr<Cluster>> Outputs() const; | |||
| @@ -25,6 +25,10 @@ | |||
| #include "common/types.h" | |||
| namespace ge { | |||
| namespace { | |||
| // Node types that the stage partitioner checks for source nodes: a node of one of these | |||
| // types that has no input nodes is skipped when stage nodes are collected and marked. | |||
| const std::set<std::string> kSrcNodeTypes = { DATA, AIPPDATA, ANN_DATA }; | |||
| } | |||
| Status StagePartitioner::Partition() { | |||
| GE_CHECK_NOTNULL(root_graph_); | |||
| if (root_graph_->GetParentGraph() != nullptr) { | |||
| @@ -37,6 +41,10 @@ Status StagePartitioner::Partition() { | |||
| if (!AttrUtils::GetInt(op_desc, ATTR_STAGE_LEVEL, level)) { | |||
| continue; | |||
| } | |||
| if ((kSrcNodeTypes.count(op_desc->GetType()) != 0) && node->GetInAllNodes().empty()) { | |||
| continue; | |||
| } | |||
| GELOGD("original node %s for stage %u", node->GetName().c_str(), level); | |||
| stage_nodes_[level].insert(node); | |||
| } | |||
| if (stage_nodes_.empty()) { | |||
| @@ -54,6 +62,13 @@ Status StagePartitioner::Partition() { | |||
| return FAILED; | |||
| } | |||
| root_graph_->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool { | |||
| uint32_t a_level = UINT32_MAX; | |||
| (void)AttrUtils::GetInt(a->GetOpDesc(), ATTR_STAGE_LEVEL, a_level); | |||
| uint32_t b_level = UINT32_MAX; | |||
| (void)AttrUtils::GetInt(b->GetOpDesc(), ATTR_STAGE_LEVEL, b_level); | |||
| return a_level < b_level; | |||
| }); | |||
| if (root_graph_->TopologicalSorting() != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Topological sort for graph %s after stage partition failed, " | |||
| "maybe stage_level was not set correctly.", root_graph_->GetName().c_str()); | |||
| @@ -76,20 +91,26 @@ Status StagePartitioner::SplitStageLevel() { | |||
| auto node = nodes.top(); | |||
| nodes.pop(); | |||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
| if (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL) && (cur_stage_nodes.count(node) == 0)) { | |||
| uint32_t tmp_level = cur_stage_level; | |||
| (void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level); | |||
| if (tmp_level != cur_stage_level) { | |||
| continue; | |||
| } | |||
| for (const auto &in_node : node->GetInAllNodes()) { | |||
| if (visited_stage_nodes.count(in_node) != 0) { | |||
| continue; | |||
| } | |||
| if (!AttrUtils::SetInt(in_node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { | |||
| GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", in_node->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GELOGD("Mark stage_level node %s, stage_level=%u", in_node->GetName().c_str(), cur_stage_level); | |||
| if ((kSrcNodeTypes.count(in_node->GetType()) != 0) && in_node->GetInAllNodes().empty()) { | |||
| GELOGD("skip data node %s for stage %u", in_node->GetName().c_str(), cur_stage_level); | |||
| continue; | |||
| } | |||
| nodes.push(in_node); | |||
| } | |||
| if (!AttrUtils::SetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { | |||
| GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", node->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GELOGD("Mark stage_level node %s, stage_level=%u", node->GetName().c_str(), cur_stage_level); | |||
| visited_stage_nodes.emplace(node); | |||
| } | |||
| for (const auto &node : visited_stage_nodes) { | |||
| @@ -219,6 +240,11 @@ NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const | |||
| op_desc->AddSubgraphName("f"); | |||
| op_desc->SetSubgraphInstanceName(0, graph_name); | |||
| if (!AttrUtils::SetInt(op_desc, ATTR_STAGE_LEVEL, stage_info.stage_level)) { | |||
| GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed", op_desc->GetName().c_str()); | |||
| return nullptr; | |||
| } | |||
| NodePtr subgraph_node = root_graph_->AddNode(op_desc); | |||
| if (subgraph_node == nullptr) { | |||
| GELOGE(FAILED, "Add node %s failed.", graph_name.c_str()); | |||
| @@ -0,0 +1,119 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/passes/fuse_data_nodes_with_common_input_pass.h" | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <set> | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "graph/utils/op_desc_utils.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "graph/utils/node_utils.h" | |||
| using std::map; | |||
| using std::vector; | |||
| using std::set; | |||
| using std::string; | |||
| namespace ge { | |||
| // Pass entry point: collects, per subgraph, which parent-node inputs share a peer out anchor, | |||
| // then fuses the data nodes that map to those shared inputs. | |||
| // Returns GE_GRAPH_PARAM_NULLPTR for a null graph, FAILED when collection fails, | |||
| // otherwise the status of FuseDataNodes. | |||
| Status FuseDataNodesWithCommonInputPass::Run(ge::ComputeGraphPtr graph) { | |||
|   if (graph == nullptr) { | |||
|     GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); | |||
|     return GE_GRAPH_PARAM_NULLPTR; | |||
|   } | |||
|   GELOGD("FuseDataNodesWithCommonInputPass in."); | |||
|   // subgraph -> (peer out anchor feeding the parent node -> parent input indexes fed by that anchor) | |||
|   map<ComputeGraphPtr, map<OutDataAnchorPtr, set<uint32_t>>> fuse_info_by_subgraph; | |||
|   const Status init_status = InitNeedFuseNodesInfo(graph, fuse_info_by_subgraph); | |||
|   if (init_status != SUCCESS) { | |||
|     GELOGE(FAILED, "InitNeedFuseNodesInfo failed."); | |||
|     return FAILED; | |||
|   } | |||
|   return FuseDataNodes(fuse_info_by_subgraph); | |||
| } | |||
| // For every subgraph of `graph` whose parent node is a CASE or IF, records which input indexes | |||
| // of the parent node are fed by each peer out anchor. | |||
| // `subgraphs_to_need_fuse_nodes_info` receives: subgraph -> (peer out anchor -> parent input indexes). | |||
| // Returns SUCCESS, or the GE_CHECK_NOTNULL failure status when a subgraph, parent node, | |||
| // in-data anchor or peer out anchor is null. | |||
| Status FuseDataNodesWithCommonInputPass::InitNeedFuseNodesInfo(ComputeGraphPtr &graph, | |||
|     map<ComputeGraphPtr, map<OutDataAnchorPtr, set<uint32_t>>> &subgraphs_to_need_fuse_nodes_info) { | |||
|   for (const auto &subgraph : graph->GetAllSubgraphs()) { | |||
|     GE_CHECK_NOTNULL(subgraph); | |||
|     auto parent_node = subgraph->GetParentNode(); | |||
|     GE_CHECK_NOTNULL(parent_node); | |||
|     if (parent_node->GetType() != CASE && parent_node->GetType() != IF) { | |||
|       continue; | |||
|     } | |||
|     auto &peer_out_anchors_to_parent_indexes = subgraphs_to_need_fuse_nodes_info[subgraph]; | |||
|     for (const auto &in_data_anchor : parent_node->GetAllInDataAnchors()) { | |||
|       GE_CHECK_NOTNULL(in_data_anchor); | |||
|       OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
|       GE_CHECK_NOTNULL(peer_out_anchor); | |||
|       const uint32_t parent_index = static_cast<uint32_t>(in_data_anchor->GetIdx()); | |||
|       peer_out_anchors_to_parent_indexes[peer_out_anchor].insert(parent_index); | |||
|       // parent_index is unsigned, so it is printed with %u. | |||
|       GELOGD("Peer node %s is the %u input of parent node %s in %s.", | |||
|              peer_out_anchor->GetOwnerNode()->GetName().c_str(), parent_index, parent_node->GetName().c_str(), | |||
|              subgraph->GetName().c_str()); | |||
|     } | |||
|   } | |||
|   return SUCCESS; | |||
| } | |||
| // Fuses data nodes of a subgraph that are fed by the same peer out anchor of the parent node. | |||
| // For each anchor feeding more than one parent input, the DATA nodes whose | |||
| // ATTR_NAME_PARENT_NODE_INDEX is one of those inputs are collected; the first one is kept and | |||
| // every other one has its out control edges and data anchors moved onto it before being removed. | |||
| // Returns SUCCESS, or FAILED as soon as one graph edit fails. | |||
| Status FuseDataNodesWithCommonInputPass::FuseDataNodes( | |||
|     const map<ComputeGraphPtr, map<OutDataAnchorPtr, set<uint32_t>>> &subgraphs_to_need_fuse_nodes_info) { | |||
|   for (const auto &subgraph_to_need_fuse_nodes_info : subgraphs_to_need_fuse_nodes_info) { | |||
|     auto subgraph = subgraph_to_need_fuse_nodes_info.first; | |||
|     for (const auto &peer_out_anchors_to_parent_indexes : subgraph_to_need_fuse_nodes_info.second) { | |||
|       const auto &parent_indexes = peer_out_anchors_to_parent_indexes.second; | |||
|       if (parent_indexes.size() <= 1) { | |||
|         continue; | |||
|       } | |||
|       // data nodes of this subgraph that read one of the parent inputs fed by the common anchor | |||
|       vector<NodePtr> need_fuse_data_nodes; | |||
|       for (const auto &node : subgraph->GetDirectNode()) { | |||
|         if (node->GetType() != DATA) { | |||
|           continue; | |||
|         } | |||
|         GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
|         uint32_t parent_index = 0; | |||
|         if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index) && | |||
|             (parent_indexes.count(parent_index) > 0)) { | |||
|           need_fuse_data_nodes.emplace_back(node); | |||
|         } | |||
|       } | |||
|       if (need_fuse_data_nodes.empty()) { | |||
|         continue; | |||
|       } | |||
|       auto first_node = need_fuse_data_nodes.front(); | |||
|       for (size_t i = 1; i < need_fuse_data_nodes.size(); ++i) { | |||
|         auto node = need_fuse_data_nodes[i]; | |||
|         GELOGI("Replace redundant data node %s by %s exist in graph: %s.", node->GetName().c_str(), | |||
|                first_node->GetName().c_str(), subgraph->GetName().c_str()); | |||
|         // the data node which can be fused has none input(both data and control in) | |||
|         if (GraphUtils::MoveOutCtrlEdges(node, first_node) != SUCCESS) { | |||
|           GELOGE(FAILED, "[%s] MoveOutCtrlEdges failed.", node->GetName().c_str()); | |||
|           return FAILED; | |||
|         } | |||
|         if (GraphUtils::ReplaceNodeDataAnchors(first_node, node, {}, {0}) != SUCCESS) { | |||
|           GELOGE(FAILED, "[%s] ReplaceNodeDataAnchors failed.", node->GetName().c_str()); | |||
|           return FAILED; | |||
|         } | |||
|         if (GraphUtils::RemoveNodeWithoutRelink(subgraph, node) != SUCCESS) { | |||
|           GELOGE(FAILED, "[%s] RemoveNodeWithoutRelink failed.", node->GetName().c_str()); | |||
|           return FAILED; | |||
|         } | |||
|       } | |||
|     } | |||
|   } | |||
|   return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,38 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_ | |||
| #define GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_ | |||
| #include <set> | |||
| #include <map> | |||
| #include <vector> | |||
| #include "graph/types.h" | |||
| #include "inc/graph_pass.h" | |||
| namespace ge { | |||
| class FuseDataNodesWithCommonInputPass : public GraphPass { | |||
| public: | |||
| Status Run(ge::ComputeGraphPtr graph) override; | |||
| private: | |||
| Status InitNeedFuseNodesInfo(ComputeGraphPtr &graph, | |||
| map<ComputeGraphPtr, map<OutDataAnchorPtr, std::set<uint32_t>>> &subgraphs_to_need_fuse_nodes_info); | |||
| Status FuseDataNodes( | |||
| const map<ComputeGraphPtr, map<OutDataAnchorPtr, std::set<uint32_t>>> &subgraphs_to_need_fuse_nodes_info); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_ | |||
| @@ -145,6 +145,7 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra | |||
| return GE_GRAPH_EMPTY_SUBGRAPH; | |||
| } | |||
| set<NodePtr> ctrl_only_const_nodes; | |||
| auto &data_nodes = all_data_nodes[subgraph]; | |||
| auto &const_nodes = all_const_nodes[subgraph]; | |||
| for (auto &node : subgraph->GetDirectNode()) { | |||
| @@ -178,15 +179,30 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra | |||
| peer_name_list.insert(fixed_name + ":" + std::to_string(in_anchor->GetIdx())); | |||
| } | |||
| if (peer_name_list.empty()) { | |||
| GELOGI("%s, Const: %s, no data output", subgraph->GetName().c_str(), node->GetName().c_str()); | |||
| const auto in_all_nodes = node->GetInAllNodes(); | |||
| if (in_all_nodes.empty() || std::all_of(in_all_nodes.begin(), in_all_nodes.end(), | |||
| [](const NodePtr &n) { return n->GetType() == DATA; })) { | |||
| ctrl_only_const_nodes.insert(node); | |||
| } | |||
| continue; | |||
| } | |||
| string key_of_const; | |||
| for (const string &name : peer_name_list) { | |||
| key_of_const += (key_of_const.empty() ? name : "_" + name); | |||
| } | |||
| const_nodes[key_of_const] = node; | |||
| GELOGD("%s, Key: %s, Const: %s", subgraph->GetName().c_str(), key_of_const.c_str(), node->GetName().c_str()); | |||
| GELOGD("%s, Const: %s, Key: %s", subgraph->GetName().c_str(), node->GetName().c_str(), key_of_const.c_str()); | |||
| } | |||
| } | |||
| for (auto &node : ctrl_only_const_nodes) { | |||
| GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(subgraph, node), | |||
| "Remove node without relink failed, node: %s", node->GetName().c_str()); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| @@ -352,7 +368,8 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra | |||
| const auto owner_node = out_anchor->GetOwnerNode(); | |||
| GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), const_node->GetName().c_str()); | |||
| if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty() && owner_node != data_node) { | |||
| graph->RemoveNode(owner_node); | |||
| GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, owner_node), | |||
| "Remove node without relink failed, node: %s", owner_node->GetName().c_str()); | |||
| } | |||
| } | |||
| @@ -414,7 +431,8 @@ Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra | |||
| const auto owner_node = out_anchor->GetOwnerNode(); | |||
| GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str()); | |||
| if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty()) { | |||
| graph->RemoveNode(owner_node); | |||
| GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, owner_node), | |||
| "Remove node without relink failed, node: %s", owner_node->GetName().c_str()); | |||
| } | |||
| } | |||
| GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(const_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed"); | |||
| @@ -442,7 +460,8 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph | |||
| const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes, | |||
| const string &node_key, uint32_t parent_index) { | |||
| if (node_key.empty() || parent_index == kInvalidParent) { | |||
| GELOGE(FAILED, "Graph: %s, inputs is empty", graph->GetName().c_str()); | |||
| GELOGE(FAILED, "Graph: %s, node key: %s, parent index: %u invalid", | |||
| graph->GetName().c_str(), node_key.c_str(), parent_index); | |||
| return FAILED; | |||
| } | |||
| @@ -472,7 +491,8 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph | |||
| return FAILED; | |||
| } | |||
| GE_CHK_GRAPH_STATUS_RET(subgraph->RemoveNode(move_node), "Remove node failed"); | |||
| GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(subgraph, move_node), | |||
| "Remove node without relink failed, node: %s", move_node->GetName().c_str()); | |||
| GELOGI("Remove Node: %s %s", subgraph->GetName().c_str(), move_node->GetName().c_str()); | |||
| } | |||
| @@ -142,17 +142,18 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node | |||
| GE_CHECK_NOTNULL(in_node); | |||
| // Need insert memcpy | |||
| // 1. Const->NetOutput in subgraph | |||
| // 1. Const->NetOutput in subgraph & parent graph is known | |||
| // 2. AtomicOp->NetOutput in subgraph | |||
| // 3. OutputContinuesRequiredOp->NetOutput in subgraph | |||
| // 4. Data->NetOutput in subgraph but parent_node is not while | |||
| // 5. While->NetOutput in known subgraph | |||
| std::string op_type; | |||
| bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || | |||
| bool insert_flag = | |||
| (NodeUtils::GetConstOpType(in_node, op_type) && !graph->GetParentGraph()->GetGraphUnknownFlag()) || | |||
| IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || | |||
| ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || | |||
| (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && | |||
| (kWhileOpTypes.count(in_node->GetType()) != 0)); | |||
| (kWhileOpTypes.count(in_node->GetType()) != 0)); | |||
| if (insert_flag) { | |||
| GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); | |||
| std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; | |||
| @@ -32,5 +32,8 @@ REGISTER_OP_CREATOR(Assign, HostOp); | |||
| REGISTER_OP_CREATOR(RandomUniform, HostOp); | |||
| REGISTER_OP_CREATOR(Add, HostOp); | |||
| REGISTER_OP_CREATOR(Mul, HostOp); | |||
| REGISTER_OP_CREATOR(ConcatV2, HostOp); | |||
| REGISTER_OP_CREATOR(Data, HostOp); | |||
| REGISTER_OP_CREATOR(Fill, HostOp); | |||
| } // namespace host_cpu | |||
| } // namespace ge | |||
| @@ -15,7 +15,7 @@ | |||
| */ | |||
| #include "hybrid/executor/hybrid_model_async_executor.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "graph/ge_context.h" | |||
| @@ -59,6 +59,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis | |||
| run_flag_ = true; | |||
| listener_ = listener; | |||
| future_ = std::async(std::launch::async, [&]() -> Status { | |||
| GetThreadLocalContext() = *executor_->GetContext()->ge_context; | |||
| GetContext().SetSessionId(executor_->GetContext()->session_id); | |||
| return RunInternal(); | |||
| }); | |||
| @@ -229,7 +230,11 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy | |||
| } | |||
| GE_CHECK_GE(tensor_size, 0); | |||
| auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); | |||
| AllocationAttr attr; | |||
| if (GetContext().GetHostExecFlag()) { | |||
| attr.SetMemType(HOST_DDR); | |||
| } | |||
| auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size, &attr); | |||
| GE_CHECK_NOTNULL(tensor_buffer); | |||
| args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(tensor_buffer.release())); | |||
| @@ -21,7 +21,7 @@ | |||
| #include <future> | |||
| #include "external/ge/ge_api_error_codes.h" | |||
| #include "external/ge/ge_api_types.h" | |||
| #include "graph/load/new_model_manager/data_inputer.h" | |||
| #include "graph/load/model_manager/data_inputer.h" | |||
| #include "hybrid/executor/hybrid_model_executor.h" | |||
| #include "runtime/stream.h" | |||