Browse Source

Merge branch 'development' of gitee.com:mindspore/graphengine into development

pull/931/head
zhaoxinxin Gitee 5 years ago
parent
commit
6d2261d3c9
100 changed files with 945 additions and 461 deletions
  1. +16
    -14
      build.sh
  2. +68
    -63
      ge/CMakeLists.txt
  3. +2
    -0
      ge/common/CMakeLists.txt
  4. +1
    -1
      ge/common/helper/model_cache_helper.cc
  5. +1
    -1
      ge/common/helper/model_helper.cc
  6. +4
    -2
      ge/common/profiling/profiling_manager.cc
  7. +1
    -0
      ge/common/types.cc
  8. +32
    -31
      ge/executor/CMakeLists.txt
  9. +3
    -3
      ge/executor/ge_executor.cc
  10. +32
    -31
      ge/executor/module.mk
  11. +33
    -32
      ge/ge_inference.mk
  12. +34
    -32
      ge/ge_runner.mk
  13. +2
    -0
      ge/ge_runtime/CMakeLists.txt
  14. +9
    -13
      ge/graph/build/graph_builder.cc
  15. +4
    -5
      ge/graph/build/graph_builder.h
  16. +7
    -0
      ge/graph/build/memory/block_mem_assigner.cc
  17. +6
    -2
      ge/graph/build/memory/var_mem_assign_util.cc
  18. +27
    -2
      ge/graph/build/stream_allocator.cc
  19. +1
    -1
      ge/graph/execute/graph_execute.cc
  20. +2
    -2
      ge/graph/load/graph_loader.cc
  21. +1
    -1
      ge/graph/load/model_manager/aipp_utils.cc
  22. +0
    -0
      ge/graph/load/model_manager/aipp_utils.h
  23. +1
    -1
      ge/graph/load/model_manager/cpu_queue_schedule.cc
  24. +2
    -2
      ge/graph/load/model_manager/cpu_queue_schedule.h
  25. +2
    -2
      ge/graph/load/model_manager/data_dumper.cc
  26. +0
    -0
      ge/graph/load/model_manager/data_dumper.h
  27. +1
    -1
      ge/graph/load/model_manager/data_inputer.cc
  28. +0
    -0
      ge/graph/load/model_manager/data_inputer.h
  29. +97
    -57
      ge/graph/load/model_manager/davinci_model.cc
  30. +11
    -7
      ge/graph/load/model_manager/davinci_model.h
  31. +1
    -1
      ge/graph/load/model_manager/davinci_model_parser.cc
  32. +0
    -0
      ge/graph/load/model_manager/davinci_model_parser.h
  33. +16
    -8
      ge/graph/load/model_manager/model_manager.cc
  34. +0
    -0
      ge/graph/load/model_manager/model_manager.h
  35. +47
    -19
      ge/graph/load/model_manager/model_utils.cc
  36. +10
    -1
      ge/graph/load/model_manager/model_utils.h
  37. +2
    -2
      ge/graph/load/model_manager/task_info/end_graph_task_info.cc
  38. +1
    -1
      ge/graph/load/model_manager/task_info/end_graph_task_info.h
  39. +2
    -2
      ge/graph/load/model_manager/task_info/event_record_task_info.cc
  40. +1
    -1
      ge/graph/load/model_manager/task_info/event_record_task_info.h
  41. +2
    -2
      ge/graph/load/model_manager/task_info/event_wait_task_info.cc
  42. +1
    -1
      ge/graph/load/model_manager/task_info/event_wait_task_info.h
  43. +2
    -2
      ge/graph/load/model_manager/task_info/fusion_start_task_info.cc
  44. +1
    -1
      ge/graph/load/model_manager/task_info/fusion_start_task_info.h
  45. +2
    -2
      ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc
  46. +1
    -1
      ge/graph/load/model_manager/task_info/fusion_stop_task_info.h
  47. +3
    -3
      ge/graph/load/model_manager/task_info/hccl_task_info.cc
  48. +1
    -1
      ge/graph/load/model_manager/task_info/hccl_task_info.h
  49. +3
    -3
      ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc
  50. +1
    -1
      ge/graph/load/model_manager/task_info/kernel_ex_task_info.h
  51. +4
    -4
      ge/graph/load/model_manager/task_info/kernel_task_info.cc
  52. +1
    -1
      ge/graph/load/model_manager/task_info/kernel_task_info.h
  53. +2
    -2
      ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc
  54. +1
    -1
      ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h
  55. +2
    -2
      ge/graph/load/model_manager/task_info/label_set_task_info.cc
  56. +1
    -1
      ge/graph/load/model_manager/task_info/label_set_task_info.h
  57. +2
    -2
      ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc
  58. +1
    -1
      ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h
  59. +2
    -2
      ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc
  60. +1
    -1
      ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h
  61. +2
    -2
      ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc
  62. +1
    -1
      ge/graph/load/model_manager/task_info/memcpy_async_task_info.h
  63. +2
    -2
      ge/graph/load/model_manager/task_info/model_exit_task_info.cc
  64. +1
    -1
      ge/graph/load/model_manager/task_info/model_exit_task_info.h
  65. +2
    -2
      ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc
  66. +1
    -1
      ge/graph/load/model_manager/task_info/profiler_trace_task_info.h
  67. +2
    -2
      ge/graph/load/model_manager/task_info/stream_active_task_info.cc
  68. +1
    -1
      ge/graph/load/model_manager/task_info/stream_active_task_info.h
  69. +3
    -3
      ge/graph/load/model_manager/task_info/stream_switch_task_info.cc
  70. +1
    -1
      ge/graph/load/model_manager/task_info/stream_switch_task_info.h
  71. +3
    -3
      ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc
  72. +1
    -1
      ge/graph/load/model_manager/task_info/stream_switchn_task_info.h
  73. +0
    -0
      ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc
  74. +0
    -0
      ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h
  75. +0
    -0
      ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc
  76. +0
    -0
      ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h
  77. +1
    -1
      ge/graph/load/model_manager/task_info/task_info.cc
  78. +2
    -2
      ge/graph/load/model_manager/task_info/task_info.h
  79. +0
    -0
      ge/graph/load/model_manager/task_info/task_info_factory.h
  80. +0
    -0
      ge/graph/load/model_manager/tbe_handle_store.cc
  81. +0
    -0
      ge/graph/load/model_manager/tbe_handle_store.h
  82. +0
    -0
      ge/graph/load/model_manager/ts_mem_mall.h
  83. +3
    -3
      ge/graph/load/model_manager/zero_copy_offset.cc
  84. +2
    -2
      ge/graph/load/model_manager/zero_copy_offset.h
  85. +2
    -2
      ge/graph/load/model_manager/zero_copy_task.cc
  86. +0
    -0
      ge/graph/load/model_manager/zero_copy_task.h
  87. +26
    -8
      ge/graph/manager/graph_manager.cc
  88. +58
    -16
      ge/graph/manager/graph_var_manager.cc
  89. +25
    -4
      ge/graph/manager/graph_var_manager.h
  90. +4
    -0
      ge/graph/manager/rdma_pool_allocator.h
  91. +57
    -10
      ge/graph/partition/dynamic_shape_partition.cc
  92. +2
    -1
      ge/graph/partition/dynamic_shape_partition.h
  93. +32
    -6
      ge/graph/partition/stage_partition.cc
  94. +119
    -0
      ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc
  95. +38
    -0
      ge/graph/passes/fuse_data_nodes_with_common_input_pass.h
  96. +25
    -5
      ge/graph/passes/subgraph_const_migration_pass.cc
  97. +4
    -3
      ge/graph/passes/subgraph_pass.cc
  98. +3
    -0
      ge/host_cpu_engine/ops_kernel_store/op/host_op.cc
  99. +7
    -2
      ge/hybrid/executor/hybrid_model_async_executor.cc
  100. +1
    -1
      ge/hybrid/executor/hybrid_model_async_executor.h

+ 16
- 14
build.sh View File

@@ -134,11 +134,7 @@ build_graphengine()
mk_dir "${BUILD_PATH}"
cd "${BUILD_PATH}"

if [[ "X$MINDSPORE_MODE" = "Xoff" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}"
else
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_D=ON -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH}"
fi
CMAKE_ARGS="-DBUILD_PATH=$BUILD_PATH"

if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_COV=ON"
@@ -156,7 +152,13 @@ build_graphengine()
if [[ "X$ENABLE_GITEE" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GITEE=ON"
fi
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}"

if [[ "X$MINDSPORE_MODE" = "Xoff" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}"
else
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_D=ON -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH}"
fi

echo "${CMAKE_ARGS}"
cmake ${CMAKE_ARGS} ..
if [ $? -ne 0 ]
@@ -233,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
# fi

# if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
echo "Generating coverage statistics, please wait..."
cd ${BASEPATH}
rm -rf ${BASEPATH}/cov
mkdir ${BASEPATH}/cov
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
cd ${BASEPATH}/cov
genhtml coverage.info
echo "Generating coverage statistics, please wait..."
cd ${BASEPATH}
rm -rf ${BASEPATH}/cov
mkdir ${BASEPATH}/cov
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
cd ${BASEPATH}/cov
genhtml coverage.info
fi

# generate output package in tar form, including ut/st libraries/executables


+ 68
- 63
ge/CMakeLists.txt View File

@@ -35,6 +35,7 @@ protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST})
if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
############ libge_proto_common.a ############
add_library(ge_proto_common STATIC
${PROTO_HEADER_HDRS}
${PROTO_SRCS}
)

@@ -55,6 +56,7 @@ target_link_libraries(ge_proto_common PRIVATE

############ libge_proto_client.a ############
add_library(ge_proto_client STATIC
${PROTO_HEADER_HDRS}
${PROTO_CLIENT_SRCS}
)

@@ -127,38 +129,38 @@ set(TRAIN_SRC_LIST
"graph/label/partitioned_call_label_maker.cc"
"graph/label/while_label_maker.cc"
"graph/load/graph_loader.cc"
"graph/load/new_model_manager/cpu_queue_schedule.cc"
"graph/load/new_model_manager/data_dumper.cc"
"graph/load/new_model_manager/data_inputer.cc"
"graph/load/new_model_manager/davinci_model.cc"
"graph/load/new_model_manager/davinci_model_parser.cc"
"graph/load/new_model_manager/model_manager.cc"
"graph/load/new_model_manager/model_utils.cc"
"graph/load/new_model_manager/aipp_utils.cc"
"graph/load/new_model_manager/task_info/end_graph_task_info.cc"
"graph/load/new_model_manager/task_info/model_exit_task_info.cc"
"graph/load/new_model_manager/task_info/event_record_task_info.cc"
"graph/load/new_model_manager/task_info/event_wait_task_info.cc"
"graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
"graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
"graph/load/new_model_manager/task_info/hccl_task_info.cc"
"graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
"graph/load/new_model_manager/task_info/kernel_task_info.cc"
"graph/load/new_model_manager/task_info/label_set_task_info.cc"
"graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
"graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
"graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
"graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
"graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
"graph/load/new_model_manager/task_info/stream_active_task_info.cc"
"graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
"graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/new_model_manager/task_info/task_info.cc"
"graph/load/new_model_manager/tbe_handle_store.cc"
"graph/load/new_model_manager/zero_copy_task.cc"
"graph/load/new_model_manager/zero_copy_offset.cc"
"graph/load/model_manager/cpu_queue_schedule.cc"
"graph/load/model_manager/data_dumper.cc"
"graph/load/model_manager/data_inputer.cc"
"graph/load/model_manager/davinci_model.cc"
"graph/load/model_manager/davinci_model_parser.cc"
"graph/load/model_manager/model_manager.cc"
"graph/load/model_manager/model_utils.cc"
"graph/load/model_manager/aipp_utils.cc"
"graph/load/model_manager/task_info/end_graph_task_info.cc"
"graph/load/model_manager/task_info/model_exit_task_info.cc"
"graph/load/model_manager/task_info/event_record_task_info.cc"
"graph/load/model_manager/task_info/event_wait_task_info.cc"
"graph/load/model_manager/task_info/fusion_start_task_info.cc"
"graph/load/model_manager/task_info/fusion_stop_task_info.cc"
"graph/load/model_manager/task_info/hccl_task_info.cc"
"graph/load/model_manager/task_info/kernel_ex_task_info.cc"
"graph/load/model_manager/task_info/kernel_task_info.cc"
"graph/load/model_manager/task_info/label_set_task_info.cc"
"graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
"graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
"graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
"graph/load/model_manager/task_info/memcpy_async_task_info.cc"
"graph/load/model_manager/task_info/profiler_trace_task_info.cc"
"graph/load/model_manager/task_info/stream_active_task_info.cc"
"graph/load/model_manager/task_info/stream_switch_task_info.cc"
"graph/load/model_manager/task_info/stream_switchn_task_info.cc"
"graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
"graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/model_manager/task_info/task_info.cc"
"graph/load/model_manager/tbe_handle_store.cc"
"graph/load/model_manager/zero_copy_task.cc"
"graph/load/model_manager/zero_copy_offset.cc"
"graph/manager/graph_context.cc"
"graph/manager/graph_manager.cc"
"graph/manager/graph_manager_utils.cc"
@@ -200,6 +202,7 @@ set(TRAIN_SRC_LIST
"graph/passes/compile_nodes_pass.cc"
"graph/passes/constant_folding_pass.cc"
"graph/passes/constant_fuse_same_pass.cc"
"graph/passes/fuse_data_nodes_with_common_input_pass.cc"
"graph/passes/remove_same_const_pass.cc"
"graph/passes/useless_control_out_remove_pass.cc"
"graph/passes/control_trigger_pass.cc"
@@ -372,6 +375,7 @@ set(TRAIN_SRC_LIST
"hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
"hybrid/node_executor/controlop/control_op_executor.cc"
"hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
"hybrid/node_executor/hccl/hccl_node_executor.cc"
@@ -482,6 +486,7 @@ set(INFER_SRC_LIST
"graph/passes/net_output_pass.cc"
"graph/passes/replace_transshape_pass.cc"
"graph/passes/constant_fuse_same_pass.cc"
"graph/passes/fuse_data_nodes_with_common_input_pass.cc"
"graph/passes/print_op_pass.cc"
"graph/passes/no_use_reshape_remove_pass.cc"
"graph/passes/iterator_op_pass.cc"
@@ -601,37 +606,37 @@ set(INFER_SRC_LIST
"graph/manager/util/rt_context_util.cc"
"graph/manager/util/variable_accelerate_ctrl.cc"
"graph/manager/util/debug.cc"
"graph/load/new_model_manager/model_manager.cc"
"graph/load/new_model_manager/data_inputer.cc"
"graph/load/new_model_manager/davinci_model.cc"
"graph/load/new_model_manager/davinci_model_parser.cc"
"graph/load/new_model_manager/model_utils.cc"
"graph/load/new_model_manager/aipp_utils.cc"
"graph/load/new_model_manager/tbe_handle_store.cc"
"graph/load/new_model_manager/cpu_queue_schedule.cc"
"graph/load/new_model_manager/zero_copy_task.cc"
"graph/load/new_model_manager/zero_copy_offset.cc"
"graph/load/new_model_manager/data_dumper.cc"
"graph/load/new_model_manager/task_info/task_info.cc"
"graph/load/new_model_manager/task_info/event_record_task_info.cc"
"graph/load/new_model_manager/task_info/event_wait_task_info.cc"
"graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
"graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
"graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
"graph/load/new_model_manager/task_info/kernel_task_info.cc"
"graph/load/new_model_manager/task_info/label_set_task_info.cc"
"graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
"graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
"graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
"graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
"graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
"graph/load/new_model_manager/task_info/stream_active_task_info.cc"
"graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
"graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
"graph/load/new_model_manager/task_info/end_graph_task_info.cc"
"graph/load/new_model_manager/task_info/model_exit_task_info.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"graph/load/model_manager/model_manager.cc"
"graph/load/model_manager/data_inputer.cc"
"graph/load/model_manager/davinci_model.cc"
"graph/load/model_manager/davinci_model_parser.cc"
"graph/load/model_manager/model_utils.cc"
"graph/load/model_manager/aipp_utils.cc"
"graph/load/model_manager/tbe_handle_store.cc"
"graph/load/model_manager/cpu_queue_schedule.cc"
"graph/load/model_manager/zero_copy_task.cc"
"graph/load/model_manager/zero_copy_offset.cc"
"graph/load/model_manager/data_dumper.cc"
"graph/load/model_manager/task_info/task_info.cc"
"graph/load/model_manager/task_info/event_record_task_info.cc"
"graph/load/model_manager/task_info/event_wait_task_info.cc"
"graph/load/model_manager/task_info/fusion_start_task_info.cc"
"graph/load/model_manager/task_info/fusion_stop_task_info.cc"
"graph/load/model_manager/task_info/kernel_ex_task_info.cc"
"graph/load/model_manager/task_info/kernel_task_info.cc"
"graph/load/model_manager/task_info/label_set_task_info.cc"
"graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
"graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
"graph/load/model_manager/task_info/memcpy_async_task_info.cc"
"graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
"graph/load/model_manager/task_info/profiler_trace_task_info.cc"
"graph/load/model_manager/task_info/stream_active_task_info.cc"
"graph/load/model_manager/task_info/stream_switch_task_info.cc"
"graph/load/model_manager/task_info/stream_switchn_task_info.cc"
"graph/load/model_manager/task_info/end_graph_task_info.cc"
"graph/load/model_manager/task_info/model_exit_task_info.cc"
"graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
"single_op/task/op_task.cc"
"single_op/task/build_task_utils.cc"
"single_op/task/tbe_task_builder.cc"


+ 2
- 0
ge/common/CMakeLists.txt View File

@@ -187,6 +187,8 @@ target_compile_options(ge_common PRIVATE
-fvisibility=hidden
-O2
-Werror
-Wno-deprecated-declarations
-fno-common
)

target_include_directories(ge_common PRIVATE


+ 1
- 1
ge/common/helper/model_cache_helper.cc View File

@@ -28,7 +28,7 @@
#include "framework/common/util.h"
#include "graph/detail/attributes_holder.h"
#include "graph/detail/model_serialize_imp.h"
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/model.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/tensor_utils.h"


+ 1
- 1
ge/common/helper/model_helper.cc View File

@@ -23,7 +23,7 @@
#include "framework/common/debug/ge_log.h"
#include "framework/omg/version.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/graph_utils.h"



+ 4
- 2
ge/common/profiling/profiling_manager.cc View File

@@ -21,7 +21,7 @@
#include "framework/common/string_util.h"
#include "graph/ge_context.h"
#include "runtime/base.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace {
const char *const kTrainingTrace = "training_trace";
@@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
int64_t cur_iter_num = task.cur_iter_num;
uint32_t task_type = task.task_type;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim)).append(" ")
@@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append(" ")
.append(shape_type).append(" ")
.append(std::to_string(cur_iter_num)).append("\n");
.append(std::to_string(cur_iter_num)).append(" ")
.append(std::to_string(task_type)).append("\n");

ReporterData reporter_data{};
reporter_data.deviceId = device_id;


+ 1
- 0
ge/common/types.cc View File

@@ -388,6 +388,7 @@ REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive");
REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead");
REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead");
REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite");
REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite");

REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign");
REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp");


+ 32
- 31
ge/executor/CMakeLists.txt View File

@@ -32,37 +32,37 @@ set(SRC_LIST
"../hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"../model/ge_model.cc"
"../model/ge_root_model.cc"
"../graph/load/new_model_manager/davinci_model.cc"
"../graph/load/new_model_manager/davinci_model_parser.cc"
"../graph/load/new_model_manager/model_manager.cc"
"../graph/load/new_model_manager/tbe_handle_store.cc"
"../graph/load/new_model_manager/cpu_queue_schedule.cc"
"../graph/load/new_model_manager/model_utils.cc"
"../graph/load/new_model_manager/aipp_utils.cc"
"../graph/load/new_model_manager/data_inputer.cc"
"../graph/load/new_model_manager/data_dumper.cc"
"../graph/load/new_model_manager/zero_copy_task.cc"
"../graph/load/new_model_manager/zero_copy_offset.cc"
"../graph/load/new_model_manager/task_info/task_info.cc"
"../graph/load/new_model_manager/task_info/event_record_task_info.cc"
"../graph/load/new_model_manager/task_info/event_wait_task_info.cc"
"../graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
"../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
"../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
"../graph/load/new_model_manager/task_info/kernel_task_info.cc"
"../graph/load/new_model_manager/task_info/label_set_task_info.cc"
"../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
"../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
"../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
"../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
"../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
"../graph/load/new_model_manager/task_info/stream_active_task_info.cc"
"../graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
"../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
"../graph/load/new_model_manager/task_info/end_graph_task_info.cc"
"../graph/load/new_model_manager/task_info/model_exit_task_info.cc"
"../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"../graph/load/model_manager/davinci_model.cc"
"../graph/load/model_manager/davinci_model_parser.cc"
"../graph/load/model_manager/model_manager.cc"
"../graph/load/model_manager/tbe_handle_store.cc"
"../graph/load/model_manager/cpu_queue_schedule.cc"
"../graph/load/model_manager/model_utils.cc"
"../graph/load/model_manager/aipp_utils.cc"
"../graph/load/model_manager/data_inputer.cc"
"../graph/load/model_manager/data_dumper.cc"
"../graph/load/model_manager/zero_copy_task.cc"
"../graph/load/model_manager/zero_copy_offset.cc"
"../graph/load/model_manager/task_info/task_info.cc"
"../graph/load/model_manager/task_info/event_record_task_info.cc"
"../graph/load/model_manager/task_info/event_wait_task_info.cc"
"../graph/load/model_manager/task_info/fusion_start_task_info.cc"
"../graph/load/model_manager/task_info/fusion_stop_task_info.cc"
"../graph/load/model_manager/task_info/kernel_ex_task_info.cc"
"../graph/load/model_manager/task_info/kernel_task_info.cc"
"../graph/load/model_manager/task_info/label_set_task_info.cc"
"../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
"../graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
"../graph/load/model_manager/task_info/memcpy_async_task_info.cc"
"../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
"../graph/load/model_manager/task_info/profiler_trace_task_info.cc"
"../graph/load/model_manager/task_info/stream_active_task_info.cc"
"../graph/load/model_manager/task_info/stream_switch_task_info.cc"
"../graph/load/model_manager/task_info/stream_switchn_task_info.cc"
"../graph/load/model_manager/task_info/end_graph_task_info.cc"
"../graph/load/model_manager/task_info/model_exit_task_info.cc"
"../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
"../graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
"../graph/common/local_context.cc"
"../opskernel_manager/ops_kernel_builder_manager.cc"
"../single_op/single_op_manager.cc"
@@ -104,6 +104,7 @@ set(SRC_LIST
"../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
"../hybrid/node_executor/controlop/control_op_executor.cc"
"../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
"../hybrid/node_executor/rts/rts_node_executor.cc"


+ 3
- 3
ge/executor/ge_executor.cc View File

@@ -29,15 +29,15 @@
#include "framework/common/util.h"
#include "graph/execute/graph_execute.h"
#include "graph/load/graph_loader.h"
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "graph/load/new_model_manager/model_manager.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/model.h"
#include "graph/utils/graph_utils.h"
#include "mmpa/mmpa_api.h"
#include "single_op/single_op_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"

using std::string;


+ 32
- 31
ge/executor/module.mk View File

@@ -22,37 +22,37 @@ local_ge_executor_src_files := \
../graph/manager/util/debug.cc \
../model/ge_model.cc \
../model/ge_root_model.cc \
../graph/load/new_model_manager/davinci_model.cc \
../graph/load/new_model_manager/davinci_model_parser.cc \
../graph/load/new_model_manager/model_manager.cc \
../graph/load/new_model_manager/tbe_handle_store.cc \
../graph/load/new_model_manager/cpu_queue_schedule.cc \
../graph/load/new_model_manager/model_utils.cc \
../graph/load/new_model_manager/aipp_utils.cc \
../graph/load/new_model_manager/data_inputer.cc \
../graph/load/new_model_manager/data_dumper.cc \
../graph/load/new_model_manager/zero_copy_task.cc \
../graph/load/new_model_manager/zero_copy_offset.cc \
../graph/load/new_model_manager/task_info/task_info.cc \
../graph/load/new_model_manager/task_info/event_record_task_info.cc \
../graph/load/new_model_manager/task_info/event_wait_task_info.cc \
../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
../graph/load/new_model_manager/task_info/kernel_task_info.cc \
../graph/load/new_model_manager/task_info/label_set_task_info.cc \
../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
../graph/load/new_model_manager/task_info/stream_active_task_info.cc \
../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
../graph/load/new_model_manager/task_info/end_graph_task_info.cc \
../graph/load/new_model_manager/task_info/model_exit_task_info.cc \
../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
../graph/load/model_manager/davinci_model.cc \
../graph/load/model_manager/davinci_model_parser.cc \
../graph/load/model_manager/model_manager.cc \
../graph/load/model_manager/tbe_handle_store.cc \
../graph/load/model_manager/cpu_queue_schedule.cc \
../graph/load/model_manager/model_utils.cc \
../graph/load/model_manager/aipp_utils.cc \
../graph/load/model_manager/data_inputer.cc \
../graph/load/model_manager/data_dumper.cc \
../graph/load/model_manager/zero_copy_task.cc \
../graph/load/model_manager/zero_copy_offset.cc \
../graph/load/model_manager/task_info/task_info.cc \
../graph/load/model_manager/task_info/event_record_task_info.cc \
../graph/load/model_manager/task_info/event_wait_task_info.cc \
../graph/load/model_manager/task_info/fusion_start_task_info.cc \
../graph/load/model_manager/task_info/fusion_stop_task_info.cc \
../graph/load/model_manager/task_info/kernel_ex_task_info.cc \
../graph/load/model_manager/task_info/kernel_task_info.cc \
../graph/load/model_manager/task_info/label_set_task_info.cc \
../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \
../graph/load/model_manager/task_info/label_goto_ex_task_info.cc \
../graph/load/model_manager/task_info/memcpy_async_task_info.cc \
../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \
../graph/load/model_manager/task_info/profiler_trace_task_info.cc \
../graph/load/model_manager/task_info/stream_active_task_info.cc \
../graph/load/model_manager/task_info/stream_switch_task_info.cc \
../graph/load/model_manager/task_info/stream_switchn_task_info.cc \
../graph/load/model_manager/task_info/end_graph_task_info.cc \
../graph/load/model_manager/task_info/model_exit_task_info.cc \
../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \
../graph/load/model_manager/task_info/super_kernel/super_kernel.cc \
../opskernel_manager/ops_kernel_builder_manager.cc \
../single_op/single_op_manager.cc \
../single_op/single_op_model.cc \
@@ -95,6 +95,7 @@ local_ge_executor_src_files := \
../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \
../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \
../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \
../hybrid/node_executor/host_cpu/kernel/data_kernel.cc \
../hybrid/node_executor/controlop/control_op_executor.cc \
../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
../hybrid/node_executor/rts/rts_node_executor.cc \


+ 33
- 32
ge/ge_inference.mk View File

@@ -103,6 +103,7 @@ OMG_HOST_SRC_FILES := \
graph/passes/net_output_pass.cc \
graph/passes/replace_transshape_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/fuse_data_nodes_with_common_input_pass.cc \
graph/passes/print_op_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
@@ -227,37 +228,37 @@ OME_HOST_SRC_FILES := \
graph/manager/util/rt_context_util.cc \
graph/manager/util/variable_accelerate_ctrl.cc \
graph/manager/util/debug.cc \
graph/load/new_model_manager/model_manager.cc \
graph/load/new_model_manager/data_inputer.cc \
graph/load/new_model_manager/davinci_model.cc \
graph/load/new_model_manager/davinci_model_parser.cc \
graph/load/new_model_manager/model_utils.cc \
graph/load/new_model_manager/aipp_utils.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/new_model_manager/zero_copy_offset.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \
graph/load/new_model_manager/task_info/event_wait_task_info.cc \
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
graph/load/new_model_manager/task_info/kernel_task_info.cc \
graph/load/new_model_manager/task_info/label_set_task_info.cc \
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
graph/load/new_model_manager/task_info/stream_active_task_info.cc \
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
graph/load/new_model_manager/task_info/end_graph_task_info.cc \
graph/load/new_model_manager/task_info/model_exit_task_info.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
graph/load/model_manager/model_manager.cc \
graph/load/model_manager/data_inputer.cc \
graph/load/model_manager/davinci_model.cc \
graph/load/model_manager/davinci_model_parser.cc \
graph/load/model_manager/model_utils.cc \
graph/load/model_manager/aipp_utils.cc \
graph/load/model_manager/tbe_handle_store.cc \
graph/load/model_manager/cpu_queue_schedule.cc \
graph/load/model_manager/zero_copy_task.cc \
graph/load/model_manager/zero_copy_offset.cc \
graph/load/model_manager/data_dumper.cc \
graph/load/model_manager/task_info/task_info.cc \
graph/load/model_manager/task_info/event_record_task_info.cc \
graph/load/model_manager/task_info/event_wait_task_info.cc \
graph/load/model_manager/task_info/fusion_start_task_info.cc \
graph/load/model_manager/task_info/fusion_stop_task_info.cc \
graph/load/model_manager/task_info/kernel_ex_task_info.cc \
graph/load/model_manager/task_info/kernel_task_info.cc \
graph/load/model_manager/task_info/label_set_task_info.cc \
graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/model_manager/task_info/memcpy_async_task_info.cc \
graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/model_manager/task_info/profiler_trace_task_info.cc \
graph/load/model_manager/task_info/stream_active_task_info.cc \
graph/load/model_manager/task_info/stream_switch_task_info.cc \
graph/load/model_manager/task_info/stream_switchn_task_info.cc \
graph/load/model_manager/task_info/end_graph_task_info.cc \
graph/load/model_manager/task_info/model_exit_task_info.cc \
graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/model_manager/task_info/super_kernel/super_kernel.cc \
single_op/task/op_task.cc \
single_op/task/build_task_utils.cc \
single_op/task/tbe_task_builder.cc \
@@ -269,7 +270,7 @@ OME_HOST_SRC_FILES := \
single_op/single_op_manager.cc \
hybrid/hybrid_davinci_model_stub.cc \
hybrid/node_executor/aicpu/aicpu_ext_info.cc \
# graph/load/new_model_manager/task_info/hccl_task_info.cc
# graph/load/model_manager/task_info/hccl_task_info.cc

OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES)



+ 34
- 32
ge/ge_runner.mk View File

@@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \
graph/label/partitioned_call_label_maker.cc \
graph/label/while_label_maker.cc \
graph/load/graph_loader.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/data_inputer.cc \
graph/load/new_model_manager/davinci_model.cc \
graph/load/new_model_manager/davinci_model_parser.cc \
graph/load/new_model_manager/model_manager.cc \
graph/load/new_model_manager/model_utils.cc \
graph/load/new_model_manager/aipp_utils.cc \
graph/load/new_model_manager/task_info/end_graph_task_info.cc \
graph/load/new_model_manager/task_info/model_exit_task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \
graph/load/new_model_manager/task_info/event_wait_task_info.cc \
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
graph/load/new_model_manager/task_info/hccl_task_info.cc \
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
graph/load/new_model_manager/task_info/kernel_task_info.cc \
graph/load/new_model_manager/task_info/label_set_task_info.cc \
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
graph/load/new_model_manager/task_info/stream_active_task_info.cc \
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/new_model_manager/zero_copy_offset.cc \
graph/load/model_manager/cpu_queue_schedule.cc \
graph/load/model_manager/data_dumper.cc \
graph/load/model_manager/data_inputer.cc \
graph/load/model_manager/davinci_model.cc \
graph/load/model_manager/davinci_model_parser.cc \
graph/load/model_manager/model_manager.cc \
graph/load/model_manager/model_utils.cc \
graph/load/model_manager/aipp_utils.cc \
graph/load/model_manager/task_info/end_graph_task_info.cc \
graph/load/model_manager/task_info/model_exit_task_info.cc \
graph/load/model_manager/task_info/event_record_task_info.cc \
graph/load/model_manager/task_info/event_wait_task_info.cc \
graph/load/model_manager/task_info/fusion_start_task_info.cc \
graph/load/model_manager/task_info/fusion_stop_task_info.cc \
graph/load/model_manager/task_info/hccl_task_info.cc \
graph/load/model_manager/task_info/kernel_ex_task_info.cc \
graph/load/model_manager/task_info/kernel_task_info.cc \
graph/load/model_manager/task_info/label_set_task_info.cc \
graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/model_manager/task_info/memcpy_async_task_info.cc \
graph/load/model_manager/task_info/profiler_trace_task_info.cc \
graph/load/model_manager/task_info/stream_active_task_info.cc \
graph/load/model_manager/task_info/stream_switch_task_info.cc \
graph/load/model_manager/task_info/stream_switchn_task_info.cc \
graph/load/model_manager/task_info/super_kernel/super_kernel.cc \
graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/model_manager/task_info/task_info.cc \
graph/load/model_manager/tbe_handle_store.cc \
graph/load/model_manager/zero_copy_task.cc \
graph/load/model_manager/zero_copy_offset.cc \
graph/manager/graph_context.cc \
graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \
@@ -127,6 +127,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/compile_nodes_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/fuse_data_nodes_with_common_input_pass.cc \
graph/passes/remove_same_const_pass.cc \
graph/passes/useless_control_out_remove_pass.cc \
graph/passes/control_trigger_pass.cc \
@@ -299,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \
hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \
hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \
hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \
hybrid/node_executor/host_cpu/kernel/data_kernel.cc \
hybrid/node_executor/controlop/control_op_executor.cc \
hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
hybrid/node_executor/hccl/hccl_node_executor.cc \


+ 2
- 0
ge/ge_runtime/CMakeLists.txt View File

@@ -23,6 +23,8 @@ add_library(ge_runtime SHARED ${GE_SRC_LIST})
target_compile_options(ge_runtime PRIVATE
-Werror
-O2
-Wno-deprecated-declarations
-fno-common
)

target_compile_definitions(ge_runtime PRIVATE


+ 9
- 13
ge/graph/build/graph_builder.cc View File

@@ -187,8 +187,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph
return SUCCESS;
}

Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) {
Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) {
if (comp_graph == nullptr) {
GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null.");
return GE_GRAPH_PARAM_NULLPTR;
@@ -203,18 +202,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfo
(void)AttrUtils::GetBool(comp_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape);
if (is_dynamic_shape || comp_graph->GetGraphUnknownFlag()) {
GE_CHK_STATUS_RET(
BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id),
BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id),
"Build for dynamic shape graph failed.");
return SUCCESS;
}

GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id),
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id),
"Build for known shape graph failed.");
ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr);
return SUCCESS;
}

Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list,
Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph,
GeModelPtr &ge_model_ptr, uint64_t session_id) {
if (ge::GetContext().GetHostExecFlag()) {
GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed.");
@@ -222,7 +221,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v
}

GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str());
Status ret = SecondPartition(comp_graph, subgraph_list);
Status ret = SecondPartition(comp_graph);
GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str());
auto subgraph_map = graph_partitioner_.GetSubGraphMap();

@@ -470,7 +469,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
}

Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
uint64_t session_id) {
GELOGI("Start to build BuildForDynamicShape for dynamic shape.");
@@ -517,7 +515,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
}
}
// known shape build flow
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id),
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id),
"Build for known shape graph failed.");
}
ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr);
@@ -719,7 +717,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc)
return SUCCESS;
}

Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) {
Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) {
GE_TIMESTAMP_START(GraphPartition2);
auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning);
if (ret != SUCCESS) {
@@ -727,10 +725,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge:
return ret;
}
GE_CHK_STATUS_RET(ret, "Graph partition Failed.");
auto graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap();
if (graph_2_subgraphlist.find(comp_graph) != graph_2_subgraphlist.end()) {
subgraph_ptr_list = graph_2_subgraphlist[comp_graph];
} else {
const auto &graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap();
if (graph_2_subgraphlist.find(comp_graph) == graph_2_subgraphlist.end()) {
GELOGE(FAILED, "Find subgraph failed.");
return FAILED;
}


+ 4
- 5
ge/graph/build/graph_builder.h View File

@@ -47,8 +47,7 @@ class GraphBuilder {
GraphBuilder(const GraphBuilder &in) = delete;
GraphBuilder &operator=(const GraphBuilder &in) = delete;
virtual ~GraphBuilder() = default;
Status Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID);
Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID);
void SetOptions(const GraphManagerOptions &options);

private:
@@ -59,12 +58,12 @@ class GraphBuilder {
Status UpdateDataInputSize(const ge::NodePtr &node_ptr);
Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr);
Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc);
Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list);
Status SecondPartition(ge::ComputeGraphPtr &comp_graph);
Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph);
Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
uint64_t session_id = INVALID_SESSION_ID);
Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list,
Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph,
GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID);
Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr,
uint64_t session_id = INVALID_SESSION_ID);


+ 7
- 0
ge/graph/build/memory/block_mem_assigner.cc View File

@@ -24,6 +24,7 @@
#include "graph/buffer.h"
#include "graph/ge_attr_value.h"
#include "graph/ge_context.h"
#include "graph/types.h"
#include "graph/node.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/node_utils.h"
@@ -1401,6 +1402,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
if (output_op_desc != nullptr) {
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
}

// fusion: other type's size not means malloc HBM memory
bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1;
if (l1_flag) {
@@ -1408,6 +1410,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]);
size = 0;
}

int32_t calc_type = 0;
bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
GE_IF_BOOL_EXEC((ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;);

std::string peer_name;
uint32_t peer_input_index = 0;
bool out_node_set_continuous_input = false;


+ 6
- 2
ge/graph/build/memory/var_mem_assign_util.cc View File

@@ -60,9 +60,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr
return FAILED);
ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0);
GE_CHECK_NOTNULL(tensor_desc);
rtMemType_t memory_type = RT_MEMORY_HBM;
uint32_t mem_type = 0;
if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) {
memory_type = RT_MEMORY_RDMA_HBM;
}
if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) {
GE_CHK_STATUS_RET(
VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM));
VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type));
GE_IF_BOOL_EXEC(n->GetType() == VARIABLE,
GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID())));
GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID())
@@ -70,7 +75,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr
}

uint8_t *dev_ptr = nullptr;
rtMemType_t memory_type = RT_MEMORY_HBM;
GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID())
->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type));
vector<int64_t> output_list = n->GetOpDesc()->GetOutputOffset();


+ 27
- 2
ge/graph/build/stream_allocator.cc View File

@@ -1013,6 +1013,24 @@ bool StreamAllocator::IsActivated(int64_t stream_id) const {
return false;
}

// Find the StreamSwitch node that precedes a loop-active node along control edges.
// Two layouts are searched:
//   Iterator loop: StreamSwitch -> StreamActive
//   FpBp loop:     StreamSwitch -> AssignAdd -> StreamActive
// Control predecessors are visited in the order GetInControlNodes() yields them. Each direct
// predecessor is tested first, then its own control predecessors; the first match wins.
// Returns nullptr when active_node is null or no StreamSwitch is found within two control hops.
NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) {
  if (active_node == nullptr) {
    return nullptr;
  }
  // Iterate by const reference so that no NodePtr is copied per element
  // (NodePtr is presumably a shared_ptr, whose copy costs an atomic ref-count update).
  for (const auto &pre_node : active_node->GetInControlNodes()) {
    if (pre_node == nullptr) {
      continue;  // nothing to inspect; avoid dereferencing a null predecessor
    }
    if (pre_node->GetType() == STREAMSWITCH) {
      return pre_node;
    }
    for (const auto &pre_pre_node : pre_node->GetInControlNodes()) {
      if ((pre_pre_node != nullptr) && (pre_pre_node->GetType() == STREAMSWITCH)) {
        return pre_pre_node;
      }
    }
  }
  return nullptr;
}

Status StreamAllocator::SetActiveStreamsForLoop() {
vector<uint32_t> loop_active_streams;
for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) {
@@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() {
bool is_loop_active = false;
if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) {
vector<string> activated_label_list;

NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node);
if (pre_switch_node == nullptr) {
GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str());
return FAILED;
}

if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) ||
activated_label_list.empty()) {
GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams),
@@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() {
// it may cause some stream actived by iterator next step when this stream still alive.
// If above situation happen, active message will lose, cause process block in next iteration.
// In order to avoid this abnormal happen,
// add event between each last node and iterator active node in target active stream
// add event between each last node and iterator switch node
GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size());
for (auto iter : stream_id_to_last_node) {
if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) {
@@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() {
continue;
}
AddSendEventId(iter.second, event_num_);
AddRecvEventId(node, event_num_);
AddRecvEventId(pre_switch_node, event_num_);
event_num_++;
}



+ 1
- 1
ge/graph/execute/graph_execute.cc View File

@@ -21,7 +21,7 @@

#include "common/ge_inner_error_codes.h"
#include "common/model_parser/base.h"
#include "graph/load/new_model_manager/model_manager.h"
#include "graph/load/model_manager/model_manager.h"
#include "omm/csa_interact.h"
#include "runtime/dev.h"
#include "runtime/mem.h"


+ 2
- 2
ge/graph/load/graph_loader.cc View File

@@ -22,8 +22,8 @@
#include "common/helper/model_helper.h"
#include "common/util.h"
#include "graph/ge_context.h"
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "graph/load/new_model_manager/model_manager.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "omm/csa_interact.h"
#include "runtime/dev.h"


ge/graph/load/new_model_manager/aipp_utils.cc → ge/graph/load/model_manager/aipp_utils.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/aipp_utils.h"
#include "graph/load/model_manager/aipp_utils.h"

#include <string>


ge/graph/load/new_model_manager/aipp_utils.h → ge/graph/load/model_manager/aipp_utils.h View File


ge/graph/load/new_model_manager/cpu_queue_schedule.cc → ge/graph/load/model_manager/cpu_queue_schedule.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/cpu_queue_schedule.h"
#include "graph/load/model_manager/cpu_queue_schedule.h"
#include "common/debug/ge_log.h"
#include "common/debug/log.h"


ge/graph/load/new_model_manager/cpu_queue_schedule.h → ge/graph/load/model_manager/cpu_queue_schedule.h View File

@@ -20,8 +20,8 @@
#include <vector>

#include "common/ge_inner_error_codes.h"
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/new_model_manager/zero_copy_offset.h"
#include "graph/load/model_manager/task_info/task_info.h"
#include "graph/load/model_manager/zero_copy_offset.h"
#include "runtime/kernel.h"

namespace ge {

ge/graph/load/new_model_manager/data_dumper.cc → ge/graph/load/model_manager/data_dumper.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/data_dumper.h"
#include "graph/load/model_manager/data_dumper.h"

#include <cstdlib>
#include <ctime>
@@ -29,7 +29,7 @@
#include "framework/common/util.h"
#include "graph/anchor.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/model_manager/model_utils.h"
#include "graph/manager/util/debug.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/tensor_utils.h"

ge/graph/load/new_model_manager/data_dumper.h → ge/graph/load/model_manager/data_dumper.h View File


ge/graph/load/new_model_manager/data_inputer.cc → ge/graph/load/model_manager/data_inputer.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/data_inputer.h"
#include "graph/load/model_manager/data_inputer.h"

#include <securec.h>


ge/graph/load/new_model_manager/data_inputer.h → ge/graph/load/model_manager/data_inputer.h View File


ge/graph/load/new_model_manager/davinci_model.cc → ge/graph/load/model_manager/davinci_model.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

#include <graph/utils/node_utils.h>
#include <algorithm>
@@ -36,9 +36,9 @@
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/graph.h"
#include "graph/load/new_model_manager/cpu_queue_schedule.h"
#include "graph/load/new_model_manager/model_manager.h"
#include "graph/load/new_model_manager/tbe_handle_store.h"
#include "graph/load/model_manager/cpu_queue_schedule.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/load/model_manager/tbe_handle_store.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/trans_var_data_utils.h"
@@ -520,6 +520,8 @@ Status DavinciModel::DoTaskSink() {

GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");

GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed.");

GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");

GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_));
@@ -716,24 +718,10 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
GE_CHK_STATUS_RET(DoTaskSink(), "Task sink failed");
GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink");

auto all_dump_model = GetDumpProperties().GetAllDumpModel();
bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
bool dump_l1fusion_op = (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) ||
findByOmName || findByModelName;
if (dump_l1fusion_op) {
// malloc 2M for dump l1fusion op
GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR));

// send l1fusion dump addr to rts
GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion));
}

/// In a zero-copy model, if an aicpu operator is connected to the first or last layer, then before model execution
/// the aicpu operator needs to destroy its history record and update the operator memory address.
/// The model with the specified aicpu operators is only marked here; the destruction is done in ModelManager::ExecuteModel().
need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer();
(void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_);

string fp_ceiling_mode;
if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) {
@@ -2079,6 +2067,8 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO

Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) {
GELOGD("Output node size: %zu", output_op_list.size());
vector<string> out_node_name;
(void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name);
for (const auto &op_desc : output_op_list) {
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
for (uint32_t index = 0; index < out_size; index++) {
@@ -2092,11 +2082,11 @@ Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list)
GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR,
"construct output_name failed.");
// forward compatibility: if an old om has no out_node_name, the output name must be built the original way
if (out_size == out_node_name_.size()) {
if (out_size == out_node_name.size()) {
// newest plan: the index is added to the name when the model is generated.
bool contains_colon = out_node_name_[index].find(":") != std::string::npos;
bool contains_colon = out_node_name[index].find(":") != std::string::npos;
output_name =
contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]);
contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]);
} else {
output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" +
std::to_string(src_index[index]);
@@ -3075,6 +3065,64 @@ Status DavinciModel::MallocKnownArgs() {
return SUCCESS;
}

// Record the profiling description of one distributed task.
//
// Appends a TaskDescInfo for `op`/`task` to task_desc_info_ and stores the (task id, stream id)
// pair in profiler_report_op_info_ under the op name. When super-kernel reporting is active and
// the task carries a super-kernel task id, a second entry named "super_kernel_<task_index>" is
// recorded as well.
//
// op         - op description; only its name is read here.
// task       - distributed task; supplies task id, stream id and super-kernel task id.
// task_def   - task definition; supplies block_dim, the task type and the kernel type.
// task_index - index of the task, used only to build the super-kernel entry name.
void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
                                             const domi::TaskDef &task_def, size_t task_index) {
  // Super-kernel reporting is on when L1 fusion is enabled or SKT_ENABLE parses to a non-zero number.
  bool report_super_kernel = GetL1FusionEnableOption();
  char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
  const INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
  const int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
  if (env_flag != 0) {
    report_super_kernel = true;
  }

  // Value-initialize so that any field not assigned below has a defined value.
  TaskDescInfo task_desc_info{};
  // Prefer the om name; fall back to the model name when no om name is set.
  task_desc_info.model_name = om_name_.empty() ? name_ : om_name_;
  task_desc_info.op_name = op->GetName();
  task_desc_info.block_dim = task_def.kernel().block_dim();
  task_desc_info.task_id = task->GetTaskID();
  task_desc_info.stream_id = task->GetStreamId();
  task_desc_info.shape_type = "static";
  task_desc_info.cur_iter_num = 0;

  // Task type: stays kTaskTypeInvalid unless the task is a recognised aicore/aicpu kernel.
  task_desc_info.task_type = kTaskTypeInvalid;
  const auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
  if (model_task_type == RT_MODEL_TASK_KERNEL) {
    const domi::KernelDef &kernel_def = task_def.kernel();
    const auto &context = kernel_def.context();
    const auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
    if (kernel_type == ccKernelType::TE) {
      task_desc_info.task_type = kTaskTypeAicore;
    } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
      task_desc_info.task_type = kTaskTypeAicpu;
    } else {
      GELOGD("Other kernel type: %u", context.kernel_type());
    }
  } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) {
    task_desc_info.task_type = kTaskTypeAicpu;
  } else {
    GELOGD("Skip task type: %d", static_cast<int>(model_task_type));
  }
  profiler_report_op_info_[task_desc_info.op_name] =
      std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
  task_desc_info_.emplace_back(task_desc_info);

  // 0xFFFFFFFF is treated as "no super-kernel task id" for this task.
  if (report_super_kernel && (task->GetSktTaskID() != 0xFFFFFFFF)) {
    // Separate, value-initialized record: only op_name and task_id are assigned, while stream_id
    // is read right below and the whole struct is stored. A default-initialized struct that
    // shadows the outer one would leave those fields indeterminate.
    // NOTE(review): stream_id is therefore reported as 0 for the super-kernel entry; confirm
    // whether it should carry task->GetStreamId() instead.
    TaskDescInfo skt_desc_info{};
    skt_desc_info.op_name = "super_kernel_" + std::to_string(task_index);
    skt_desc_info.task_id = task->GetSktTaskID();
    profiler_report_op_info_[skt_desc_info.op_name] =
        std::pair<uint32_t, uint32_t>(skt_desc_info.task_id, skt_desc_info.stream_id);
    task_desc_info_.emplace_back(skt_desc_info);
  }
}

Status DavinciModel::DistributeTask() {
GELOGI("do Distribute.");
for (auto &task : cpu_task_list_) {
@@ -3086,18 +3134,11 @@ Status DavinciModel::DistributeTask() {
}

task_desc_info_.clear();
bool flag = GetL1FusionEnableOption();
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
if (env_flag != 0) {
flag = true;
}

const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
auto &task_def = model_task_def->task(task_index);
auto &task = task_list_.at(task_index);
GE_CHECK_NOTNULL(task);
GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index);
// for data dump
auto op_index = std::max(task_def.kernel().context().op_index(),
@@ -3117,33 +3158,9 @@ Status DavinciModel::DistributeTask() {
GE_IF_BOOL_EXEC(no_need_profiling, continue);

SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
// Load task info for profiling
TaskDescInfo task_desc_info;
if (!om_name_.empty()) {
task_desc_info.model_name = om_name_;
} else {
task_desc_info.model_name = name_;
}
task_desc_info.op_name = op->GetName();
task_desc_info.block_dim = task_def.kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info.shape_type = "static";
task_desc_info.cur_iter_num = 0;
profiler_report_op_info_[task_desc_info.op_name] =
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
task_desc_info_.emplace_back(task_desc_info);
if (flag) {
if (task->GetSktTaskID() != 0xFFFFFFFF) {
TaskDescInfo task_desc_info;
string op_name = "super_kernel_" + to_string(task_index);
task_desc_info.op_name = op_name;
task_desc_info.task_id = task->GetSktTaskID();
profiler_report_op_info_[task_desc_info.op_name] =
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
task_desc_info_.emplace_back(task_desc_info);
}
}

// save task info for profiling
SaveProfilingTaskDescInfo(op, task, task_def, task_index);
}
// launch dump kernel to aicpu
GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed.");
@@ -3951,7 +3968,6 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<str
data_dumper_.SetOmName(om_name_);
data_dumper_.SetComputeGraph(graph);
data_dumper_.SetRefInfo(saved_task_addrs_);
data_dumper_.SetL1FusionAddr(l1_fusion_addr_);

int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
@@ -4161,4 +4177,28 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
}
}

///
/// @ingroup ge
/// @brief Prepare the L1 fusion dump buffer when dumping is enabled for this model.
///        Dumping is considered enabled when the set of dump models contains
///        ge::DUMP_ALL_MODEL, this model's om name, or this model's name.
/// @return SUCCESS when nothing has to be done or the buffer was registered;
///         FAILED when rtDumpAddrSet reports an error.
///
Status DavinciModel::InitL1DataDumperArgs() {
  auto dump_models = GetDumpProperties().GetAllDumpModel();
  const auto not_listed = dump_models.end();
  const bool dump_every_model = dump_models.find(ge::DUMP_ALL_MODEL) != not_listed;
  const bool om_name_listed = dump_models.find(om_name_) != not_listed;
  const bool model_name_listed = dump_models.find(name_) != not_listed;
  if (!(dump_every_model || om_name_listed || model_name_listed)) {
    // This model is not selected for dump, so no buffer is needed.
    return SUCCESS;
  }

  // Allocate the buffer used to dump l1fusion ops (the original author's note sizes it at 2M).
  GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR));

  // Hand the l1fusion dump address over to rts.
  const rtError_t set_ret =
      rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion);
  if (set_ret != RT_ERROR_NONE) {
    // The buffer is intentionally not released here; per the original author's note
    // l1_fusion_addr_ is freed when the DavinciModel is destructed.
    GELOGE(FAILED, "Call rtDumpAddrSet failed");
    return FAILED;
  }

  // Let the data dumper know where the l1 dump data lives.
  data_dumper_.SetL1FusionAddr(l1_fusion_addr_);
  return SUCCESS;
}

} // namespace ge

ge/graph/load/new_model_manager/davinci_model.h → ge/graph/load/model_manager/davinci_model.h View File

@@ -32,12 +32,12 @@
#include "common/types.h"
#include "framework/common/util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/aipp_utils.h"
#include "graph/load/new_model_manager/data_dumper.h"
#include "graph/load/new_model_manager/data_inputer.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/new_model_manager/zero_copy_offset.h"
#include "graph/load/new_model_manager/zero_copy_task.h"
#include "graph/load/model_manager/aipp_utils.h"
#include "graph/load/model_manager/data_dumper.h"
#include "graph/load/model_manager/data_inputer.h"
#include "graph/load/model_manager/model_utils.h"
#include "graph/load/model_manager/zero_copy_offset.h"
#include "graph/load/model_manager/zero_copy_task.h"
#include "graph/model.h"
#include "graph/node.h"
#include "graph/op_desc.h"
@@ -623,6 +623,9 @@ class DavinciModel {

Status DistributeTask();

void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
const domi::TaskDef &task_def, size_t task_index);

uint8_t *MallocFeatureMapMem(size_t data_size);

uint8_t *MallocWeightsMem(size_t weights_size);
@@ -837,6 +840,8 @@ class DavinciModel {

void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name);

Status InitL1DataDumperArgs();

Status InitModelProfile();
Status SinkModelProfile();

@@ -881,7 +886,6 @@ class DavinciModel {
GeModelPtr ge_model_; // release after DavinciModel::Init

bool need_destroy_aicpu_kernel_{false};
vector<string> out_node_name_;

map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init


ge/graph/load/new_model_manager/davinci_model_parser.cc → ge/graph/load/model_manager/davinci_model_parser.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/davinci_model_parser.h"

namespace ge {
DavinciModelParser::DavinciModelParser() {}

ge/graph/load/new_model_manager/davinci_model_parser.h → ge/graph/load/model_manager/davinci_model_parser.h View File


ge/graph/load/new_model_manager/model_manager.cc → ge/graph/load/model_manager/model_manager.cc View File

@@ -14,10 +14,11 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/model_manager.h"
#include "graph/load/model_manager/model_manager.h"

#include <string>

#include "mmpa/mmpa_api.h"
#include "aicpu/aicpu_schedule/aicpu_op_type_list.h"
#include "common/dump/dump_manager.h"
#include "common/l2_cache_optimize.h"
@@ -27,8 +28,8 @@
#include "framework/common/util.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h"
#include "graph/common/local_context.h"
#include "graph/utils/attr_utils.h"
@@ -53,7 +54,6 @@ const char *const kBatchLoadBuf = "batchLoadsoFrombuf";
const char *const kDeleteCustOp = "deleteCustOp";
const int kTimeSpecNano = 1000000000;
const int kTimeSpecMiro = 1000000;
const int kSessionMaxBias = 100;
const int kOpNameMaxSize = 100;
struct CustAicpuSoBuf {
uint64_t kernelSoBuf;
@@ -1024,6 +1024,12 @@ Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippTyp
}

Status ModelManager::GenSessionId(uint64_t &session_id) {
const uint64_t kSessionTimeMask = 0xffffffffffff0000;
const uint64_t kSessionPidMask = 0x000000000000ff00;
const uint64_t kSessionBiasMask = 0x00000000000000ff;

const uint64_t kMaskPerOffset = 8;

std::lock_guard<std::mutex> lock(session_id_create_mutex_);

mmTimeval tv;
@@ -1031,12 +1037,14 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {
GELOGE(INTERNAL_ERROR, "Failed to get current time.");
return INTERNAL_ERROR;
}
session_id = static_cast<uint64_t>(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us
uint64_t timestamp = static_cast<uint64_t>(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us

static uint32_t pid = mmGetPid();

session_id_bias_++;
// max bias 100.
session_id_bias_ = session_id_bias_ % kSessionMaxBias;
session_id = session_id * kSessionMaxBias + session_id_bias_;
session_id = ((timestamp<<kMaskPerOffset<<kMaskPerOffset) & kSessionTimeMask) +
((pid<<kMaskPerOffset) & kSessionPidMask) + (session_id_bias_ & kSessionBiasMask);

GELOGD("Generate new session id: %lu.", session_id);
return SUCCESS;

ge/graph/load/new_model_manager/model_manager.h → ge/graph/load/model_manager/model_manager.h View File


ge/graph/load/new_model_manager/model_utils.cc → ge/graph/load/model_manager/model_utils.cc View File

@@ -14,20 +14,13 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/model_utils.h"

#include "graph/load/model_manager/model_utils.h"
#include <string>

#include "common/debug/log.h"
#include "common/op/ge_op_utils.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/tensor_utils.h"
#include "runtime/base.h"
#include "runtime/kernel.h"

#include "framework/common/debug/ge_log.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/types.h"

#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \
do { \
@@ -342,13 +335,13 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
int64_t input_offset = v_input_offset[non_const_index];
non_const_index++;
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset),
VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base);
uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base;
uint8_t *variable_addr = nullptr;
GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {});
v_input_data_addr.push_back(variable_addr);
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
continue);
int64_t mem_type;
bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type);
// feature maps
@@ -380,6 +373,34 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
return v_input_data_addr;
}

///
/// @ingroup ge
/// @brief Resolve the address of a variable from its logic offset, according to the
///        memory type VarManager reports for that offset.
/// @param [in] model_param: runtime parameters; session_id, var_size, var_base and logic_var_base are read.
/// @param [in] op_desc: op that owns the variable, passed on to VALIDATE_MEM_RANGE.
/// @param [in] offset: logic offset of the variable. For RT_MEMORY_RDMA_HBM it is taken as the address itself.
/// @param [out] var_addr: resolved variable address.
/// @return SUCCESS when an address was resolved; PARAM_INVALID for a negative RDMA offset
///         or an unsupported memory type.
///
Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset,
                              uint8_t *&var_addr) {
  const rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset);
  if (mem_type == RT_MEMORY_RDMA_HBM) {
    // RDMA variables carry their address directly in the offset value.
    uint8_t *const rdma_addr = reinterpret_cast<uint8_t *>(offset);
    if (offset < 0) {
      GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", rdma_addr);
      return PARAM_INVALID;
    }
    var_addr = rdma_addr;
  } else if (mem_type == RT_MEMORY_HBM) {
    // HBM variables are addressed relative to the variable memory base.
    // NOTE(review): VALIDATE_MEM_RANGE is defined outside this function; confirm that its
    // early return yields a failure Status here rather than a default-constructed value.
    VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base);
    var_addr = model_param.var_base + offset - model_param.logic_var_base;
  } else {
    GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type);
    return PARAM_INVALID;
  }

  // A resolved address must never be null.
  GE_CHECK_NOTNULL(var_addr);
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get output data address.
@@ -404,19 +425,26 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
return v_output_data_addr;
}
for (size_t i = 0; i < outputs_size; ++i) {
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]),
VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base);
uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base;
v_output_data_addr.push_back(variable_addr);
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
continue);
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i);
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}

int32_t calc_type = 0;
bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
GELOGD("%s is an optional output, the address don't need to be saved.", tensor_desc->GetName().c_str());
continue;
}
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]),
uint8_t *variable_addr = nullptr;
GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {});
v_output_data_addr.push_back(variable_addr);
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
continue);

int64_t mem_type;
bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type);
// feature maps

ge/graph/load/new_model_manager/model_utils.h → ge/graph/load/model_manager/model_utils.h View File

@@ -21,7 +21,7 @@

#include "common/ge_inner_error_codes.h"
#include "common/types.h"
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"
#include "graph/op_desc.h"
#include "graph/utils/tensor_adapter.h"

@@ -107,6 +107,15 @@ class ModelUtils {
/// @return Status
///
static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr);

private:
///
/// @ingroup ge
/// @brief Get variable address.
/// @return Status
///
static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset,
uint8_t *&var_addr);
};
} // namespace ge


ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc → ge/graph/load/model_manager/task_info/end_graph_task_info.cc View File

@@ -14,11 +14,11 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/end_graph_task_info.h"
#include "graph/load/model_manager/task_info/end_graph_task_info.h"

#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace {
const uint32_t kDumpFlag = 2;

ge/graph/load/new_model_manager/task_info/end_graph_task_info.h → ge/graph/load/model_manager/task_info/end_graph_task_info.h View File

@@ -17,7 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class EndGraphTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/event_record_task_info.cc → ge/graph/load/model_manager/task_info/event_record_task_info.cc View File

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/event_record_task_info.h"
#include "graph/load/model_manager/task_info/event_record_task_info.h"

#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace ge {
Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {

ge/graph/load/new_model_manager/task_info/event_record_task_info.h → ge/graph/load/model_manager/task_info/event_record_task_info.h View File

@@ -16,7 +16,7 @@

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class EventRecordTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc → ge/graph/load/model_manager/task_info/event_wait_task_info.cc View File

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/event_wait_task_info.h"
#include "graph/load/model_manager/task_info/event_wait_task_info.h"

#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace ge {
Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {

ge/graph/load/new_model_manager/task_info/event_wait_task_info.h → ge/graph/load/model_manager/task_info/event_wait_task_info.h View File

@@ -16,7 +16,7 @@

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class EventWaitTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc → ge/graph/load/model_manager/task_info/fusion_start_task_info.cc View File

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h"
#include "graph/load/model_manager/task_info/fusion_start_task_info.h"

#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace ge {
Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {

ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h → ge/graph/load/model_manager/task_info/fusion_start_task_info.h View File

@@ -16,7 +16,7 @@

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class FusionStartTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc → ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc View File

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h"
#include "graph/load/model_manager/task_info/fusion_stop_task_info.h"

#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace ge {
Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {

ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h → ge/graph/load/model_manager/task_info/fusion_stop_task_info.h View File

@@ -16,7 +16,7 @@

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class FusionStopTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/hccl_task_info.cc → ge/graph/load/model_manager/task_info/hccl_task_info.cc View File

@@ -14,14 +14,14 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/hccl_task_info.h"
#include "graph/load/model_manager/task_info/hccl_task_info.h"

#include <utility>

#include "common/opskernel/ops_kernel_info_store.h"
#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/model_utils.h"

namespace ge {
std::mutex HcclTaskInfo::hccl_follow_stream_mutex_;

ge/graph/load/new_model_manager/task_info/hccl_task_info.h → ge/graph/load/model_manager/task_info/hccl_task_info.h View File

@@ -23,7 +23,7 @@
#include <vector>

#include "common/opskernel/ge_task_info.h"
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"
#include "graph/manager/util/hcom_util.h"
namespace ge {
class HcclTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc → ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h"
#include "graph/load/model_manager/task_info/kernel_ex_task_info.h"

#include <vector>

@@ -24,8 +24,8 @@
#include "framework/common/debug/ge_log.h"
#include "framework/common/fmk_error_codes.h"
#include "graph/attr_value.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/new_model_manager/model_manager.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/model_manager.h"

namespace ge {
Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {

ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h → ge/graph/load/model_manager/task_info/kernel_ex_task_info.h View File

@@ -17,7 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"
#include "graph/op_desc.h"

namespace ge {

ge/graph/load/new_model_manager/task_info/kernel_task_info.cc → ge/graph/load/model_manager/task_info/kernel_task_info.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/kernel_task_info.h"
#include "graph/load/model_manager/task_info/kernel_task_info.h"
#include <map>
#include <memory>
#include <string>
@@ -25,9 +25,9 @@
#include "framework/common/debug/ge_log.h"
#include "framework/common/l2_cache_optimize.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/new_model_manager/model_manager.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/load/model_manager/model_utils.h"
#include "runtime/kernel.h"
#include "super_kernel/super_kernel.h"
#include "super_kernel/super_kernel_factory.h"

ge/graph/load/new_model_manager/task_info/kernel_task_info.h → ge/graph/load/model_manager/task_info/kernel_task_info.h View File

@@ -22,7 +22,7 @@
#include <string>
#include <vector>

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"
#include "graph/op_desc.h"
namespace ge {
class KernelTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc → ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc View File

@@ -14,9 +14,9 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h"
#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h"

#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {

ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h → ge/graph/load/model_manager/task_info/label_goto_ex_task_info.h View File

@@ -17,7 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class LabelGotoExTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/label_set_task_info.cc → ge/graph/load/model_manager/task_info/label_set_task_info.cc View File

@@ -14,9 +14,9 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/label_set_task_info.h"
#include "graph/load/model_manager/task_info/label_set_task_info.h"

#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {

ge/graph/load/new_model_manager/task_info/label_set_task_info.h → ge/graph/load/model_manager/task_info/label_set_task_info.h View File

@@ -17,7 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class LabelSetTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc → ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc View File

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h"
#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h"

#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace ge {
constexpr uint8_t kLabelSwitchIndexNum = 1;

ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h → ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.h View File

@@ -17,7 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class LabelSwitchByIndexTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc → ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc View File

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h"
#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h"

#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace {
const uint32_t kAlignBytes = 64;

ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h → ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.h View File

@@ -17,7 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class MemcpyAddrAsyncTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc → ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc View File

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h"
#include "graph/load/model_manager/task_info/memcpy_async_task_info.h"

#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace ge {
Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {

ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h → ge/graph/load/model_manager/task_info/memcpy_async_task_info.h View File

@@ -17,7 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"
#include "graph/op_desc.h"

namespace ge {

ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc → ge/graph/load/model_manager/task_info/model_exit_task_info.cc View File

@@ -14,11 +14,11 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/model_exit_task_info.h"
#include "graph/load/model_manager/task_info/model_exit_task_info.h"

#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace ge {
Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {

ge/graph/load/new_model_manager/task_info/model_exit_task_info.h → ge/graph/load/model_manager/task_info/model_exit_task_info.h View File

@@ -17,7 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class ModelExitTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc → ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc View File

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h"
#include "graph/load/model_manager/task_info/profiler_trace_task_info.h"

#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"

namespace ge {
Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {

ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h → ge/graph/load/model_manager/task_info/profiler_trace_task_info.h View File

@@ -16,7 +16,7 @@

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class ProfilerTraceTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc → ge/graph/load/model_manager/task_info/stream_active_task_info.cc View File

@@ -14,12 +14,12 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/stream_active_task_info.h"
#include "graph/load/model_manager/task_info/stream_active_task_info.h"

#include <vector>

#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {

ge/graph/load/new_model_manager/task_info/stream_active_task_info.h → ge/graph/load/model_manager/task_info/stream_active_task_info.h View File

@@ -16,7 +16,7 @@

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class StreamActiveTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc → ge/graph/load/model_manager/task_info/stream_switch_task_info.cc View File

@@ -14,13 +14,13 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h"
#include "graph/load/model_manager/task_info/stream_switch_task_info.h"

#include <vector>

#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/model_utils.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {

ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h → ge/graph/load/model_manager/task_info/stream_switch_task_info.h View File

@@ -16,7 +16,7 @@

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

namespace ge {
class StreamSwitchTaskInfo : public TaskInfo {

ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc → ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc View File

@@ -13,12 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h"
#include "graph/load/model_manager/task_info/stream_switchn_task_info.h"
#include <vector>
#include "framework/common/debug/ge_log.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/model_utils.h"

namespace {
const uint8_t kStreamSwitchnInputNum = 1;

ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h → ge/graph/load/model_manager/task_info/stream_switchn_task_info.h View File

@@ -17,7 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"
#include "graph/op_desc.h"

namespace ge {

ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc → ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc View File


ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h → ge/graph/load/model_manager/task_info/super_kernel/super_kernel.h View File


ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc → ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc View File


ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h → ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h View File


ge/graph/load/new_model_manager/task_info/task_info.cc → ge/graph/load/model_manager/task_info/task_info.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/model_manager/task_info/task_info.h"

#include <vector>


ge/graph/load/new_model_manager/task_info/task_info.h → ge/graph/load/model_manager/task_info/task_info.h View File

@@ -22,8 +22,8 @@
#include "cce/customize.h"
#include "framework/common/taskdown_common.h"
#include "framework/common/ge_inner_error_codes.h"
#include "graph/load/new_model_manager/ts_mem_mall.h"
#include "graph/load/new_model_manager/task_info/task_info_factory.h"
#include "graph/load/model_manager/ts_mem_mall.h"
#include "graph/load/model_manager/task_info/task_info_factory.h"
#include "proto/task.pb.h"

namespace ge {

ge/graph/load/new_model_manager/task_info/task_info_factory.h → ge/graph/load/model_manager/task_info/task_info_factory.h View File


ge/graph/load/new_model_manager/tbe_handle_store.cc → ge/graph/load/model_manager/tbe_handle_store.cc View File


ge/graph/load/new_model_manager/tbe_handle_store.h → ge/graph/load/model_manager/tbe_handle_store.h View File


ge/graph/load/new_model_manager/ts_mem_mall.h → ge/graph/load/model_manager/ts_mem_mall.h View File


ge/graph/load/new_model_manager/zero_copy_offset.cc → ge/graph/load/model_manager/zero_copy_offset.cc View File

@@ -14,12 +14,12 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/zero_copy_offset.h"
#include "graph/load/model_manager/zero_copy_offset.h"

#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/new_model_manager/zero_copy_task.h"
#include "graph/load/model_manager/model_utils.h"
#include "graph/load/model_manager/zero_copy_task.h"

namespace ge {
namespace {

ge/graph/load/new_model_manager/zero_copy_offset.h → ge/graph/load/model_manager/zero_copy_offset.h View File

@@ -25,7 +25,7 @@
#include "external/ge/ge_api_error_codes.h"
#include "framework/common/ge_types.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/zero_copy_task.h"
#include "graph/load/model_manager/zero_copy_task.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/tensor_utils.h"
#include "runtime/mem.h"
@@ -65,7 +65,7 @@ class ZeroCopyOffset {
// data_size of Data/Netoutput
int64_t GetDataSize() const { return data_size_; }
// value of *outside_addrs_ from davinci_model
std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; }
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; }
// name of op
std::string GetOpName() const { return op_name_; }


ge/graph/load/new_model_manager/zero_copy_task.cc → ge/graph/load/model_manager/zero_copy_task.cc View File

@@ -14,11 +14,11 @@
* limitations under the License.
*/

#include "graph/load/new_model_manager/zero_copy_task.h"
#include "graph/load/model_manager/zero_copy_task.h"

#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/model_manager/model_utils.h"
#include "common/ge_compiler_options.h"

namespace ge {

ge/graph/load/new_model_manager/zero_copy_task.h → ge/graph/load/model_manager/zero_copy_task.h View File


+ 26
- 8
ge/graph/manager/graph_manager.cc View File

@@ -53,6 +53,7 @@
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/fuse_data_nodes_with_common_input_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/input_output_connection_identify_pass.h"
#include "graph/passes/iterator_op_pass.h"
@@ -2104,6 +2105,24 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass));
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass));
/*
* Do CSE before FuseDataNodesWithCommonInputPass to handle the following scenario in bertlarge:
* const
* / | \
* cast1 cast2 cast3
* \ | /
* case
* The node `const` is the fused const node produced by ConstantFuseSamePass.
* The nodes `cast1`, `cast2` and `cast3` will be fused by CSE.
* To avoid hard-coding this case in FuseDataNodesWithCommonInputPass,
* we run CSE before FuseDataNodesWithCommonInputPass.
* This is a temporary solution: this CSE will be removed once the pass is changed from a graph pass to a node pass.
*/
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CSEBeforeFuseDataNodesWithCommonInputPass",
new (std::nothrow) CommonSubexpressionEliminationPass));
// FuseDataNodesWithCommonInputPass: fuse same data with common input in same graph
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::FuseDataNodesWithCommonInputPass",
new (std::nothrow) FuseDataNodesWithCommonInputPass));
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass",
new (std::nothrow) CommonSubexpressionEliminationPass));
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::PermutePass", new (std::nothrow) PermutePass))
@@ -2226,12 +2245,12 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
GELOGE(ret, "Run passes when OptimizeStage1_3 failed, ret:%u.", ret);
return ret;
}
NamesToPass identity_remove_pass;
GE_TIMESTAMP_START(identity_remove_pass);
NamesToPass node_pass;
GE_TIMESTAMP_START(node_pass);
IdentityPass identity_force_pass(false); // after SwitchToStreamSwitchPass
identity_remove_pass.emplace_back("IdentityPass", &identity_force_pass);
ret = GEPass(compute_graph).Run(identity_remove_pass);
GE_TIMESTAMP_END(identity_remove_pass, "GraphPrepare::IdentityRemovePass");
node_pass.emplace_back("IdentityPass", &identity_force_pass);
ret = GEPass(compute_graph).Run(node_pass);
GE_TIMESTAMP_END(node_pass, "GraphPrepare::node_pass");
if (ret != SUCCESS) {
GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret);
return ret;
@@ -3102,9 +3121,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp
graph_name.append(std::to_string(graph_node->GetGraphId()));
compute_graph->SetName(graph_name);
}
std::vector<SubGraphInfoPtr> sub_graph_list;
auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model,
session_id);

auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id);
if (ret != SUCCESS) {
GELOGE(ret, "SubGraph build Failed.");
return ret;


+ 58
- 16
ge/graph/manager/graph_var_manager.cc View File

@@ -16,17 +16,10 @@

#include "graph/manager/graph_var_manager.h"

#include <utility>

#include "common/l2_cache_optimize.h"
#include "common/types.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "ge/ge_api_types.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"
#include "graph/manager/trans_var_data_utils.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/type_utils.h"

using std::map;
@@ -37,7 +30,7 @@ namespace ge {
VarResource::VarResource(uint64_t session_id) : session_id_(session_id) {}

VarResource::~VarResource() {
var_offset_set_.clear();
var_offset_map_.clear();
var_addr_mgr_map_.clear();
cur_var_tensor_desc_map_.clear();
var_broad_cast_info_.clear();
@@ -91,8 +84,10 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen
std::string var_key = VarKey(var_name, tensor_desc);
GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str());
if (var_addr_mgr_map_.count(var_key) == 0) {
uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() +
static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address));
uint64_t logic_address = static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address));
if (memory_type != RT_MEMORY_RDMA_HBM) {
logic_address += VarManager::Instance(session_id_)->GetVarMemLogicBase();
}
GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(),
TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(),
TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str());
@@ -102,7 +97,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen
var_addr_mgr.tensor_desc = tensor_desc;
var_addr_mgr.memory_type = memory_type;
var_addr_mgr_map_[var_key] = var_addr_mgr;
var_offset_set_.insert(logic_address);
var_offset_map_[logic_address] = memory_type;

return SUCCESS;
}
@@ -211,7 +206,14 @@ ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_na
return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr);
}

bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; }
bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; }

/// Returns the memory type that SaveVarAddr stored in var_offset_map_ for the
/// logic address `offset`, or RT_MEMORY_RESERVED when no entry exists.
rtMemType_t VarResource::GetVarMemType(const int64_t &offset) {
  // One find() replaces count() + operator[]: a single hash lookup, and no
  // accessor that could insert a default entry.
  const auto iter = var_offset_map_.find(offset);
  if (iter != var_offset_map_.end()) {
    return iter->second;
  }
  return RT_MEMORY_RESERVED;
}

VarTransRoad *VarResource::GetTransRoad(const std::string &var_name) {
auto iter = var_to_trans_road_.find(var_name);
@@ -252,7 +254,19 @@ Status VarResource::SetAllocatedGraphId(const std::string &var_name, uint32_t gr

MemResource::MemResource() : total_size_(0), var_mem_size_(0) {}

Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) {
/// Factory for the MemResource implementation that matches `mem_type`.
/// RT_MEMORY_HBM yields an HbmMemResource and RT_MEMORY_RDMA_HBM an RdmaMemResource.
/// Any other type yields nullptr; nullptr is also what the nothrow allocation
/// produces when it fails.
MemResource *MemResource::BuildMemResourceFromType(rtMemType_t mem_type) {
  if (mem_type == RT_MEMORY_HBM) {
    return new (std::nothrow) HbmMemResource();
  }
  if (mem_type == RT_MEMORY_RDMA_HBM) {
    return new (std::nothrow) RdmaMemResource();
  }
  return nullptr;
}

Status HbmMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id,
size_t &mem_offset) {
size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize;
uint64_t real_size = size;
total_size_ = VarManager::Instance(session_id)->GetVarMemMaxSize();
@@ -282,6 +296,19 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin
return SUCCESS;
}

/// Allocates `size` bytes for variable `var_name` from the RDMA pool.
/// On success the buffer address is written to `address` and var_mem_size_
/// grows by `size`. Returns MEMALLOC_FAILED when the pool returns nullptr.
Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) {
  // The RDMA pool instance is looked up with RT_MEMORY_HBM.
  uint8_t *rdma_buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(size);
  if (rdma_buffer == nullptr) {
    GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %llu", var_name.c_str(), size);
    return MEMALLOC_FAILED;
  }

  var_mem_size_ += size;
  // Unlike an offset, the value handed back here is the buffer address itself.
  address = static_cast<size_t>(reinterpret_cast<uintptr_t>(rdma_buffer));
  GELOGI("[IMAS]AssignVarMem Set session_%llu name[%s] output[%d] addr to [%p] size[%llu].",
         session_id, var_name.c_str(), 0, rdma_buffer, size);
  return SUCCESS;
}

uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; }

void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; };
@@ -428,7 +455,7 @@ Status VarManager::UpdateVarMemSize(rtMemType_t memory_type, int64_t mem_size) {
MemResource *mem_resource = nullptr;
auto iter = mem_resource_map_.find(memory_type);
if (iter == mem_resource_map_.end()) {
mem_resource = new (std::nothrow) MemResource();
mem_resource = MemResource::BuildMemResourceFromType(memory_type);
if (mem_resource == nullptr) {
GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type);
return ge::INTERNAL_ERROR;
@@ -465,7 +492,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen
MemResource *mem_resource = nullptr;
auto it = mem_resource_map_.find(memory_type);
if (it == mem_resource_map_.end()) {
mem_resource = new (std::nothrow) MemResource();
mem_resource = MemResource::BuildMemResourceFromType(memory_type);
if (mem_resource == nullptr) {
GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type);
return ge::INTERNAL_ERROR;
@@ -629,6 +656,15 @@ bool VarManager::IsVarAddr(const int64_t &offset) {
return var_resource_->IsVarAddr(offset);
}

/// Forwards the memory-type query for `offset` to var_resource_ while holding mutex_.
/// Logs a warning and returns RT_MEMORY_RESERVED when var_resource_ is null.
rtMemType_t VarManager::GetVarMemType(const int64_t &offset) {
  std::lock_guard<std::recursive_mutex> guard(mutex_);
  if (var_resource_ != nullptr) {
    return var_resource_->GetVarMemType(offset);
  }
  GELOGW("VarManager has not been init.");
  return RT_MEMORY_RESERVED;
}

ge::Status VarManager::MallocVarMemory(size_t memory_size) {
std::lock_guard<std::recursive_mutex> lock(mutex_);
uint8_t *var_mem_base = nullptr;
@@ -654,12 +690,18 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) {

/// Returns the base address of the variable memory for `memory_type` (mutex_ is held).
/// For RT_MEMORY_RDMA_HBM the RDMA pool's base address is returned; for every other
/// type the address is looked up in the memory manager under the session id key.
uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) {
  std::lock_guard<std::recursive_mutex> guard(mutex_);
  if (memory_type != RT_MEMORY_RDMA_HBM) {
    string session_key = std::to_string(session_id_);
    return MemManager::Instance(memory_type)->GetMemoryAddr(session_key);
  }
  return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr();
}

uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) {
std::lock_guard<std::recursive_mutex> lock(mutex_);
if (memory_type == RT_MEMORY_RDMA_HBM) {
return logic_addr;
}
string mem_key = std::to_string(session_id_);
uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key);
if (mem_base == nullptr) {


+ 25
- 4
ge/graph/manager/graph_var_manager.h View File

@@ -158,13 +158,15 @@ class VarResource {

bool IsVarAddr(const int64_t &offset);

rtMemType_t GetVarMemType(const int64_t &offset);

std::unordered_map<std::string, ge::GeTensorDesc> GetAllVarDesc() const { return cur_var_tensor_desc_map_; }

private:
std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc);

uint64_t session_id_;
std::unordered_set<uint64_t> var_offset_set_;
std::unordered_map<uint64_t, rtMemType_t> var_offset_map_;
std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_;
std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_;
std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_;
@@ -176,19 +178,36 @@ class VarResource {
class MemResource {
public:
MemResource();
~MemResource() = default;
virtual ~MemResource() = default;
static MemResource *BuildMemResourceFromType(rtMemType_t mem_type);

Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset);
virtual Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) = 0;

uint64_t GetVarMemSize() const;

void UpdateVarMemSize(int64_t mem_size);

private:
protected:
uint64_t total_size_;
uint64_t var_mem_size_;
};

// MemResource implementation that MemResource::BuildMemResourceFromType creates for RT_MEMORY_HBM.
class HbmMemResource : public MemResource {
public:
HbmMemResource() = default;
~HbmMemResource() override = default;

// Overrides MemResource::AssignVarMem; the assignment result for `var_name` is reported through `address`.
// NOTE(review): the definition names this out-parameter `mem_offset` — presumably it is an offset, not a pointer; confirm.
Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override;
};

// MemResource implementation that MemResource::BuildMemResourceFromType creates for RT_MEMORY_RDMA_HBM.
class RdmaMemResource : public MemResource {
public:
RdmaMemResource() = default;
~RdmaMemResource() override = default;

// Overrides MemResource::AssignVarMem: allocates `size` bytes from the RDMA pool and writes the
// buffer address to `address`; returns MEMALLOC_FAILED when the allocation fails.
Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &address) override;
};

class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager {
public:
static VarManager *Instance(uint64_t session_id);
@@ -275,6 +294,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager {

bool IsVarAddr(const int64_t &offset);

rtMemType_t GetVarMemType(const int64_t &offset);

uint8_t *GetVarMemoryBase(rtMemType_t memory_type);

uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type);


+ 4
- 0
ge/graph/manager/rdma_pool_allocator.h View File

@@ -53,6 +53,10 @@ class RdmaPoolAllocator {

Status GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size);

uint8_t *GetRdmaBaseAddr() { return rdma_base_addr_; }

size_t GetRdmaMemSize() { return rdma_mem_size_; }

private:
void MergeBlocks(Block *dst, Block *src);



+ 57
- 10
ge/graph/partition/dynamic_shape_partition.cc View File

@@ -44,18 +44,46 @@
#define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__)
#define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__)

bool IsExperimental() {
const static bool kIsExperimental = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr);
return kIsExperimental;
}

namespace ge {
using Cluster = DynamicShapePartitioner::Cluster;
using ClusterPtr = std::shared_ptr<Cluster>;

// Returns true when any tensor description in `descs` has data type
// DT_STRING, DT_RESOURCE or DT_STRING_REF.
template <typename TensorDescs>
static bool HasStringLikeDataType(const TensorDescs &descs) {
  for (const auto &desc : descs) {
    const auto type = desc.GetDataType();
    if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) {
      return true;
    }
  }
  return false;
}

/// Decides whether dynamic shape partitioning may run on `root_graph`.
///
/// The input and output tensor descriptions of every node are inspected. When a
/// DT_STRING / DT_RESOURCE / DT_STRING_REF tensor is found and the environment
/// variable EXPERIMENTAL_DYNAMIC_PARTITION is not set, the result is false.
/// When the variable is set, an event is logged (once per affected input list
/// and once per affected output list) and scanning continues.
/// A graph without such tensors always yields true.
static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) {
  // Loop-invariant: read the environment once instead of on every matching tensor.
  const bool experimental_env_set = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr);
  for (const auto &node : root_graph->GetAllNodes()) {
    // NOTE(review): GE_CHECK_NOTNULL returns from this bool function when op_desc is null;
    // confirm that the value it returns is meaningful as a bool here.
    GE_CHECK_NOTNULL(node->GetOpDesc());
    const auto op_desc = node->GetOpDesc();
    // Inputs are evaluated before outputs, matching the original scan order.
    const bool has_string_like[] = {HasStringLikeDataType(op_desc->GetAllInputsDesc()),
                                    HasStringLikeDataType(op_desc->GetAllOutputsDesc())};
    for (const bool found : has_string_like) {
      if (!found) {
        continue;
      }
      if (!experimental_env_set) {
        return false;
      }
      GEEVENT("In dynamic shape scene, model contains data type:"
              "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well "
              "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\".");
    }
  }
  return true;
}

Status DynamicShapePartitioner::Partition() {
REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr.");
if (!IsExperimental()) {
if (!IsInExperimentalMode(root_graph_)) {
GELOGD("Skip dynamic shape partition as not in experimental mode.");
REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false),
"Failed set dynamic shape partitioned flag on root graph.");
@@ -185,6 +213,7 @@ std::string DynamicShapePartitioner::DebugString() const {
size_t data = 0;
size_t netoutput = 0;
size_t is_inputnode = 0;
size_t stage = 0;
std::stringstream ss;
ss << "All unknown shape nodes:" << std::endl;
for (const auto &node : unknown_shape_nodes_) {
@@ -201,10 +230,13 @@ std::string DynamicShapePartitioner::DebugString() const {
netoutput++;
} else if (cluster->IsInputNode()) {
is_inputnode++;
} else if (cluster->IsIndependent()) {
stage++;
}
}
ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known
<< ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode << std::endl;
<< ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode
<< ", stage:" << stage << std::endl;
for (const auto &cluster : unique_clusters_) {
ss << " " << cluster->DebugString() << std::endl;
}
@@ -244,12 +276,15 @@ Status DynamicShapePartitioner::InitClusters() {
for (const auto &node : graph->GetDirectNode()) {
Cluster::Type type = Cluster::DATA;
bool is_input = ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) && node->GetInNodes().empty();
REQUIRE_NOT_NULL(node->GetOpDesc(), "op_desc is null");
if (node->GetType() == DATA) {
type = Cluster::DATA;
} else if (is_input) {
type = Cluster::INPUT_NODE;
} else if (node->GetType() == NETOUTPUT) {
type = Cluster::NETOUTPUT;
} else if ((node->GetType() == PARTITIONEDCALL) && (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL))) {
type = Cluster::STAGE;
} else if (unknown_shape_nodes_.count(node) > 0) {
type = Cluster::UNKNOWN_SHAPE;
} else {
@@ -332,6 +367,9 @@ static std::string ToString(const std::vector<ClusterPtr> &clusters) {
void DynamicShapePartitioner::MergeClustersUnknownShape() {
// Merge unknown shape clusters
for (const auto &cluster : ordered_cluster_) {
if (cluster->IsIndependent()) {
continue;
}
for (const auto &in_cluster : cluster->Inputs()) {
if (!in_cluster->IsUnknownShape()) {
continue;
@@ -351,6 +389,9 @@ void DynamicShapePartitioner::MergeClustersUnknownShape() {
void DynamicShapePartitioner::MergeClustersKnownShape() {
// Merge known shape clusters
for (const auto &cluster : ordered_cluster_) {
if (cluster->IsIndependent()) {
continue;
}
if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) {
auto in_cluster = *(cluster->Inputs().begin());
in_cluster->Merge(cluster);
@@ -578,6 +619,7 @@ void Cluster::UpdateRank(size_t rank) {
bool Cluster::IsData() const { return type_ == DATA; };
bool Cluster::IsKnownShape() const { return type_ == KNOWN_SHAPE; };
bool Cluster::IsUnknownShape() const { return type_ == UNKNOWN_SHAPE; };
// True for STAGE clusters; InitClusters assigns that type to PARTITIONEDCALL nodes that carry ATTR_STAGE_LEVEL.
bool Cluster::IsIndependent() const { return type_ == STAGE; };
bool Cluster::IsNetOutput() const { return type_ == NETOUTPUT; };
bool Cluster::IsInputNode() const { return type_ == INPUT_NODE; };
bool Cluster::IsRefVariable() const {
@@ -613,6 +655,9 @@ void Cluster::RemoveOutput(ClusterPtr out) {
out->in_clusters_.end());
};
void Cluster::Merge(ClusterPtr other) {
if (other->IsIndependent()) {
return;
}
nodes_.insert(nodes_.end(), other->nodes_.begin(), other->nodes_.end());
other->in_clusters_.erase(std::remove(other->in_clusters_.begin(), other->in_clusters_.end(), shared_from_this()),
other->in_clusters_.end());
@@ -661,7 +706,9 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) {
std::unordered_set<ClusterPtr> forward_reached_clusters;
std::unordered_set<ClusterPtr> backward_reached_clusters;
std::vector<ClusterPtr> path_clusters;

if (other->IsIndependent()) {
return path_clusters;
}
if (std::find(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()) ==
other->out_clusters_.end()) {
return path_clusters;
@@ -744,7 +791,7 @@ Status Cluster::BuildFrame() {
}
}
}
if (IsData()) {
if (IsData() || IsIndependent()) {
for (const auto &anchor : node->GetAllOutDataAnchors()) {
AddFrameOutput(anchor);
}
@@ -860,7 +907,7 @@ Status Cluster::CombinePartitionFrame() {
}

Status Cluster::BuildPartitionSubgraph() {
if (IsData() || IsNetOutput()) {
if (IsData() || IsNetOutput() || IsIndependent()) {
return SUCCESS;
}
int64_t parent_node_index = 0;


+ 2
- 1
ge/graph/partition/dynamic_shape_partition.h View File

@@ -32,7 +32,7 @@ class DynamicShapePartitioner {
// DATA:DATA, UNKNOWN_SHAPE:unknown shape, KNOWN_SHAPE:known shape, NETOUTPUT:NETOUTPUT.
class Cluster : public std::enable_shared_from_this<Cluster> {
public:
enum Type { DATA, INPUT_NODE, NETOUTPUT, KNOWN_SHAPE, UNKNOWN_SHAPE };
enum Type { DATA, INPUT_NODE, NETOUTPUT, STAGE, KNOWN_SHAPE, UNKNOWN_SHAPE };
Cluster(size_t rank, Type type, NodePtr node, DynamicShapePartitioner *partitioner)
: id_(rank), min_(rank), max_(rank), type_(type), partitioner_(partitioner) {
nodes_.push_back(node);
@@ -45,6 +45,7 @@ class DynamicShapePartitioner {
bool IsData() const;
bool IsKnownShape() const;
bool IsUnknownShape() const;
bool IsIndependent() const;
bool IsNetOutput() const;
std::vector<std::shared_ptr<Cluster>> Inputs() const;
std::vector<std::shared_ptr<Cluster>> Outputs() const;


+ 32
- 6
ge/graph/partition/stage_partition.cc View File

@@ -25,6 +25,10 @@
#include "common/types.h"

namespace ge {
namespace {
// Op types treated as graph source nodes (DATA, AIPPDATA, ANN_DATA). A node of one of these
// types that has no input nodes is not collected into a stage.
const std::set<std::string> kSrcNodeTypes = { DATA, AIPPDATA, ANN_DATA };
}  // namespace

Status StagePartitioner::Partition() {
GE_CHECK_NOTNULL(root_graph_);
if (root_graph_->GetParentGraph() != nullptr) {
@@ -37,6 +41,10 @@ Status StagePartitioner::Partition() {
if (!AttrUtils::GetInt(op_desc, ATTR_STAGE_LEVEL, level)) {
continue;
}
if ((kSrcNodeTypes.count(op_desc->GetType()) != 0) && node->GetInAllNodes().empty()) {
continue;
}
GELOGD("original node %s for stage %u", node->GetName().c_str(), level);
stage_nodes_[level].insert(node);
}
if (stage_nodes_.empty()) {
@@ -54,6 +62,13 @@ Status StagePartitioner::Partition() {
return FAILED;
}

root_graph_->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool {
uint32_t a_level = UINT32_MAX;
(void)AttrUtils::GetInt(a->GetOpDesc(), ATTR_STAGE_LEVEL, a_level);
uint32_t b_level = UINT32_MAX;
(void)AttrUtils::GetInt(b->GetOpDesc(), ATTR_STAGE_LEVEL, b_level);
return a_level < b_level;
});
if (root_graph_->TopologicalSorting() != GRAPH_SUCCESS) {
GELOGE(FAILED, "Topological sort for graph %s after stage partition failed, "
"maybe stage_level was not set correctly.", root_graph_->GetName().c_str());
@@ -76,20 +91,26 @@ Status StagePartitioner::SplitStageLevel() {
auto node = nodes.top();
nodes.pop();
GE_CHECK_NOTNULL(node->GetOpDesc());
if (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL) && (cur_stage_nodes.count(node) == 0)) {
uint32_t tmp_level = cur_stage_level;
(void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level);
if (tmp_level != cur_stage_level) {
continue;
}
for (const auto &in_node : node->GetInAllNodes()) {
if (visited_stage_nodes.count(in_node) != 0) {
continue;
}
if (!AttrUtils::SetInt(in_node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) {
GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", in_node->GetName().c_str());
return INTERNAL_ERROR;
}
GELOGD("Mark stage_level node %s, stage_level=%u", in_node->GetName().c_str(), cur_stage_level);
if ((kSrcNodeTypes.count(in_node->GetType()) != 0) && in_node->GetInAllNodes().empty()) {
GELOGD("skip data node %s for stage %u", in_node->GetName().c_str(), cur_stage_level);
continue;
}
nodes.push(in_node);
}
if (!AttrUtils::SetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) {
GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", node->GetName().c_str());
return INTERNAL_ERROR;
}
GELOGD("Mark stage_level node %s, stage_level=%u", node->GetName().c_str(), cur_stage_level);
visited_stage_nodes.emplace(node);
}
for (const auto &node : visited_stage_nodes) {
@@ -219,6 +240,11 @@ NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const
op_desc->AddSubgraphName("f");
op_desc->SetSubgraphInstanceName(0, graph_name);

if (!AttrUtils::SetInt(op_desc, ATTR_STAGE_LEVEL, stage_info.stage_level)) {
GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed", op_desc->GetName().c_str());
return nullptr;
}

NodePtr subgraph_node = root_graph_->AddNode(op_desc);
if (subgraph_node == nullptr) {
GELOGE(FAILED, "Add node %s failed.", graph_name.c_str());


+ 119
- 0
ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc View File

@@ -0,0 +1,119 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/passes/fuse_data_nodes_with_common_input_pass.h"

#include <map>
#include <memory>
#include <string>
#include <vector>
#include <set>
#include "common/ge_inner_error_codes.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/type_utils.h"
#include "graph/utils/node_utils.h"

using std::map;
using std::vector;
using std::set;
using std::string;

namespace ge {
/// Pass entry point. First gathers, per Case/If subgraph, which parent input indexes are
/// fed by the same peer output anchor, then fuses the Data nodes that share such an anchor.
/// Returns GE_GRAPH_PARAM_NULLPTR for a null graph and FAILED when gathering fails;
/// otherwise the result of FuseDataNodes.
Status FuseDataNodesWithCommonInputPass::Run(ge::ComputeGraphPtr graph) {
  if (graph == nullptr) {
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null.");
    return GE_GRAPH_PARAM_NULLPTR;
  }
  GELOGD("FuseDataNodesWithCommonInputPass in.");

  // subgraph -> (peer out anchor of the parent node -> parent input indexes fed by that anchor)
  map<ComputeGraphPtr, map<OutDataAnchorPtr, set<uint32_t>>> fuse_info;
  const Status init_status = InitNeedFuseNodesInfo(graph, fuse_info);
  if (init_status != SUCCESS) {
    GELOGE(FAILED, "InitNeedFuseNodesInfo failed.");
    return FAILED;
  }
  return FuseDataNodes(fuse_info);
}

/// For every subgraph of `graph` whose parent node is a CASE or IF, records for each peer
/// output anchor feeding the parent node the set of parent input indexes connected to it.
/// The result is accumulated in `subgraphs_to_need_fuse_nodes_info`.
/// A null subgraph, parent node, input anchor or peer anchor aborts via GE_CHECK_NOTNULL.
Status FuseDataNodesWithCommonInputPass::InitNeedFuseNodesInfo(ComputeGraphPtr &graph,
    map<ComputeGraphPtr, map<OutDataAnchorPtr, set<uint32_t>>> &subgraphs_to_need_fuse_nodes_info) {
  for (const auto &subgraph : graph->GetAllSubgraphs()) {
    GE_CHECK_NOTNULL(subgraph);
    auto parent_node = subgraph->GetParentNode();
    GE_CHECK_NOTNULL(parent_node);
    if (parent_node->GetType() != CASE && parent_node->GetType() != IF) {
      continue;
    }
    auto &peer_out_anchors_to_parent_indexes = subgraphs_to_need_fuse_nodes_info[subgraph];
    for (const auto &in_data_anchor : parent_node->GetAllInDataAnchors()) {
      GE_CHECK_NOTNULL(in_data_anchor);
      OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
      uint32_t parent_index = static_cast<uint32_t>(in_data_anchor->GetIdx());
      GE_CHECK_NOTNULL(peer_out_anchor);
      peer_out_anchors_to_parent_indexes[peer_out_anchor].insert(parent_index);
      // parent_index is unsigned, so it is printed with %u (the log used %d before).
      GELOGD("Peer node %s is the %u input of parent node %s in %s.",
             peer_out_anchor->GetOwnerNode()->GetName().c_str(), parent_index, parent_node->GetName().c_str(),
             subgraph->GetName().c_str());
    }
  }
  return SUCCESS;
}

/// Fuses Data nodes inside each recorded subgraph that are fed by the same peer output anchor.
///
/// For every anchor connected to more than one parent input index, the Data nodes whose
/// ATTR_NAME_PARENT_NODE_INDEX is one of those indexes are collected. The first collected
/// node is kept; each further node has its out control edges and its output 0 data anchor
/// moved to the first node and is then removed from the subgraph.
/// Returns FAILED as soon as one of the graph edits fails.
Status FuseDataNodesWithCommonInputPass::FuseDataNodes(
    const map<ComputeGraphPtr, map<OutDataAnchorPtr, set<uint32_t>>> &subgraphs_to_need_fuse_nodes_info) {
  for (const auto &subgraph_to_need_fuse_nodes_info : subgraphs_to_need_fuse_nodes_info) {
    auto subgraph = subgraph_to_need_fuse_nodes_info.first;
    for (const auto &peer_out_anchor_to_parent_indexes : subgraph_to_need_fuse_nodes_info.second) {
      const auto &parent_indexes = peer_out_anchor_to_parent_indexes.second;
      if (parent_indexes.size() <= 1) {
        continue;
      }
      // Data nodes of this subgraph that receive the common input. A plain vector is enough:
      // all of them belong to the single anchor currently being processed.
      vector<NodePtr> need_fuse_data_nodes;
      for (const auto &node : subgraph->GetDirectNode()) {
        if (node->GetType() != DATA) {
          continue;
        }
        GE_CHECK_NOTNULL(node->GetOpDesc());
        uint32_t parent_index = 0;
        if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index) &&
            (parent_indexes.count(parent_index) > 0)) {
          need_fuse_data_nodes.emplace_back(node);
        }
      }
      if (need_fuse_data_nodes.empty()) {
        continue;
      }
      auto first_node = need_fuse_data_nodes.front();
      for (size_t i = 1; i < need_fuse_data_nodes.size(); ++i) {
        auto node = need_fuse_data_nodes[i];
        GELOGI("Replace redundant data node %s by %s exist in graph: %s.", node->GetName().c_str(),
               first_node->GetName().c_str(), subgraph->GetName().c_str());
        // the data node which can be fused has none input(both data and control in)
        if (GraphUtils::MoveOutCtrlEdges(node, first_node) != SUCCESS) {
          GELOGE(FAILED, "[%s] MoveOutCtrlEdges to %s failed.", node->GetName().c_str(),
                 first_node->GetName().c_str());
          return FAILED;
        }
        if (GraphUtils::ReplaceNodeDataAnchors(first_node, node, {}, {0}) != SUCCESS) {
          GELOGE(FAILED, "[%s] ReplaceNodeDataAnchors by %s failed.", node->GetName().c_str(),
                 first_node->GetName().c_str());
          return FAILED;
        }
        if (GraphUtils::RemoveNodeWithoutRelink(subgraph, node) != SUCCESS) {
          GELOGE(FAILED, "[%s] RemoveNodeWithoutRelink failed.", node->GetName().c_str());
          return FAILED;
        }
      }
    }
  }
  return SUCCESS;
}
} // namespace ge

+ 38
- 0
ge/graph/passes/fuse_data_nodes_with_common_input_pass.h View File

@@ -0,0 +1,38 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_
#define GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_

#include <set>
#include <map>
#include <vector>
#include "graph/types.h"
#include "inc/graph_pass.h"

namespace ge {
/// Graph pass that removes redundant Data nodes from subgraphs: when several
/// Data nodes of one subgraph are associated with the same peer output anchor,
/// the first one is kept and the others are replaced by it.
class FuseDataNodesWithCommonInputPass : public GraphPass {
 public:
  /// Entry point of the pass (GraphPass override).
  /// @param graph graph the pass is applied to
  /// @return a ge::Status; NOTE(review): the concrete codes are decided in the .cc file
  Status Run(ge::ComputeGraphPtr graph) override;

 private:
  /// Collects the information FuseDataNodes() works on.
  /// @param graph graph to inspect
  /// @param subgraphs_to_need_fuse_nodes_info output: per subgraph, a map from a
  ///        peer output anchor to the set of parent indexes that share it
  ///        (presumably only anchors feeding more than one input are recorded -
  ///        TODO confirm against the implementation)
  /// @return a ge::Status
  Status InitNeedFuseNodesInfo(
      ComputeGraphPtr &graph,
      std::map<ComputeGraphPtr, std::map<OutDataAnchorPtr, std::set<uint32_t>>> &subgraphs_to_need_fuse_nodes_info);

  /// For every subgraph and every recorded peer output anchor, groups the Data
  /// nodes whose parent index is in the anchor's index set, keeps the first node
  /// of each group and, for every other node, moves its outgoing control edges
  /// and data output 0 to the kept node before removing it from the subgraph.
  /// @param subgraphs_to_need_fuse_nodes_info mapping described on InitNeedFuseNodesInfo()
  /// @return SUCCESS when all groups were fused, FAILED if a graph edit fails
  Status FuseDataNodes(
      const std::map<ComputeGraphPtr, std::map<OutDataAnchorPtr, std::set<uint32_t>>>
          &subgraphs_to_need_fuse_nodes_info);
};
} // namespace ge
#endif // GE_GRAPH_PASSES_FUSE_DATA_NODES_WITH_COMMON_INPUT_PASS_H_

+ 25
- 5
ge/graph/passes/subgraph_const_migration_pass.cc View File

@@ -145,6 +145,7 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra
return GE_GRAPH_EMPTY_SUBGRAPH;
}

set<NodePtr> ctrl_only_const_nodes;
auto &data_nodes = all_data_nodes[subgraph];
auto &const_nodes = all_const_nodes[subgraph];
for (auto &node : subgraph->GetDirectNode()) {
@@ -178,15 +179,30 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra
peer_name_list.insert(fixed_name + ":" + std::to_string(in_anchor->GetIdx()));
}

if (peer_name_list.empty()) {
GELOGI("%s, Const: %s, no data output", subgraph->GetName().c_str(), node->GetName().c_str());
const auto in_all_nodes = node->GetInAllNodes();
if (in_all_nodes.empty() || std::all_of(in_all_nodes.begin(), in_all_nodes.end(),
[](const NodePtr &n) { return n->GetType() == DATA; })) {
ctrl_only_const_nodes.insert(node);
}
continue;
}

string key_of_const;
for (const string &name : peer_name_list) {
key_of_const += (key_of_const.empty() ? name : "_" + name);
}

const_nodes[key_of_const] = node;
GELOGD("%s, Key: %s, Const: %s", subgraph->GetName().c_str(), key_of_const.c_str(), node->GetName().c_str());
GELOGD("%s, Const: %s, Key: %s", subgraph->GetName().c_str(), node->GetName().c_str(), key_of_const.c_str());
}
}

for (auto &node : ctrl_only_const_nodes) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(subgraph, node),
"Remove node without relink failed, node: %s", node->GetName().c_str());
}
}

return SUCCESS;
@@ -352,7 +368,8 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra
const auto owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), const_node->GetName().c_str());
if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty() && owner_node != data_node) {
graph->RemoveNode(owner_node);
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, owner_node),
"Remove node without relink failed, node: %s", owner_node->GetName().c_str());
}
}

@@ -414,7 +431,8 @@ Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra
const auto owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str());
if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty()) {
graph->RemoveNode(owner_node);
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, owner_node),
"Remove node without relink failed, node: %s", owner_node->GetName().c_str());
}
}
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(const_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed");
@@ -442,7 +460,8 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph
const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const string &node_key, uint32_t parent_index) {
if (node_key.empty() || parent_index == kInvalidParent) {
GELOGE(FAILED, "Graph: %s, inputs is empty", graph->GetName().c_str());
GELOGE(FAILED, "Graph: %s, node key: %s, parent index: %u invalid",
graph->GetName().c_str(), node_key.c_str(), parent_index);
return FAILED;
}

@@ -472,7 +491,8 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph
return FAILED;
}

GE_CHK_GRAPH_STATUS_RET(subgraph->RemoveNode(move_node), "Remove node failed");
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(subgraph, move_node),
"Remove node without relink failed, node: %s", move_node->GetName().c_str());
GELOGI("Remove Node: %s %s", subgraph->GetName().c_str(), move_node->GetName().c_str());
}



+ 4
- 3
ge/graph/passes/subgraph_pass.cc View File

@@ -142,17 +142,18 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node
GE_CHECK_NOTNULL(in_node);

// Need insert memcpy
// 1. Const->NetOutput in subgraph
// 1. Const->NetOutput in subgraph & parent graph is known
// 2. AtomicOp->NetOutput in subgraph
// 3. OutputContinuesRequiredOp->NetOutput in subgraph
// 4. Data->NetOutput in subgraph but parent_node is not while
// 5. While->NetOutput in known subgraph
std::string op_type;
bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) ||
bool insert_flag =
(NodeUtils::GetConstOpType(in_node, op_type) && !graph->GetParentGraph()->GetGraphUnknownFlag()) ||
IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) ||
((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) ||
(!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) &&
(kWhileOpTypes.count(in_node->GetType()) != 0));
(kWhileOpTypes.count(in_node->GetType()) != 0));
if (insert_flag) {
GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";


+ 3
- 0
ge/host_cpu_engine/ops_kernel_store/op/host_op.cc View File

@@ -32,5 +32,8 @@ REGISTER_OP_CREATOR(Assign, HostOp);
REGISTER_OP_CREATOR(RandomUniform, HostOp);
REGISTER_OP_CREATOR(Add, HostOp);
REGISTER_OP_CREATOR(Mul, HostOp);
REGISTER_OP_CREATOR(ConcatV2, HostOp);
REGISTER_OP_CREATOR(Data, HostOp);
REGISTER_OP_CREATOR(Fill, HostOp);
} // namespace host_cpu
} // namespace ge

+ 7
- 2
ge/hybrid/executor/hybrid_model_async_executor.cc View File

@@ -15,7 +15,7 @@
*/

#include "hybrid/executor/hybrid_model_async_executor.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/model_manager/model_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "graph/ge_context.h"
@@ -59,6 +59,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis
run_flag_ = true;
listener_ = listener;
future_ = std::async(std::launch::async, [&]() -> Status {
GetThreadLocalContext() = *executor_->GetContext()->ge_context;
GetContext().SetSessionId(executor_->GetContext()->session_id);
return RunInternal();
});
@@ -229,7 +230,11 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
}

GE_CHECK_GE(tensor_size, 0);
auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
AllocationAttr attr;
if (GetContext().GetHostExecFlag()) {
attr.SetMemType(HOST_DDR);
}
auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size, &attr);
GE_CHECK_NOTNULL(tensor_buffer);
args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(tensor_buffer.release()));



+ 1
- 1
ge/hybrid/executor/hybrid_model_async_executor.h View File

@@ -21,7 +21,7 @@
#include <future>
#include "external/ge/ge_api_error_codes.h"
#include "external/ge/ge_api_types.h"
#include "graph/load/new_model_manager/data_inputer.h"
#include "graph/load/model_manager/data_inputer.h"
#include "hybrid/executor/hybrid_model_executor.h"
#include "runtime/stream.h"



Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save