
Merge branch 'master' of gitee.com:mindspore/graphengine into master

wangjiming · 4 years ago
commit 1ac3a89866
100 changed files with 1773 additions and 1859 deletions
  1. +0 -10  CMakeLists.txt
  2. +12 -11  build.sh
  3. +0 -2  ge/CMakeLists.txt
  4. +30 -0  ge/client/ge_api.cc
  5. +1 -1  ge/common/CMakeLists.txt
  6. +1 -0  ge/common/ge/plugin_manager.cc
  7. +4 -11  ge/common/helper/model_cache_helper.cc
  8. +3 -9  ge/common/helper/model_helper.cc
  9. +3 -2  ge/common/helper/om_file_helper.cc
  10. +4 -7  ge/common/model_parser/model_parser.cc
  11. +0 -0  ge/common/model_parser/model_parser.h
  12. +121 -112  ge/common/profiling/profiling_manager.cc
  13. +6 -5  ge/common/profiling/profiling_manager.h
  14. +1 -3  ge/executor/CMakeLists.txt
  15. +14 -12  ge/executor/ge_executor.cc
  16. +14 -10  ge/generator/ge_generator.cc
  17. +20 -9  ge/graph/build/memory/graph_mem_assigner.cc
  18. +3 -2  ge/graph/build/memory/graph_mem_assigner.h
  19. +2 -2  ge/graph/build/task_generator.cc
  20. +0 -4  ge/graph/execute/graph_execute.cc
  21. +3 -11  ge/graph/load/graph_loader.cc
  22. +2 -2  ge/graph/load/graph_loader.h
  23. +117 -200  ge/graph/load/model_manager/davinci_model.cc
  24. +3 -3  ge/graph/load/model_manager/davinci_model.h
  25. +0 -23  ge/graph/load/model_manager/davinci_model_parser.cc
  26. +0 -46  ge/graph/load/model_manager/davinci_model_parser.h
  27. +2 -10  ge/graph/load/model_manager/model_manager.cc
  28. +9 -5  ge/graph/manager/graph_caching_allocator.cc
  29. +4 -4  ge/graph/manager/graph_caching_allocator.h
  30. +22 -18  ge/graph/manager/graph_manager.cc
  31. +3 -0  ge/graph/manager/graph_manager.h
  32. +1 -1  ge/graph/manager/graph_mem_allocator.h
  33. +3 -3  ge/graph/passes/assign_remove_pass.cc
  34. +2 -4  ge/graph/passes/constant_folding_pass.cc
  35. +19 -6  ge/graph/passes/flow_ctrl_pass.cc
  36. +3 -0  ge/graph/passes/flow_ctrl_pass.h
  37. +13 -9  ge/graph/passes/hccl_continuous_memcpy_pass.cc
  38. +3 -3  ge/graph/passes/inplace_support_check_pass.cc
  39. +1 -1  ge/graph/passes/net_output_pass.cc
  40. +33 -6  ge/graph/passes/no_use_reshape_remove_pass.cc
  41. +3 -0  ge/graph/passes/no_use_reshape_remove_pass.h
  42. +1 -3  ge/graph/passes/prune_pass.cc
  43. +1 -1  ge/graph/passes/reshape_remove_pass.cc
  44. +1 -1  ge/graph/passes/subgraph_const_migration_pass.cc
  45. +5 -4  ge/graph/preprocess/graph_preprocess.cc
  46. +3 -3  ge/graph/preprocess/graph_preprocess.h
  47. +1 -1  ge/graph/preprocess/multi_batch_options.h
  48. +37 -1  ge/host_kernels/slice_kernel.cc
  49. +2 -2  ge/hybrid/executor/hybrid_execution_context.h
  50. +4 -2  ge/hybrid/executor/hybrid_model_async_executor.cc
  51. +2 -1  ge/hybrid/executor/hybrid_model_pipeline_executor.cc
  52. +1 -1  ge/hybrid/executor/node_state.h
  53. +2 -2  ge/hybrid/executor/subgraph_executor.cc
  54. +9 -53  ge/hybrid/executor/worker/execution_engine.cc
  55. +3 -1  ge/hybrid/model/hybrid_model_builder.cc
  56. +2 -3  ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  57. +158 -28  ge/hybrid/node_executor/aicore/aicore_op_task.cc
  58. +34 -0  ge/hybrid/node_executor/aicore/aicore_op_task.h
  59. +2 -3  ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  60. +11 -34  ge/hybrid/node_executor/task_context.cc
  61. +3 -6  ge/hybrid/node_executor/task_context.h
  62. +6 -6  ge/offline/main.cc
  63. +3 -2  ge/session/omg.cc
  64. +7 -23  ge/single_op/single_op.cc
  65. +6 -3  ge/single_op/single_op_manager.cc
  66. +4 -2  ge/single_op/single_op_manager.h
  67. +13 -9  ge/single_op/single_op_model.cc
  68. +1 -2  ge/single_op/single_op_model.h
  69. +53 -15  ge/single_op/task/op_task.cc
  70. +12 -4  ge/single_op/task/op_task.h
  71. +144 -44  ge/single_op/task/tbe_task_builder.cc
  72. +32 -1  ge/single_op/task/tbe_task_builder.h
  73. +4 -0  inc/external/ge/ge_api.h
  74. +5 -13  inc/framework/common/ge_types.h
  75. +6 -0  inc/framework/executor/ge_executor.h
  76. +2 -1  inc/framework/generator/generator_api.h
  77. +2 -1  inc/framework/memory/memory_api.h
  78. +1 -1  metadef
  79. +1 -1  parser
  80. +0 -1  tests/CMakeLists.txt
  81. +5 -0  tests/depends/mmpa/CMakeLists.txt
  82. +7 -3  tests/depends/mmpa/src/mmpa_stub.cc
  83. +0 -59  tests/depends/omg/CMakeLists.txt
  84. +0 -878  tests/depends/omg/src/omg_stub.cc
  85. +12 -4  tests/depends/runtime/src/runtime_stub.cc
  86. +44 -28  tests/ut/ge/CMakeLists.txt
  87. +42 -0  tests/ut/ge/executor/ge_executor_unittest.cc
  88. +78 -0  tests/ut/ge/generator/ge_generator_unittest.cc
  89. +95 -32  tests/ut/ge/graph/build/mem_assigner_unittest.cc
  90. +20 -1  tests/ut/ge/graph/ge_executor_unittest.cc
  91. +0 -1  tests/ut/ge/graph/graph_load_unittest.cc
  92. +7 -0  tests/ut/ge/graph/load/davinci_model_unittest.cc
  93. +0 -1  tests/ut/ge/graph/load/model_manager_unittest.cc
  94. +1 -2  tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc
  95. +87 -0  tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc
  96. +113 -0  tests/ut/ge/hybrid/ge_hybrid_unittest.cc
  97. +28 -4  tests/ut/ge/single_op/single_op_model_unittest.cc
  98. +117 -0  tests/ut/ge/single_op/single_op_task_unittest.cc
  99. +35 -0  third_party/fwkacllib/inc/runtime/kernel.h
  100. +13 -0  third_party/fwkacllib/inc/runtime/rt_model.h

+0 -10  CMakeLists.txt

@@ -76,9 +76,7 @@ if (ENABLE_OPEN_SRC)
find_module(runtime libruntime.so ${GE_LIB_PATH})
find_module(runtime_compile libruntime_compile.so ${GE_LIB_PATH})
find_module(resource libresource.so ${GE_LIB_PATH})
find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH})
#find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
@@ -86,11 +84,9 @@ if (ENABLE_OPEN_SRC)
else()
find_module(slog libalog.so ${ASCEND_ATC_DIR})
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
if(PLATFORM STREQUAL "train")
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
if(PRODUCT STREQUAL "flr3")
@@ -100,8 +96,6 @@ if (ENABLE_OPEN_SRC)
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
elseif(PRODUCT STREQUAL "flr1")
@@ -114,11 +108,9 @@ if (ENABLE_OPEN_SRC)
elseif(PLATFORM STREQUAL "all")
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
else()
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
@@ -144,7 +136,6 @@ elseif (ENABLE_D OR ENABLE_ACL)

# common libraries
find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})

if (ENABLE_D)
@@ -164,7 +155,6 @@ elseif(ENABLE_MS_TESTCASES)

# common libraries
find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})

set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)


+12 -11  build.sh

@@ -76,8 +76,8 @@ checkopts()
ENABLE_GE_ST="on"
;;
t)
ENABLE_GE_UT="on"
;;
ENABLE_GE_UT="on"
;;
c)
ENABLE_GE_COV="on"
;;
@@ -185,7 +185,7 @@ build_graphengine()
# build all the target
TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}"
fi
make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install
if [ $? -ne 0 ]
then
@@ -214,13 +214,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH}
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH}

${OUTPUT_PATH}/ut_libgraph &&
${OUTPUT_PATH}/ut_libge_multiparts_utest &&
${OUTPUT_PATH}/ut_libge_distinct_load_utest &&
${OUTPUT_PATH}/ut_libge_others_utest &&
${OUTPUT_PATH}/ut_libge_kernel_utest
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libgraph && ${RUN_TEST_CASE} &&
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_multiparts_utest && ${RUN_TEST_CASE} &&
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_distinct_load_utest && ${RUN_TEST_CASE} &&
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_others_utest && ${RUN_TEST_CASE} &&
RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_kernel_utest && ${RUN_TEST_CASE}
if [[ "$?" -ne 0 ]]; then
echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!"
echo -e "\033[31m${RUN_TEST_CASE}\033[0m"
exit 1;
fi
echo "Generating coverage statistics, please wait..."
@@ -249,8 +250,8 @@ generate_package()
NNENGINE_PATH="plugin/nnengine/ge_config"
OPSKERNEL_PATH="plugin/opskernel"

ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so")
FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so")
ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so" "liberror_manager.so")
FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so" "liberror_manager.so")
PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "libhost_cpu_engine.so" "libhost_cpu_opskernel_builder.so" "optimizer_priority.pbtxt")
PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so")

@@ -269,7 +270,7 @@ generate_package()
mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}"
mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}"
mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}"
cd "${OUTPUT_PATH}"

find ./ -name graphengine_lib.tar -exec rm {} \;


+0 -2  ge/CMakeLists.txt

@@ -133,7 +133,6 @@ set(TRAIN_SRC_LIST
"graph/load/model_manager/data_dumper.cc"
"graph/load/model_manager/data_inputer.cc"
"graph/load/model_manager/davinci_model.cc"
"graph/load/model_manager/davinci_model_parser.cc"
"graph/load/model_manager/model_manager.cc"
"graph/load/model_manager/model_utils.cc"
"graph/load/model_manager/aipp_utils.cc"
@@ -613,7 +612,6 @@ set(INFER_SRC_LIST
"graph/load/model_manager/model_manager.cc"
"graph/load/model_manager/data_inputer.cc"
"graph/load/model_manager/davinci_model.cc"
"graph/load/model_manager/davinci_model_parser.cc"
"graph/load/model_manager/model_utils.cc"
"graph/load/model_manager/aipp_utils.cc"
"graph/load/model_manager/tbe_handle_store.cc"


+30 -0  ge/client/ge_api.cc

@@ -32,6 +32,7 @@
#include "graph/common/ge_call_wrapper.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"
#include "common/util/error_manager/error_manager.h"
#include "toolchain/plog.h"

using domi::OpRegistry;
@@ -79,6 +80,8 @@ Status CheckOptionsValid(const std::map<string, string> &options) {
// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitializeImpl(const std::map<string, string> &options) {
GELOGT(TRACE_INIT, "GEInitialize start");

ErrorManager::GetInstance().GenWorkStreamIdDefault();
// 0.check init status
if (g_ge_initialized) {
GELOGW("GEInitialize is called more than once");
@@ -157,6 +160,8 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
// GE finalize, releasing all resources
Status GEFinalize() {
GELOGT(TRACE_INIT, "GEFinalize start");

ErrorManager::GetInstance().GenWorkStreamIdDefault();
// check init status
if (!g_ge_initialized) {
GELOGW("GEFinalize is called before GEInitialize");
@@ -202,9 +207,19 @@ Status GEFinalize() {
return ret;
}

std::string GEGetErrorMsg() {
return ErrorManager::GetInstance().GetErrorMessage();
}

std::string GEGetWarningMsg() {
return ErrorManager::GetInstance().GetWarningMessage();
}

// Initialize session,which calls innerSession
Session::Session(const std::map<string, string> &options) {
GELOGT(TRACE_INIT, "Session Constructor start");

ErrorManager::GetInstance().GenWorkStreamIdDefault();
// check init status
sessionId_ = 0;
if (!g_ge_initialized) {
@@ -235,6 +250,8 @@ Session::Session(const std::map<string, string> &options) {

Session::Session(const std::map<AscendString, AscendString> &options) {
GELOGT(TRACE_INIT, "Session Constructor start");

ErrorManager::GetInstance().GenWorkStreamIdDefault();
// check init status
sessionId_ = 0;
if (!g_ge_initialized) {
@@ -311,11 +328,13 @@ Session::~Session() {

Status Session::AddGraph(uint32_t graph_id, const Graph &graph) {
std::map<std::string, std::string> options;
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
return AddGraph(graph_id, graph, options);
}

Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@@ -334,6 +353,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<s
Status Session::AddGraph(uint32_t graph_id, const Graph &graph,
const std::map<AscendString, AscendString> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@@ -360,6 +380,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph,
}

Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::map<AscendString, AscendString> options;
return AddGraphWithCopy(graph_id, graph, options);
}
@@ -367,6 +388,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
const std::map<AscendString, AscendString> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@@ -389,6 +411,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
Status Session::RemoveGraph(uint32_t graph_id) {
GELOGT(TRACE_INIT, "Session RemoveGraph start");

ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
// call RemoveGraph
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (!instance_ptr || !instance_ptr->InitFlag()) {
@@ -457,6 +480,7 @@ void PrintOutputResult(std::vector<Tensor> &outputs) {
Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs) {
GELOGT(TRACE_INIT, "Session RunGraph start");

ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::vector<Tensor> graph_inputs = inputs;
// call RunGraph
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
@@ -483,10 +507,12 @@ Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, s
}

Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback);
}

Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
std::string str_key;
if (key != nullptr) {
str_key = key;
@@ -495,6 +521,7 @@ Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFu
}

Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -511,6 +538,7 @@ Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo>

Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs,
RunAsyncCallback callback) {
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -529,6 +557,7 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorIn
}

Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
auto instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -544,6 +573,7 @@ Status Session::GetVariables(const std::vector<std::string> &var_names, std::vec
}

Status Session::GetVariables(const std::vector<AscendString> &var_names, std::vector<Tensor> &var_values) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
auto instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");


+1 -1  ge/common/CMakeLists.txt

@@ -54,7 +54,7 @@ set(SRC_LIST
"util.cc"
"properties_manager.cc"
"types.cc"
"model_parser/base.cc"
"model_parser/model_parser.cc"
"kernel_store.cc"
"tbe_kernel_store.cc"
"cust_aicpu_kernel_store.cc"


+1 -0  ge/common/ge/plugin_manager.cc

@@ -53,6 +53,7 @@ string PluginManager::GetPath() {
GELOGW("Failed to read the shared library file path!");
return string();
} else {
GE_IF_BOOL_EXEC(dl_info.dli_fname == nullptr, return string());
std::string so_path = dl_info.dli_fname;
char path[MMPA_MAX_PATH] = {0};
if (so_path.length() >= MMPA_MAX_PATH) {


+4 -11  ge/common/helper/model_cache_helper.cc

@@ -14,22 +14,15 @@
* limitations under the License.
*/

#include <climits>
#include "common/helper/model_cache_helper.h"

#include <cstdio>
#include <fstream>
#include <functional>

#include "common/ge/ge_util.h"
#include "common/helper/model_cache_helper.h"
#include "common/types.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_types.h"
#include "common/model_parser/model_parser.h"
#include "framework/common/helper/model_helper.h"
#include "framework/common/util.h"
#include "graph/detail/attributes_holder.h"
#include "graph/detail/model_serialize_imp.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/model.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/tensor_utils.h"
#include "init/gelib.h"
@@ -1682,7 +1675,7 @@ Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const {
string key_path;
int32_t priority = 0;
ModelData model_data;
ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
ret = ModelParserBase::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
if (ret != SUCCESS) {
GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret);
return ret;


+3 -9  ge/common/helper/model_helper.cc

@@ -16,16 +16,10 @@

#include "framework/common/helper/model_helper.h"

#include "common/ge/ge_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/log.h"
#include "framework/common/util.h"
#include "framework/common/debug/ge_log.h"
#include "common/model_parser/model_parser.h"
#include "framework/omg/model_tool.h"
#include "framework/omg/version.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/graph_utils.h"

using std::string;
@@ -465,7 +459,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c
return ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA;
}

Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
if (status != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
return ACL_ERROR_GE_PARAM_INVALID;
@@ -514,7 +508,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
return INTERNAL_ERROR;
}

Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
if (status != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
return ACL_ERROR_GE_PARAM_INVALID;


+3 -2  ge/common/helper/om_file_helper.cc

@@ -165,7 +165,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
return ACL_ERROR_GE_PARAM_INVALID;
}
size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
GELOGD("ModelPartitionTable num:%u, ModelFileHeader length:%zu, ModelPartitionTable length:%zu",
partition_table->num, sizeof(ModelFileHeader), mem_offset);
if (model_data_size <= mem_offset) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
@@ -207,7 +207,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
"ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
if (model_data_size <= cur_offset) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID,
"invalid model data, partition_table->num:%u, model data size %u",
partition_table->num, model_data_size);
return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;
}


ge/common/model_parser/base.cc → ge/common/model_parser/model_parser.cc

@@ -14,16 +14,13 @@
* limitations under the License.
*/

#include "common/model_parser/base.h"
#include "common/helper/model_helper.h"
#include <securec.h>
#include "common/model_parser/model_parser.h"

#include <fstream>
#include <memory>
#include <string>

#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/util.h"
#include "securec.h"
#include "common/helper/model_helper.h"

namespace ge {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelParserBase::ModelParserBase() {}

ge/common/model_parser/base.h → ge/common/model_parser/model_parser.h


+121 -112  ge/common/profiling/profiling_manager.cc

@@ -20,6 +20,8 @@
#include "framework/common/debug/log.h"
#include "framework/common/string_util.h"
#include "graph/ge_context.h"
#include "graph/utils/type_utils.h"
#include "graph/types.h"
#include "runtime/base.h"
#include "graph/load/model_manager/davinci_model.h"

@@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point";
#ifdef DAVINCI_SUPPORT_PROFILING
const size_t kReportMaxLen = 2048;
const int32_t kMaxDeviceNum = 256;
const uint32_t kInteval = 2;
const std::string kConfigNumsdev = "devNums";
const std::string kConfigDevIdList = "devIdList";
const std::string kProfStart = "prof_start";
const std::string kProfStop = "prof_stop";
const std::string kProfModelSubscribe = "prof_model_subscribe";
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
const std::string kModelName = "model_name";
const std::string kModelId = "model_id";
const std::string kOpNmae = "op_name";
const std::string kOptype = "op_type";
const std::string kBlockDim = "block_dims";
const std::string kTaskId = "task_id";
const std::string kStreamId = "stream_id";
const std::string kShapeType = "shape_type";
const std::string kCurIterNum = "cur_iter_num";
const std::string kTaskType = "task_type";
const std::string kInput = "input";
const std::string kOutput = "output";
const std::string kFormat = "format";
const std::string kDataType = "data_type";
const std::string kShape = "shape";
const std::string kIdx = "idx";

#endif
} // namespace

@@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo(
const TaskDescInfo &task, Json &task_json) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::string data;
for (const auto &task : task_desc_info) {
std::string model_name = task.model_name;
std::string op_name = task.op_name;
uint32_t block_dim = task.block_dim;
uint32_t task_id = task.task_id;
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
int64_t cur_iter_num = task.cur_iter_num;
uint32_t task_type = task.task_type;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim)).append(" ")
.append(std::to_string(task_id)).append(" ")
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append(" ")
.append(shape_type).append(" ")
.append(std::to_string(cur_iter_num)).append(" ")
.append(std::to_string(task_type)).append("\n");

ReporterData reporter_data{};
reporter_data.deviceId = device_id;
reporter_data.data = (unsigned char *)data.c_str();
reporter_data.dataLen = data.size();
int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info"));
if (ret != EOK) {
GELOGE(ret, "Report data tag of task_desc_info memcpy error!");
return;
}

int32_t cb_ret = CallMsprofReport(reporter_data);
if (cb_ret != 0) {
GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret);
return;
}
for (size_t i = 0; i < task.input_format.size(); i++) {
Json tmp_input;
tmp_input[kIdx] = i;
Format format = task.input_format[i];
tmp_input[kFormat] = TypeUtils::FormatToSerialString(format);
DataType data_type = task.input_data_type[i];
tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
tmp_input[kShape] = task.input_shape[i];
task_json[kInput] += tmp_input;
}

for (size_t i = 0; i < task.output_format.size(); i++) {
Json tmp_output;
tmp_output[kIdx] = i;
Format format = task.output_format[i];
tmp_output[kFormat] = TypeUtils::FormatToSerialString(format);
DataType data_type = task.output_data_type[i];
tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
tmp_output[kShape] = task.output_shape[i];
task_json[kOutput] += tmp_output;
}

data.clear();
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::string data;
for (const auto &graph : compute_graph_desc_info) {
data.append("model_name:")
.append(graph.model_name)
.append(" op_name:")
.append(graph.op_name)
.append(" op_type:")
.append(graph.op_type);
for (size_t i = 0; i < graph.input_format.size(); ++i) {
data.append(" input_id:")
.append(std::to_string(i))
.append(" input_format:")
.append(std::to_string(graph.input_format.at(i)))
.append(" input_data_type:")
.append(std::to_string(graph.input_data_type.at(i)))
.append(" input_shape:\"");
size_t input_shape_len = graph.input_shape.at(i).size();
if (input_shape_len == 0) {
data.append("");
} else if (input_shape_len == 1) {
data.append(std::to_string(graph.input_shape.at(i).at(0)));
} else {
for (size_t j = 0; j < input_shape_len - 1; ++j) {
data.append(std::to_string(graph.input_shape.at(i).at(j))).append(",");
}
data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1)));
}

data.append("\"");
}

for (size_t i = 0; i < graph.output_format.size(); ++i) {
data.append(" output_id:")
.append(std::to_string(i))
.append(" output_format:")
.append(std::to_string(graph.output_format.at(i)))
.append(" output_data_type:")
.append(std::to_string(graph.output_data_type.at(i)))
.append(" output_shape:\"");
size_t output_shape_len = graph.output_shape.at(i).size();
if (output_shape_len == 0) {
data.append("");
} else if (output_shape_len == 1) {
data.append(std::to_string(graph.output_shape.at(i).at(0)));
} else {
for (size_t j = 0; j < output_shape_len - 1; ++j) {
data.append(std::to_string(graph.output_shape.at(i).at(j))).append(",");
}
data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1)));
}
data.append("\"");
for (const auto &task : task_desc_info) {
Json task_info;
task_info[kModelName] = task.model_name;
task_info[kModelId] = model_id;
task_info[kOpNmae] = task.op_name;
task_info[kOptype] = task.op_type;
task_info[kBlockDim] = task.block_dim;
task_info[kTaskType] = task.task_type;
task_info[kTaskId] = task.task_id;
task_info[kStreamId] = task.stream_id;
task_info[kCurIterNum] = task.cur_iter_num;
task_info[kShapeType] = task.shape_type;
ProfilingOpInputOutInfo(task, task_info);

std::string reported_data;
try {
reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
return ;
} catch (...) {
GELOGE(FAILED, "Failed to convert JSON to string.");
return;
}

data.append(" model_id:").append(std::to_string(model_id));
data.append(" task_id:").append(std::to_string(graph.task_id));
data.append(" stream_id:").append(std::to_string(graph.stream_id));
data.append("\n");

GraphDescReport(device_id, data);
data.clear();
reported_data.append(",")
.append("\n");
ReportData(device_id, reported_data, "task_desc_info");
}
#endif
}

void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData(
const int32_t &device_id, const string &data, const string &tag_name) {
#ifdef DAVINCI_SUPPORT_PROFILING
ReporterData reporter_data{};
int ret = -1;
@@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d
size_t index = data.size() / kReportMaxLen;
if (index >= 1) {
reporter_data.deviceId = device_id;
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);
for (size_t i = 0; i < index; ++i) {
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i;
reporter_data.dataLen = kReportMaxLen;
cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
return;);
}
reporter_data.dataLen = data.size() - kReportMaxLen * index;
if (reporter_data.dataLen != 0) {
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index;
cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
return;);
}
} else {
reporter_data.deviceId = device_id;
reporter_data.data = (unsigned char *)data.c_str();
reporter_data.dataLen = data.size();
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);

cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
return;);
}
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) {
#ifdef DAVINCI_SUPPORT_PROFILING
int32_t logic_device_id = 0;
rtError_t rt_ret = rtGetDevice(&logic_device_id);
@@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
GELOGD("current logic_device_id:%d", logic_device_id);
GELOGD("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
GELOGD("start ProfilingGraphDescInfo.");
ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id);
GELOGD("Report profiling data for GE end.");
#endif
}
@@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs
static_cast<void *>(&reporter_data), sizeof(ReporterData));
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo(
const OpDescPtr &op, TaskDescInfo &task_desc_info) const {
std::vector<Format> input_format;
std::vector<std::vector<int64_t>> input_shape;
std::vector<DataType> input_data_type;
for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
if (input_tensor_desc == nullptr) {
continue;
}
input_format.emplace_back(input_tensor_desc->GetFormat());
input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
input_data_type.emplace_back(input_tensor_desc->GetDataType());
}
std::vector<Format> output_format;
std::vector<std::vector<int64_t>> output_shape;
std::vector<DataType> output_data_type;
for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
if (output_tensor_desc == nullptr) {
continue;
}
output_format.emplace_back(output_tensor_desc->GetFormat());
output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
output_data_type.emplace_back(output_tensor_desc->GetDataType());
}

std::vector<Format> format_default = { FORMAT_NULL };
std::vector<std::vector<int64_t>> shape_default = { {0} };
std::vector<DataType> data_type_default = { DT_UNDEFINED };
task_desc_info.input_format = input_format.empty() ? format_default : input_format;
task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape;
task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type;
task_desc_info.output_format = output_format.empty() ? format_default : output_format;
task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape;
task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint(
std::string &fp_point, std::string &bp_point) {
// Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init


+6 -5  ge/common/profiling/profiling_manager.h

@@ -54,6 +54,8 @@ namespace {

} // namespace
namespace ge {
class OpDesc;
using OpDescPtr = std::shared_ptr<OpDesc>;
struct DeviceSubsInfo {
uint64_t module;
uint32_t subscribe_count;
@@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
bool ProfilingModelExecuteOn() const;
// is_execute_profiling_ only used by ge option and env
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info);
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const int32_t &device_id);
void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
const int32_t &device_id);
void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json);
Status PluginInit() const;
void PluginUnInit() const;
Status CallMsprofReport(ReporterData &reporter_data) const;
@@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; }
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
void GetFpBpPoint(std::string &fp_point, std::string &bp_point);
void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const;
void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name);
private:
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf);
Status ParseOptions(const std::string &options);
@@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para,
vector<int32_t> &device_list);
uint64_t GetProfilingModule();
void GraphDescReport(const int32_t &device_id, const string &data);
void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list);
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);



+1 -3  ge/executor/CMakeLists.txt

@@ -33,7 +33,6 @@ set(SRC_LIST
"../model/ge_model.cc"
"../model/ge_root_model.cc"
"../graph/load/model_manager/davinci_model.cc"
"../graph/load/model_manager/davinci_model_parser.cc"
"../graph/load/model_manager/model_manager.cc"
"../graph/load/model_manager/tbe_handle_store.cc"
"../graph/load/model_manager/cpu_queue_schedule.cc"
@@ -250,15 +249,14 @@ target_link_options(ge_executor_shared PRIVATE
target_link_libraries(ge_executor_shared PRIVATE
$<BUILD_INTERFACE:intf_pub>
msprofiler
static_mmpa
-Wl,--no-as-needed
ge_common
runtime
slog
mmpa
graph
register
error_manager
ascend_hal_stub
ascend_protobuf
c_sec
-Wl,--as-needed


+14 -12  ge/executor/ge_executor.cc

@@ -16,7 +16,6 @@

#include "executor/ge_executor.h"
#include <cce/cce.h>
#include <cce/compiler_stub.h>
#include <ctime>
#include <iostream>
#include "common/debug/log.h"
@@ -24,19 +23,11 @@
#include "common/helper/model_helper.h"
#include "common/profiling/profiling_manager.h"
#include "common/dump/dump_manager.h"
#include "common/util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/execute/graph_execute.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/model.h"
#include "graph/utils/graph_utils.h"
#include "mmpa/mmpa_api.h"
#include "single_op/single_op_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/load/model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"

@@ -454,7 +445,8 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> &
if (all_data_dims[i] < 0) {
cur_dynamic_dims.push_back(dynamic_dims[i]);
} else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) {
GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld",
GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID,
"Static dims should be same, index: %zu value: %lu should be %ld",
i, dynamic_dims[i], all_data_dims[i]);
return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID;
}
@@ -930,12 +922,22 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size

Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
SingleOp **single_op) {
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op);
return LoadSingleOpV2(model_name, modelData, stream, single_op, 0);
}

Status GeExecutor::LoadSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
SingleOp **single_op, const uint64_t model_id) {
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op, model_id);
}

Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op) {
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op);
return LoadDynamicSingleOpV2(model_name, modelData, stream, single_op, 0);
}

Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op, const uint64_t model_id) {
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op, model_id);
}

Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,


+14 -10  ge/generator/ge_generator.cc

@@ -147,7 +147,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi
return FAILED;
}

static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTensorDesc &tensor, int32_t index,
static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index,
bool attr) {
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);
@@ -671,6 +671,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor>
Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
bool is_offline) {
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
if (!is_offline) {
(void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true);
}
@@ -709,8 +711,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
GELOGI("ATC parser success in single op build.");

GeRootModelPtr ge_root_model = nullptr;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model));
map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs();
GE_CHECK_NOTNULL(ge_root_model);
@@ -723,7 +723,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
const ComputeGraphPtr root_graph = ge_root_model->GetRootGraph();
GeModelPtr &ge_model = name_to_ge_model.begin()->second;
GE_CHK_STATUS_RET_NOLOG(CheckDynamicSupport(ge_model, root_graph));
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());
GELOGI("After build model, The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());

bool all_shape = false;
(void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape);
@@ -738,6 +738,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
} else {
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
}
GELOGI("Start save GeModel to Model buffer");
GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff));
return SUCCESS;
}
@@ -753,10 +754,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
*/
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, const string &model_file_name) {
GELOGI("Start to build single op offline model.");
GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size());
ModelBufferData model_buff;
OpEngineType engine_type = ENGINE_SYS;
return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
GELOGI("Finish build single offline model, status: %u", status);
return status;
}

/**
@@ -772,8 +775,10 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, OpEngineType engine_type,
ModelBufferData &model_buff) {
GELOGI("Start to build single op online");
return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false);
GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size());
Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false);
GELOGI("Finish build single online model, status: %u", status);
return status;
}

Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
@@ -798,8 +803,7 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor
}
} else {
for (const auto &in_desc : inputs) {
GeTensorDesc input_desc = in_desc.GetTensorDesc();
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true));
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true));
arg_index++;
}
}


+20 -9  ge/graph/build/memory/graph_mem_assigner.cc

@@ -157,8 +157,8 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() {
}

ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
int64_t dim_index, int64_t &output_mem_size,
int64_t &batch_dim_num, int64_t &out_size) {
int64_t dim_index, int64_t &output_mem_size,
int64_t &batch_dim_num, int64_t &out_size) {
graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
if (graph_status != GRAPH_SUCCESS) {
GELOGE(FAILED, "Opdesc GetSize failed!");
@@ -430,7 +430,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str());
return FAILED;
}
GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second),
GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true),
"Assign node %s continuous input memory failed.", node->GetName().c_str())
}
for (auto pair : memory_offset_) {
@@ -441,7 +441,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
}

Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) {
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) {
GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
auto iter = memory_offset_.find(memory_type);
if (iter == memory_offset_.end()) {
@@ -508,12 +508,16 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
std::map<int32_t, int32_t> out2ins;
GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str());
// output is beginning offset, set offset for input; only support this case now
if (out2ins.size() == 1 && out2ins.begin()->second == 0) {
if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) {
auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
peer_op_desc->SetOutputOffset(output_list);
GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(),
out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(),
output_list_this.at(out2ins.begin()->first), peer_output_offset);
} else {
GELOGW("Node %s out %d ref in %d with total ref numbers %zu", node->GetName().c_str(), out2ins.begin()->first,
out2ins.begin()->second, out2ins.size());
GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(),
out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
}
// first input is beginning offset
mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
@@ -1535,6 +1539,11 @@ ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int3
bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) {
for (const auto &in_node : input_continuous_node->GetInDataNodes()) {
if (in_node->GetType() == VARIABLE) {
GELOGI("node %s 's precursor node %s is variable, do not store.", input_continuous_node->GetName().c_str(),
in_node->GetName().c_str());
return true;
}
auto iter = node_2_continuous_type.find(in_node);
// In node's topo order in the front, so function can not be exception
auto continuous_type = iter->second;
@@ -1560,13 +1569,15 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
}

ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
uint32_t continuous_type) {
uint32_t continuous_type,
bool reverse_refresh) {
int64_t mem_clean_start = 0;
int64_t mem_clean_size = 0;
int64_t memory_type = RT_MEMORY_HBM;

GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed.");
auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, continuous_type);
auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type,
continuous_type, reverse_refresh);
if (ret != ge::SUCCESS) {
GELOGE(ret, "Assign continuous input memory failed!");
return ret;


+3 -2  ge/graph/build/memory/graph_mem_assigner.h

@@ -131,13 +131,14 @@ class GraphMemoryAssigner {
std::map<NodePtr, uint32_t> &node_2_continuous_type);

ge::Status AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
uint32_t continuous_type);
uint32_t continuous_type, bool reverse_refresh=false);

ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
map<string, vector<NodePtr>> &connecting_output_atomic_nodes);

ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type);
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type,
bool reverse_refresh = false);

ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type);



+2 -2  ge/graph/build/task_generator.cc

@@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi

// subgraph of dynamic graph no need to find index, has been found in parent graph
if (IsSubGraphOfDynamicGraph(graph)) {
GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str());
GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str());
return SUCCESS;
}

@@ -1042,7 +1042,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
}
GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
profiling_point.end_index.size() );
profiling_point.end_index.size());

bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) {


+0 -4  ge/graph/execute/graph_execute.cc

@@ -19,12 +19,8 @@
#include <memory>
#include <string>

#include "common/ge_inner_error_codes.h"
#include "common/model_parser/base.h"
#include "graph/load/model_manager/model_manager.h"
#include "omm/csa_interact.h"
#include "runtime/dev.h"
#include "runtime/mem.h"

namespace ge {
GraphExecutor::GraphExecutor()


+3 -11  ge/graph/load/graph_loader.cc

@@ -20,19 +20,13 @@
#include <vector>

#include "common/helper/model_helper.h"
#include "common/util.h"
#include "common/model_parser/model_parser.h"
#include "graph/ge_context.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "omm/csa_interact.h"
#include "runtime/dev.h"

namespace ge {
GraphLoader::GraphLoader() = default;

GraphLoader::~GraphLoader() = default;

Status GraphLoader::UnloadModel(uint32_t model_id) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
@@ -120,7 +114,6 @@ Status GraphLoader::GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size) {

Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string &key_path, int32_t priority,
ModelData &model_data) {
Status ret;
if (!CheckInputPathValid(path)) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
@@ -132,16 +125,15 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
return ACL_ERROR_GE_PARAM_INVALID;
}

ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
Status ret = ModelParserBase::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
return ret;
}
return SUCCESS;
return ret;
}

Status GraphLoader::CommandHandle(const Command &command) {


+ 2
- 2
ge/graph/load/graph_loader.h View File

@@ -32,9 +32,9 @@
namespace ge {
class GraphLoader {
public:
GraphLoader();
GraphLoader() = default;

virtual ~GraphLoader();
virtual ~GraphLoader() = default;

GraphLoader(const GraphLoader &in) = delete;
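
With DavinciModelParser removed, GraphLoader::LoadDataFromFile now calls ModelParserBase directly. A minimal sketch of that call pattern, mirroring the diff above (the path, empty key path, and priority value are placeholders; an empty key path is assumed to mean an unencrypted model):

std::string path = "/tmp/model.om";  // placeholder path
std::string key_path;                // placeholder; empty assumed to mean no decryption key
ge::ModelData model_data;
Status ret = ModelParserBase::LoadFromFile(path.c_str(), key_path.c_str(), 0, model_data);
if (ret != SUCCESS && model_data.model_data != nullptr) {
  // release the partially loaded buffer on failure, as the updated loader does
  delete[] static_cast<char *>(model_data.model_data);
  model_data.model_data = nullptr;
}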



+ 117
- 200
ge/graph/load/model_manager/davinci_model.cc View File

@@ -92,9 +92,35 @@ const uint32_t kEndOfSequence = 0x0704000a;
const uint32_t kEndOfSequenceNew = 507005;
const int32_t kModelAbortNormal = 0x0704000e;
const int32_t kModelAbortNormalNew = 507024;
const uint32_t kInteval = 2;
const char *const kModelName = "model_name";
const char *const kModeleId = "model_id";
const char *const kLoadStartTime = "load_start_time";
const char *const kLoadEndTime = "load_end_time";
const char *const kFusionOpInfo = "fusion_op_info";
const char *const kFusionOpName = "fusion_op_name";
const char *const kOriginalOpNum = "origin_op_num";
const char *const kOriginalOpName = "origin_op_name";
const char *const kStreamId = "stream_id";
const char *const kFusionOpMemoryInfo = "memory_info";
const char *const kInputSize = "input_size";
const char *const kOutputSize = "output_size";
const char *const kWeightSize = "weight_size";
const char *const kWorkSpaceSize = "workspace_size";
const char *const kTotalSize = "total_size";
const char *const kTaskCount = "task_count";
const char *const kTaskId = "task_id";
const char* const kRequestId = "request_id";
const char* const kThreadId = "thread_id";
const char* const kInputBeginTime = "input_begin_time";
const char* const kInputEndTime = "input_end_time";
const char* const kInferBeginTime = "infer_begin_time";
const char* const kInferEndTime = "infer_end_time";
const char* const kOutputBeginTime = "output_start_time";
const char* const kOutputEndTime = "output_end_time";

inline bool IsDataOp(const std::string &node_type) {
return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE;
return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE);
}

inline bool IsTbeTask(const OpDescPtr &op_desc) {
@@ -187,12 +213,12 @@ DavinciModel::~DavinciModel() {
UnbindTaskSinkStream();
for (size_t i = 0; i < label_list_.size(); ++i) {
if (label_list_[i] != nullptr) {
GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index: %zu", i);
GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index:%zu", i);
}
}

for (size_t i = 0; i < stream_list_.size(); ++i) {
GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index: %zu", i);
GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index:%zu", i);
}

for (size_t i = 0; i < event_list_.size(); ++i) {
@@ -360,7 +386,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]",
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu].",
runtime_param_.graph_id, mem_base_, data_size);

if (!is_inner_weight_base_) {
@@ -381,7 +407,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
is_inner_p2p_mem_base_ = true;
}

GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed.");
GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed");
runtime_param_.mem_base = mem_base_;
runtime_param_.weight_base = weights_mem_base_;
runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_;
@@ -391,7 +417,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
Status DavinciModel::InitVariableMem() {
// malloc variable memory base
var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM);
if (TotalVarMemSize() && var_mem_base_ == nullptr) {
if (TotalVarMemSize() && (var_mem_base_ == nullptr)) {
Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize());
if (ret != SUCCESS) {
GELOGE(ret, "Malloc variable memory failed.");
@@ -500,25 +526,25 @@ Status DavinciModel::DoTaskSink() {
}

GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_));
GELOGI("do task_sink. AiCpu deploy type is: %x.", deploy_type_);
GELOGI("do task_sink. AiCpu deploy type is: %x", deploy_type_);

GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed.");

if (known_node_) {
GE_CHK_STATUS_RET(MallocKnownArgs(), "Malloc known node args failed.");
GE_CHK_STATUS_RET(MallocKnownArgs(), "Malloc known node args failed");
}

GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed.");
GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed");

GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed");

GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed");

GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed");

GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed.");
GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed");

GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed");

GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_));

@@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
}

Status DavinciModel::ReportProfilingData() {
std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
Status ret = GetComputeGraphInfo(compute_graph_desc_info);
if (ret != SUCCESS) {
GELOGE(ret, "GetComputeGraphInfo failed.");
return ret;
}
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info);
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo());
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");

return SUCCESS;
@@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() {
}

Status DavinciModel::SinkModelProfile() {
// profiling plugin must be registered
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};
// report model data tag name
std::string tag_name("model_load_info_" + std::to_string(this->Id()));
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");

// Model Header
std::string name = om_name_.empty() ? name_ : om_name_;
size_t name_len = name.size();
reporter_data.deviceId = device_id_;
reporter_data.data = (unsigned char *)&name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)name.c_str();
reporter_data.dataLen = name.size();
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

uint32_t model_id = this->Id();
reporter_data.data = (unsigned char *)&model_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// Load Start/End Time
int64_t start_time = this->GetLoadBeginTime();
reporter_data.data = (unsigned char *)&start_time;
reporter_data.dataLen = sizeof(int64_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

int64_t end_time = this->GetLoadEndTime();
reporter_data.data = (unsigned char *)&end_time;
reporter_data.dataLen = sizeof(int64_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Json model_load_info;
model_load_info[kModelName] = name;
model_load_info[kModeleId] = model_id;
model_load_info[kLoadStartTime] = start_time;
model_load_info[kLoadEndTime] = end_time;
// fusion op info
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const ProfileInfo &profile : profile_list_) {
// op name after fusion
Json fusion_op_info;
string fusion_op_name = profile.fusion_info.op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
uint32_t op_num = profile.fusion_info.original_op_names.size();
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

vector<string> original_name;
for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = profile.fusion_info.original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
original_name.emplace_back(profile.fusion_info.original_op_names[k]);
}
uint32_t stream_id = 0;
auto iter = profiler_report_op_info_.find(fusion_op_name);
if (iter != profiler_report_op_info_.end()) {
stream_id = iter->second.second;
}
reporter_data.data = (unsigned char *)&stream_id;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
reporter_data.data = (unsigned char *)&profile.memory_info;
reporter_data.dataLen = sizeof(profile.memory_info);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// task info
reporter_data.data = (unsigned char *)&profile.task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

fusion_op_info[kFusionOpName] = fusion_op_name;
fusion_op_info[kOriginalOpNum] = op_num;
fusion_op_info[kOriginalOpName] = original_name;
fusion_op_info[kStreamId] = stream_id;
fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size;
fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size;
fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size;
fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size;
fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size;
fusion_op_info[kTaskCount] = profile.task_count;
vector<uint32_t> task_id;
Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
task_id.push_back(idx->second);
}
fusion_op_info[kTaskId] = task_id;
model_load_info[kFusionOpInfo] += fusion_op_info;
}

std::string tag_name("model_load_info_" + std::to_string(this->Id()));
std::string reported_data;
try {
reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
} catch (...) {
GELOGE(FAILED, "Failed to convert JSON to string.");
}
reported_data.append(",")
.append("\n");
prof_mgr.ReportData(device_id_, reported_data, tag_name);
return SUCCESS;
}

Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
// profiling plugin must be registered
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};

string name = om_name_.empty() ? name_ : om_name_;
Json model_time_info;
model_time_info[kModelName] = name;
model_time_info[kModeleId] = this->Id();
model_time_info[kRequestId] = current_data.request_id;
model_time_info[kThreadId] = GetDataInputTid();
model_time_info[kInputBeginTime] = time_info_.processBeginTime;
model_time_info[kInputEndTime] = time_info_.processEndTime;
model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime;
model_time_info[kInferEndTime] = time_info_.inferenceEndTime;
model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime;
model_time_info[kOutputEndTime] = time_info_.dumpEndTime;

// report model data tag name
std::string tag_name;
tag_name.append("model_time_info_")
.append(std::to_string(this->Id()))
.append("_")
.append(std::to_string(current_data.index));

GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");
// device id
reporter_data.deviceId = device_id_;

// Model Header
string name;
if (!om_name_.empty()) {
name = om_name_;
} else {
name = name_;
}
size_t name_len = name.size();
reporter_data.data = (unsigned char *)&name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)name.c_str();
reporter_data.dataLen = name.size();
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// request id
uint64_t request_id = current_data.request_id;
reporter_data.data = (unsigned char *)&request_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);

// thread id
int32_t thread_id = GetDataInputTid();
reporter_data.data = (unsigned char *)&thread_id;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);

// time info
time_info_.modelId = this->Id();
reporter_data.data = (unsigned char *)&time_info_;
reporter_data.dataLen = sizeof(struct timeInfo);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);
.append(std::to_string(this->Id()))
.append("_")
.append(std::to_string(current_data.index));
std::string reported_data;
try {
reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
} catch (...) {
GELOGE(FAILED, "Failed to convert JSON to string.");
}
reported_data.append(",")
.append("\n");
prof_mgr.ReportData(device_id_, reported_data, tag_name);

return SUCCESS;
}
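
SinkModelProfile and SinkTimeProfile now serialize one JSON object per report instead of streaming raw ReporterData fields. A rough sketch of how a load-info payload is assembled and handed to the profiling manager, using the key constants introduced above (the model name, id, times, and tag value are placeholders):

Json model_load_info;
model_load_info[kModelName] = "example_model";  // placeholder value
model_load_info[kModeleId] = 1U;                // placeholder value
model_load_info[kLoadStartTime] = 0L;           // placeholder value
model_load_info[kLoadEndTime] = 0L;             // placeholder value
std::string reported = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
reported.append(",").append("\n");              // trailing separator, as in the diff
prof_mgr.ReportData(device_id_, reported, "model_load_info_1");  // tag is normally "model_load_info_" + model id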
@@ -2641,6 +2589,7 @@ void *DavinciModel::Run(DavinciModel *model) {
bool seq_end_flag = false;
uint32_t model_id = model->Id();
uint32_t device_id = model->GetDeviceId();
GetContext().SetWorkStreamId(model->GetWorkStreamId());

GELOGI("Model Run thread start, model_id:%u.", model_id);
rtError_t rt_ret = rtSetDevice(static_cast<int32_t>(device_id));
@@ -2807,6 +2756,7 @@ Status DavinciModel::ModelRunStart() {
int64_t maxDumpOpNum = std::strtol(opt.c_str(), nullptr, kDecimal);
maxDumpOpNum_ = maxDumpOpNum;

work_stream_id_ = GetContext().WorkStreamId();
CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this);
GELOGI("model tread create success, model id:%u.", model_id_);
return SUCCESS;
@@ -3069,13 +3019,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo
task_desc_info.model_name = name_;
}
task_desc_info.op_name = op->GetName();
task_desc_info.op_type = op->GetType();
task_desc_info.block_dim = task_def.kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info.shape_type = "static";
task_desc_info.cur_iter_num = 0;
// task type
task_desc_info.task_type = kTaskTypeInvalid;
auto &prof_mgr = ProfilingManager::Instance();
prof_mgr.GetOpInputOutputInfo(op, task_desc_info);
auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (model_task_type == RT_MODEL_TASK_KERNEL) {
const domi::KernelDef &kernel_def = task_def.kernel();
@@ -3107,7 +3059,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo
task_desc_info_.emplace_back(task_desc_info);
}
}
return;
}

Status DavinciModel::DistributeTask() {
@@ -3332,7 +3283,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp
///
Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) {
string input_or_output = "input";
string input_or_output;
is_input ? input_or_output = "input" : input_or_output = "output";
if (blobs.size() != data_info.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu",
@@ -3342,7 +3293,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &

for (const auto &data : data_info) {
if (data.first >= blobs.size()) { // check data index.
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu",
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"Verify %s data num failed: can not find No.%u data, because user only feeds %zu",
input_or_output.c_str(), data.first, blobs.size());
return ACL_ERROR_GE_PARAM_INVALID;
}
@@ -4007,41 +3959,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea
main_follow_stream_mapping_[main_stream_id].emplace_back(stream);
}

Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) {
auto &all_op_desc = data_dumper_.GetAllOpDescInfo();
for (auto &op_desc : all_op_desc) {
ComputeGraphDescInfo compute_graph_info;
if (!om_name_.empty()) {
compute_graph_info.model_name = om_name_;
} else {
compute_graph_info.model_name = name_;
}

std::vector<Format> format = { FORMAT_NULL };
std::vector<std::vector<int64_t>> shape = { {0} };
std::vector<DataType> data_type = { DT_UNDEFINED };
compute_graph_info.op_name = op_desc.op_name;
compute_graph_info.op_type = op_desc.op_type;
compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format;
compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape;
compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type;
compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format;
compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape;
compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type;
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto iter = profiler_report_op_info_.find(op_desc.op_name);
if (iter != profiler_report_op_info_.end()) {
task_id = iter->second.first;
stream_id = iter->second.second;
}
compute_graph_info.task_id = task_id;
compute_graph_info.stream_id = stream_id;
graph_desc_info.emplace_back(compute_graph_info);
}
return SUCCESS;
}

void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) {
if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) {
tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_;
@@ -4133,10 +4050,10 @@ Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op
int64_t data_input_size;
(void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size);
GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s",
index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
}
}



+ 3
- 3
ge/graph/load/model_manager/davinci_model.h View File

@@ -412,6 +412,8 @@ class DavinciModel {
///
uint64_t GetSessionId() const { return session_id_; }

uint64_t GetWorkStreamId() const { return work_stream_id_; }

///
/// @ingroup ge
/// @brief SetDeviceId
@@ -840,9 +842,6 @@ class DavinciModel {

Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);

// get desc info of graph for profiling
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info);

void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name);

Status InitL1DataDumperArgs();
@@ -960,6 +959,7 @@ class DavinciModel {
vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task.

uint64_t session_id_;
uint64_t work_stream_id_;

uint32_t device_id_;



+ 0
- 23
ge/graph/load/model_manager/davinci_model_parser.cc View File

@@ -1,23 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/load/model_manager/davinci_model_parser.h"

namespace ge {
DavinciModelParser::DavinciModelParser() {}

DavinciModelParser::~DavinciModelParser() {}
} // namespace ge

+ 0
- 46
ge/graph/load/model_manager/davinci_model_parser.h View File

@@ -1,46 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_

#include <securec.h>
#include <memory>

#include "common/debug/log.h"
#include "common/ge_types.h"
#include "common/model_parser/base.h"
#include "common/types.h"
#include "common/util.h"

namespace ge {
class DavinciModelParser : public ModelParserBase {
public:
///
/// @ingroup hiai
/// @brief constructor
///
DavinciModelParser();

///
/// @ingroup hiai
/// @brief destructor
///
~DavinciModelParser();
};
} // namespace ge

#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_

+ 2
- 10
ge/graph/load/model_manager/model_manager.cc View File

@@ -18,23 +18,15 @@

#include <string>

#include "mmpa/mmpa_api.h"
#include "aicpu/aicpu_schedule/aicpu_op_type_list.h"
#include "common/model_parser/model_parser.h"
#include "common/dump/dump_manager.h"
#include "common/l2_cache_optimize.h"
#include "common/profiling/profiling_manager.h"
#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h"
#include "graph/common/local_context.h"
#include "graph/utils/attr_utils.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "hybrid/hybrid_davinci_model.h"

namespace ge {
thread_local uint32_t device_count = 0;
@@ -1403,7 +1395,7 @@ Status ModelManager::LaunchCustAicpuSo() {
Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &mem_size, size_t &weight_size) {
uint8_t *model_data = nullptr;
uint32_t model_len = 0;
Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len);
Status ret = ModelParserBase::ParseModelContent(model, model_data, model_len);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!");

OmFileLoadHelper om_file_helper;


+ 9
- 5
ge/graph/manager/graph_caching_allocator.cc View File

@@ -28,10 +28,9 @@ const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize,
kBinSizeUnit8 * kMByteSize,
kBinSizeUnit32 * kMByteSize,
kBinSizeUnit128 * kMByteSize,
kGByteSize,
kBinSizeUnit4 * kGByteSize,
kBinSizeUnit16 * kGByteSize,
kBinSizeUnit26 * kGByteSize};
kBinSizeUnit256 * kMByteSize,
kBinSizeUnit512 * kMByteSize,
kGByteSize};

static bool BlockComparator(const Block *left, const Block *right) {
if (left->size != right->size) {
@@ -63,7 +62,10 @@ size_t GetBinIndex(size_t size) {

size_t GetAllocationSize(size_t size) {
size_t index = GetBinIndex(size);
return bin_ranges[index];
if (bin_ranges[index] >= size) {
return bin_ranges[index];
}
return kGByteSize * ((size + kGByteSize - 1) / kGByteSize);
}

///
@@ -119,6 +121,7 @@ void CachingAllocator::Finalize(uint32_t device_id) {
}

uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) {
GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id);
uint8_t *ptr = nullptr;
size = GetBlockSize(size);
Block *block = FindFreeBlock(size, org_ptr, device_id);
@@ -253,6 +256,7 @@ Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, ui
}

Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) {
GELOGI("Try to extend cache. size = %zu, device id = %u", size, device_id);
auto memory_size = GetAllocationSize(size);
const std::string purpose = "Memory for caching.";
auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id);


+ 4
- 4
ge/graph/manager/graph_caching_allocator.h View File

@@ -36,17 +36,17 @@ namespace ge {
constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes
constexpr size_t kBinSizeUnit4 = 4;
constexpr size_t kBinSizeUnit8 = 8;
constexpr size_t kBinSizeUnit16 = 16;
constexpr size_t kBinSizeUnit26 = 26;
constexpr size_t kBinSizeUnit32 = 32;
constexpr size_t kBinSizeUnit128 = 128;
constexpr size_t kBinSizeUnit256 = 256;
constexpr size_t kBinSizeUnit512 = 512;

constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSplitThreshold
constexpr double kSplitThreshold = 0.5; // split when malloc size <= small block size * kSplitThreshold
constexpr size_t kKByteSize = 1024;
constexpr size_t kMByteSize = 1048576; // 1024 * 1024
constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024

static const uint32_t kNumBins = 8;
static const uint32_t kNumBins = 7;

class MemoryAllocator;
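
The bin table now tops out at 1 GB (seven bins), and GetAllocationSize falls back to rounding oversized requests up to a whole number of gigabytes instead of always returning a bin size. A standalone sketch of that rounding, matching the ceiling division used in the .cc change above:

// Sizes above the largest bin are rounded up to a multiple of kGByteSize,
// e.g. a 1.5 GB request becomes a 2 GB allocation.
size_t RoundUpToGigabytes(size_t size) {
  return kGByteSize * ((size + kGByteSize - 1) / kGByteSize);
}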



+ 22
- 18
ge/graph/manager/graph_manager.cc View File

@@ -293,7 +293,7 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) {
return FAILED;
}
if ((op_desc->GetType() == DATA) || (op_type == kGetNextName)) {
GELOGI("Need to process multi batch for compute graph.");
GELOGI("Need to process multi batch for compute graph. op_type:%s", op_desc->GetType().c_str());
GetLocalOmgContext().need_multi_batch = true;
break;
}
@@ -348,7 +348,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
for (auto &subgraph : compute_graph->GetAllSubgraphs()) {
(void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id);
}
GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]");
GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0].");
}

GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id);
@@ -541,7 +541,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
compute_graph->GetName(), session_id, GetContext().WorkStreamId(),
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@@ -557,7 +557,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
compute_graph->GetName(), session_id, GetContext().WorkStreamId(),
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@@ -734,8 +734,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node,
}

Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) {
GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id,
static_cast<int>(mode), ge::GetContext().DeviceId());
GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.",
session_id, graph_id, static_cast<int>(mode), ge::GetContext().DeviceId());

rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
@@ -758,7 +758,7 @@ Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) {

GE_TIMESTAMP_START(RunCustomPass);
GraphPtr graph = std::const_pointer_cast<Graph>(const_graph);
GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.",
GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail",
comp_graph->GetName().c_str());
GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass");
return SUCCESS;
@@ -776,7 +776,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
GE_CHK_STATUS_RET(analyzer_instance->BuildJsonObject(session_id, compute_graph->GetGraphID()),
"BuildJsonObject Failed")

GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s",
GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s.",
compute_graph->GetDirectNodesSize(), session_id, compute_graph->GetGraphID(),
compute_graph->GetName().c_str());
GE_DUMP(compute_graph, "PreRunBegin");
@@ -797,7 +797,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
if (run_optimize_original_graph) {
Status ret = PreRunOptimizeOriginalGraph(graph_node, inputs, compute_graph, session_id);
if (ret != SUCCESS) {
GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s.", compute_graph->GetName().c_str());
GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s", compute_graph->GetName().c_str());
return ret;
}
}
@@ -869,7 +869,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
// release rts generate context
RtContextUtil::GetInstance().DestroyRtContexts(session_id, graph_node->GetGraphId());
if (ret != SUCCESS) {
GELOGE(ret, "PreRun Failed.");
GELOGE(ret, "PreRun Failed. graph_id:%u", graph_node->GetGraphId());
return ret;
}
}
@@ -1209,7 +1209,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const

Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) {
GELOGD("[BuildGraph] start to build graph, graph_id=%u.", graph_id);
GELOGD("[BuildGraph] start to build graph, graph_id:%u.", graph_id);
if (inputs.empty()) {
GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs");
}
@@ -1241,7 +1241,7 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen
ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id);
graph_node->SetRunFlag(false);
if (ret != SUCCESS) {
GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed!");
GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed! graph_id:%u", graph_id);
return GE_GRAPH_PRERUN_FAILED;
}

@@ -2254,9 +2254,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass",
new (std::nothrow)
LinkGenMaskNodesPass(options_.stream_max_parallel_num)));
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass",
new (std::nothrow) HcclContinuousMemcpyPass));
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass",
new (std::nothrow) HcclContinuousMemcpyPass));

GE_TIMESTAMP_START(after_merge_passes);
auto ret = after_merge_passes.Run(compute_graph);
@@ -2509,8 +2508,10 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
const SubGraphInfoPtr &sub_graph_info_ptr,
const std::string &root_graph_name,
uint64_t session_id,
uint64_t work_stream_id,
const GEThreadLocalContext &ge_context) {
if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) {
GetContext().SetWorkStreamId(work_stream_id);
GetContext().SetSessionId(session_id);
GetThreadLocalContext() = ge_context;
graph_manager->UpdateLocalOmgContext(root_graph_id);
@@ -2557,7 +2558,8 @@ Status GraphManager::RunGraphAsync(const GraphId &graph_id, const std::vector<ge
uint64_t session_id, RunAsyncCallback callback) {
GELOGI("[GraphManager] Start to run graph async, graph_id=%u, inputsSize=%zu.", graph_id, inputs.size());

bool ret = prerun_args_q_.Push(PreRunArgs({graph_id, inputs, session_id, GetThreadLocalContext(), callback}));
bool ret = prerun_args_q_.Push(PreRunArgs({graph_id, inputs, session_id,
GetContext().WorkStreamId(), GetThreadLocalContext(), callback}));
if (!ret) {
GELOGE(FAILED, "[GraphManager] Run graph async failed, graph_id=%u.", graph_id);
return FAILED;
@@ -2644,6 +2646,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {

GELOGI("A new loop start.");

GetContext().SetWorkStreamId(args.work_stream_id);
GetContext().SetSessionId(args.session_id);
GetThreadLocalContext() = args.context;
graph_manager->UpdateLocalOmgContext(args.graph_id);
@@ -2725,8 +2728,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
ge_root_model = graph_node->GetGeRootModel();
}

graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.input_tensor,
ge_root_model, GetThreadLocalContext(), args.callback }));
graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.work_stream_id,
args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback }));
GELOGI("Loop end.");
}
}
@@ -2825,6 +2828,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) {

GELOGI("A new loop start.");

GetContext().SetWorkStreamId(args.work_stream_id);
GetContext().SetSessionId(args.session_id);
GetThreadLocalContext() = args.context;
graph_manager->UpdateLocalOmgContext(args.graph_id);


+ 3
- 0
ge/graph/manager/graph_manager.h View File

@@ -196,6 +196,7 @@ class GraphManager {
GraphId graph_id;
std::vector<ge::InputTensorInfo> input_tensor;
uint64_t session_id;
uint64_t work_stream_id;
GEThreadLocalContext context;
RunAsyncCallback callback;
};
@@ -204,6 +205,7 @@ class GraphManager {
GraphNodePtr graph_node;
GraphId graph_id;
uint64_t session_id;
uint64_t work_stream_id;
std::vector<ge::InputTensorInfo> input_tensor;
GeRootModelPtr ge_root_model;
GEThreadLocalContext context;
@@ -221,6 +223,7 @@ class GraphManager {
const SubGraphInfoPtr &sub_graph_info_ptr,
const std::string &root_graph_name,
uint64_t session_id,
uint64_t work_stream_id,
const GEThreadLocalContext &ge_context);
Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor);
void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor);
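
PreRunArgs and RunArgs now carry a work_stream_id so the pre-run and run worker threads can restore the caller's work stream alongside the session id. A minimal sketch of the capture/restore pattern the graph_manager.cc changes follow (names taken from the diff):

// On the caller's thread, when the job is queued:
uint64_t work_stream_id = GetContext().WorkStreamId();
// ... stored in PreRunArgs / RunArgs, then restored on the worker thread
// before any graph processing:
GetContext().SetWorkStreamId(work_stream_id);
GetContext().SetSessionId(session_id);
GetThreadLocalContext() = ge_context;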


+ 1
- 1
ge/graph/manager/graph_mem_allocator.h View File

@@ -26,6 +26,7 @@

#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "graph/manager/host_mem_allocator.h"
#include "graph/node.h"
#include "runtime/mem.h"

@@ -139,7 +140,6 @@ class MemoryAllocator {
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>;
class CachingAllocator;
class RdmaPoolAllocator;
class HostMemAllocator;
class MemManager {
public:
MemManager();


+ 3
- 3
ge/graph/passes/assign_remove_pass.cc View File

@@ -24,9 +24,9 @@ namespace {
constexpr uint32_t kValidInputNodeOutputNum = 1;
constexpr int32_t kAssignRefInputIndex = 0;
constexpr int32_t kAssignValueInputIndex = 1;
static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
}

Status AssignRemovePass::Run(NodePtr &node) {


+ 2
- 4
ge/graph/passes/constant_folding_pass.cc View File

@@ -50,13 +50,11 @@ Status RunOpKernelWithCheck(NodePtr &node,
return FoldingPass::RunOpKernel(node, inputs, outputs);
}

const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
&ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const {
const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const {
return statistic_of_ge_constant_folding_;
}

const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
&ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const {
const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const {
return statistic_of_op_constant_folding_;
}



+ 19
- 6
ge/graph/passes/flow_ctrl_pass.cc View File

@@ -37,7 +37,7 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) {
return NOT_CHANGED;
}

GELOGI("FlowCtrl pass begin");
GELOGI("FlowCtrl pass begin.graph is [%s]", compute_graph->GetName().c_str());
bool graph_change = false;
// 1. Add FP/BP flow ctrl (big cycle)
for (auto &node : compute_graph->GetDirectNode()) {
@@ -80,6 +80,16 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) {
graph_change = true;
}
}

// the add-edge operations below depend on the memcpy node in the iterator loop being assigned a single stream, or they may cause a block
for (auto &active_node : active_nodes_in_iter_loop_) {
auto ret = GraphUtils::AddEdge(active_node->GetOutControlAnchor(),
assign_add_node_in_fpbp_loop_->GetInControlAnchor());
if (ret != GRAPH_SUCCESS) {
GELOGW("add control edge between iter_loop_node:%s and fpbp_loop_node:%s fail, may cause block",
active_node->GetName().c_str(), assign_add_node_in_fpbp_loop_->GetName().c_str());
}
}
GELOGI("FlowCtrl pass end, graph is %s.", graph_change ? "changed" : "not changed");
return graph_change ? SUCCESS : NOT_CHANGED;
}
@@ -279,16 +289,16 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co
* loopIncrement
*/
// Insert AssignAdd node
NodePtr assign_add_node =
assign_add_node_in_fpbp_loop_ =
InsertAssignOp(compute_graph, ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node);
if (assign_add_node == nullptr || switch_node == nullptr) {
if (assign_add_node_in_fpbp_loop_ == nullptr || switch_node == nullptr) {
GELOGE(PARAM_INVALID, "assign add node or switch node is null");
return FAILED;
}

string active_name = switch_node->GetName() + "_StreamActive";
// add attr for stream assign model to break branch.
GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node, active_name), "set stream label failed");
GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node_in_fpbp_loop_, active_name), "set stream label failed");

// used for stream assign to find true branch
GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed");
@@ -304,13 +314,15 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co
DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED);

// add ctrl edges
graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), assign_add_node->GetInControlAnchor());
graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(),
assign_add_node_in_fpbp_loop_->GetInControlAnchor());
if (add_ret != GRAPH_SUCCESS) {
GELOGE(FAILED, "Add switch_node to assign_add_node ctrl edge failed, add_ret=%u.", add_ret);
return FAILED;
}

add_ret = GraphUtils::AddEdge(assign_add_node->GetOutControlAnchor(), active_node->GetInControlAnchor());
add_ret = GraphUtils::AddEdge(assign_add_node_in_fpbp_loop_->GetOutControlAnchor(),
active_node->GetInControlAnchor());
if (add_ret != GRAPH_SUCCESS) {
GELOGE(FAILED, "Add assign_add_node to active_node ctrl edge failed, add_ret=%u.", add_ret);
return FAILED;
@@ -533,6 +545,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph,
GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed");
// used for stream assign to find active stream
GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { loop_pre_node->GetName() }), "set active label list failed");
active_nodes_in_iter_loop_.push_back(active_node);
return SUCCESS;
}
} // namespace ge

+ 3
- 0
ge/graph/passes/flow_ctrl_pass.h View File

@@ -142,6 +142,9 @@ class FlowCtrlPass : public GraphPass {
/// false: only one dataSet exist
///
bool CheckMultiDataSet(ComputeGraphPtr &compute_graph);

NodePtr assign_add_node_in_fpbp_loop_ = nullptr;
std::vector<NodePtr> active_nodes_in_iter_loop_;
};
} // namespace ge



+ 13
- 9
ge/graph/passes/hccl_continuous_memcpy_pass.cc View File

@@ -140,7 +140,8 @@ bool HcclContinuousMemcpyPass::IsDataNode(const std::string& node_type) {
/// @param [in] ge::OutDataAnchorPtr in_node
/// @return ge::NodePtr
///
NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) {
NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &out_data_anchor) {
GE_CHECK_NOTNULL_EXEC(graph, return nullptr);
NodePtr pre_node = out_data_anchor->GetOwnerNode();
OpDescPtr pre_op_desc = pre_node->GetOpDesc();
@@ -205,8 +206,9 @@ std::string HcclContinuousMemcpyPass::CheckDuplicateName(const std::string &node
/// @param [in] InDataAnchorPtr hccl_in_anchor
/// @return status
///
Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode());
GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode());

@@ -235,8 +237,9 @@ Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &gra
/// @param [in] InDataAnchorPtr hccl_in_anchor
/// @return status
///
Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(),
hccl_in_anchor->GetOwnerNode()->GetName().c_str());
NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor);
@@ -274,8 +277,8 @@ Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr
/// @return status
///
Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &var_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
const OutDataAnchorPtr &var_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) {
GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str());
return SUCCESS;
@@ -354,8 +357,9 @@ Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeG
/// @param [in] ge::OutDataAnchorPtr variable node out anchor
/// @return ge::NodePtr
///
NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) {
GE_CHECK_NOTNULL_EXEC(graph , return nullptr);
NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph,
const OutDataAnchorPtr &out_data_anchor) {
GE_CHECK_NOTNULL_EXEC(graph, return nullptr);
NodePtr pre_node = out_data_anchor->GetOwnerNode();
OpDescPtr pre_op_desc = pre_node->GetOpDesc();
if (pre_op_desc == nullptr) {


+ 3
- 3
ge/graph/passes/inplace_support_check_pass.cc View File

@@ -23,9 +23,9 @@ namespace ge {
namespace {
constexpr uint32_t kInplaceSupportOutputIndex = 0;
constexpr uint32_t kInplaceSupportOutputNum = 1;
static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
}
Status InplaceSupportCheckPass::Run(NodePtr &node) {
GELOGD("InplaceSupportCheckPass running");


+ 1
- 1
ge/graph/passes/net_output_pass.cc View File

@@ -458,7 +458,7 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) {
GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null.");
return GE_GRAPH_PARAM_NULLPTR;
}
GELOGI("NetOutputPass Run.");
GELOGI("NetOutputPass Run.graph is [%s]", graph->GetName().c_str());
NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT);
// save user targets node
SaveAndRemoveTargets(graph);


+ 33
- 6
ge/graph/passes/no_use_reshape_remove_pass.cc View File

@@ -82,14 +82,41 @@ Status NoUseReshapeRemovePass::Run(ge::NodePtr &node) {
}
}
if (to_be_deleted) {
GELOGI("NoUseReshapeRemovePass remove useless node:%s", node->GetName().c_str());
auto ret = PassUtils::UnlinkNodeWithControlCopy(node, kReshapeShapeIndex);
if (ret != SUCCESS) {
GELOGE(ret, "DimensionAdjustPass unlink node with control copy fail.");
return ret;
}
auto ret = TryRemoveConstShapeInput(node);
GE_CHK_STATUS_RET_NOLOG(ret);
GELOGI("NoUseReshapeRemovePass remove useless reshape node:%s", node->GetName().c_str());
return IsolateAndDeleteNode(node, {kReshapeDataIndex});
}
return SUCCESS;
}

Status NoUseReshapeRemovePass::TryRemoveConstShapeInput(ge::NodePtr &reshape_node) {
auto shape_input_anchor = reshape_node->GetInDataAnchor(kReshapeShapeIndex);
if (shape_input_anchor == nullptr) {
return SUCCESS;
}
GE_CHECK_NOTNULL(shape_input_anchor->GetPeerOutAnchor());
auto shape_input = shape_input_anchor->GetPeerOutAnchor()->GetOwnerNode();
GE_CHECK_NOTNULL(shape_input);
if (shape_input->GetType() != CONSTANT && shape_input->GetType() != CONSTANTOP) {
return SUCCESS;
}
//   op(x)   const(shape)
//      \      /
//       reshape
// const input can unlink but should copy control_dependency
auto ret = PassUtils::UnlinkNodeWithControlCopy(reshape_node, kReshapeShapeIndex);
if (ret != SUCCESS) {
GELOGE(ret, "Unlink node %s with control copy failed.", shape_input->GetName().c_str());
return ret;
}

// remove const without any data_output
if (shape_input->GetOutDataNodesSize() == 0) {
auto ret = IsolateAndDeleteNode(shape_input, {});
GE_CHK_GRAPH_STATUS_RET(ret, "Fail to remove node %s", shape_input->GetName().c_str());
GELOGI("Remove useless shape input const %s.", shape_input->GetName().c_str());
}
return SUCCESS;
}
} // namespace ge

+ 3
- 0
ge/graph/passes/no_use_reshape_remove_pass.h View File

@@ -32,6 +32,9 @@ class NoUseReshapeRemovePass : public BaseNodePass {
/// @author
///
Status Run(ge::NodePtr &node) override;

private:
Status TryRemoveConstShapeInput(NodePtr &reshape_node);
};
} // namespace ge



+ 1
- 3
ge/graph/passes/prune_pass.cc View File

@@ -27,12 +27,11 @@

namespace ge {
Status PrunePass::Run(ge::ComputeGraphPtr graph) {
GELOGD("PrunePass Start");
GELOGD("PrunePass Start, graph is [%s]", graph->GetName().c_str());
if (graph == nullptr) {
GELOGE(GE_GRAPH_ISNULL, "input compute graph is NULL.");
return GE_GRAPH_ISNULL;
}

std::vector<NodePtr> out_nodes;
std::unordered_set<NodePtr> nodes;
for (NodePtr &node_ptr : graph->GetDirectNode()) {
@@ -42,7 +41,6 @@ Status PrunePass::Run(ge::ComputeGraphPtr graph) {
out_nodes.push_back(node_ptr);
}
}

if (out_nodes.empty()) {
GELOGW("graph [%s] does not contain NETOUTPUT type node,no return value. Do nothing!", graph->GetName().c_str());
return ge::SUCCESS;


+ 1
- 1
ge/graph/passes/reshape_remove_pass.cc View File

@@ -43,7 +43,7 @@ Status ReshapeRemovePass::Run(NodePtr &node) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
int key = kToBeDeleteOp.find(node->GetType()) == kToBeDeleteOp.end() ? kOpNoDelete : kToBeDeleteOp[node->GetType()];
switch(key) {
switch (key) {
case kReshapeType: {
bool is_shape_unknown = false;
if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) {


+ 1
- 1
ge/graph/passes/subgraph_const_migration_pass.cc View File

@@ -385,7 +385,7 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra

// Break Move and follow, Link Data and follow.
const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex);
const auto in_anchors =out_anchor->GetPeerInDataAnchors();
const auto in_anchors = out_anchor->GetPeerInDataAnchors();
for (const auto in_anchor : in_anchors) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());


+ 5
- 4
ge/graph/preprocess/graph_preprocess.cc View File

@@ -991,7 +991,6 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range,
Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option,
vector<vector<std::pair<int64_t, int64_t>>> &range_vec) {
// check both mode and shape_range option are all enabled

auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE);
bool enable_dynamic_execute_mode = (mode_iter != graph_option.end()) && (mode_iter->second == "dynamic_execute");
if (!enable_dynamic_execute_mode) {
@@ -1272,9 +1271,10 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) {
return SUCCESS;
}

Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option) {
Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input,
const std::map<string, string> &graph_option) {
// Get shape range of input in dynamic_execute mode
vector<vector<std::pair<int64_t,int64_t>>> dynamic_shape_range_vec;
vector<vector<std::pair<int64_t, int64_t>>> dynamic_shape_range_vec;
auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec);
GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode.");
compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format));
@@ -2012,7 +2012,8 @@ Status GraphPrepare::ProcessNetOutput() {
return SUCCESS;
}

Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,const std::map<string,string> &graph_option) {
Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,
const std::map<string, string> &graph_option) {
compute_graph_->SetInputSize(user_input.size());
if (user_input.empty()) {
return SUCCESS;


+ 3
- 3
ge/graph/preprocess/graph_preprocess.h View File

@@ -23,7 +23,7 @@
#include <vector>
#include "common/debug/log.h"
#include "common/debug/memory_dumper.h"
#include "common/model_parser/base.h"
#include "common/model_parser/model_parser.h"
#include "common/properties_manager.h"
#include "common/string_util.h"
#include "common/types.h"
@@ -63,8 +63,8 @@ class GraphPrepare {
Status CheckRefOp();
Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode);
Status AdjustDataOpOutput(const NodePtr &node);
Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option);
Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option);
Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option);
Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option);
Status CheckConstOp();
Status VerifyConstOp(const NodePtr &node);
Status CheckUserInput(const std::vector<GeTensor> &user_input);


+ 1
- 1
ge/graph/preprocess/multi_batch_options.h View File

@@ -105,7 +105,7 @@ GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, con
/// @return 0: true/false
///
GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name,
const std::string &input_format);
const std::string &input_format);

} // namespace multibatch
} // namespace ge


+ 37
- 1
ge/host_kernels/slice_kernel.cc View File

@@ -16,6 +16,8 @@

#include "host_kernels/slice_kernel.h"

#include <set>

#include "common/ge_inner_error_codes.h"
#include "common/op/ge_op_utils.h"
#include "common/types.h"
@@ -31,6 +33,30 @@ const size_t kSliceInputSize = 3;
const size_t kSliceInputIndexX = 0;
const size_t kSliceInputIndexBegin = 1;
const size_t kSliceInputIndexSize = 2;
const std::set<ge::DataType> kSupportedDataTypeToLength = {
DT_BOOL,
DT_INT64,
DT_UINT64,
DT_FLOAT,
DT_INT32,
DT_UINT32,
DT_INT8,
DT_UINT8,
DT_INT16,
DT_UINT16,
DT_FLOAT16,
DT_DOUBLE,
DT_DUAL,
DT_DUAL_SUB_INT8,
DT_DUAL_SUB_UINT8,
DT_COMPLEX64,
DT_COMPLEX128,
DT_QINT8,
DT_QINT16,
DT_QINT32,
DT_QUINT8,
DT_QUINT16,
};
} // namespace

Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTensorPtr> &input,
@@ -56,6 +82,16 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTenso

// data type in input_x
auto data_type = x_->GetTensorDesc().GetDataType();
// check supported
if (kSupportedDataTypeToLength.count(data_type) == 0) {
GELOGW("input_x data_type is [%s], does not supported!", TypeUtils::DataTypeToSerialString(data_type).c_str());
return NOT_CHANGED;
}
uint32_t type_size = 0;
bool is_success = TypeUtils::GetDataTypeLength(data_type, type_size);
if (!is_success) {
return NOT_CHANGED;
}
// check data type of begin and size
if (begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) {
GELOGW("Data type of begin and size for slice are not DT_INT32.");
@@ -69,7 +105,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTenso
GE_CHECK_NOTNULL(begin_data);
GE_CHECK_NOTNULL(size_data);

size_t data_size = x_->GetData().size() / sizeof(int32_t);
size_t data_size = x_->GetData().size() / type_size;
size_t begin_size = begin->GetData().size() / sizeof(int32_t);
size_t size_size = size->GetData().size() / sizeof(int32_t);
const ge::GeShape &x_shape = x_->GetTensorDesc().GetShape();
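
A minimal standalone sketch of the size computation this hunk introduces, with an illustrative (not GE) type table: the element count is derived from the tensor's own type length instead of a hard-coded sizeof(int32_t), and unsupported types are rejected up front.

#include <cstddef>
#include <iostream>
#include <map>
#include <set>

enum class DType { kInt32, kFloat16, kBool };  // illustrative subset of ge::DataType

const std::set<DType> kSupported = {DType::kInt32, DType::kFloat16, DType::kBool};
const std::map<DType, size_t> kTypeLength = {{DType::kInt32, 4}, {DType::kFloat16, 2}, {DType::kBool, 1}};

// Returns the element count of a raw buffer, or -1 if the type is unsupported.
long ElementCount(DType dtype, size_t byte_size) {
  if (kSupported.count(dtype) == 0) {
    return -1;  // mirrors the NOT_CHANGED early-out above
  }
  const size_t type_size = kTypeLength.at(dtype);
  return static_cast<long>(byte_size / type_size);  // previously byte_size / sizeof(int32_t)
}

int main() {
  std::cout << ElementCount(DType::kFloat16, 32) << std::endl;  // 16 elements, not 8
  return 0;
}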


+ 2
- 2
ge/hybrid/executor/hybrid_execution_context.h

@@ -62,9 +62,9 @@ struct GraphExecutionContext {
rtStream_t stream = nullptr;
rtContext_t rt_context = nullptr;
rtContext_t rt_gen_context = nullptr;
std::unique_ptr<CallbackManager> callback_manager;
std::unique_ptr<CallbackManager> callback_manager = nullptr;
NpuMemoryAllocator *allocator = nullptr;
mutable std::unique_ptr<HybridProfiler> profiler;
mutable std::unique_ptr<HybridProfiler> profiler = nullptr;
DumpProperties dump_properties;
bool trace_enabled = false;
bool dump_enabled = false;


+ 4
- 2
ge/hybrid/executor/hybrid_model_async_executor.cc

@@ -26,6 +26,7 @@ namespace hybrid {
namespace {
const int kDataOutputIndex = 0;
const size_t kMinimumPiplineStages = 2;
const int kIntBase = 10;
}
HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model)
: model_(model), run_flag_(false) {
@@ -150,7 +151,7 @@ Status HybridModelAsyncExecutor::RunInternal() {
GELOGI("HybridModel will execute in pipeline mode");
auto iter_per_run = std::getenv("ITER_NUM");
if (iter_per_run) {
args.num_loops = static_cast<int>(strtol(iter_per_run, nullptr, 10));
args.num_loops = static_cast<int>(strtol(iter_per_run, nullptr, kIntBase));
}
ret = pipe_executor_->Execute(args);
} else {
@@ -250,7 +251,8 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
if (k >= shape.GetDimNum()) {
break;
}
if (shape.GetDim(k) < range[k].first || shape.GetDim(k) > range[k].second) {
// range[k].second can be -1
if (shape.GetDim(k) < range[k].first || (range[k].second >= 0 && shape.GetDim(k) > range[k].second)) {
GELOGE(PARAM_INVALID, "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld]",
input_index, k, shape.GetDim(k), range[k].first, range[k].second);
return PARAM_INVALID;
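
A self-contained sketch of the relaxed range check above, assuming (as the hunk's comment states) that a negative upper bound means the dynamic dim is unbounded; the helper name is made up for illustration.

#include <cassert>
#include <cstdint>
#include <utility>

// True if dim lies in [range.first, range.second]; range.second < 0 means no upper bound.
bool DimInRange(int64_t dim, const std::pair<int64_t, int64_t> &range) {
  if (dim < range.first) {
    return false;
  }
  return range.second < 0 || dim <= range.second;
}

int main() {
  assert(DimInRange(128, {1, -1}));   // -1 upper bound accepts any dim >= 1
  assert(!DimInRange(0, {1, -1}));
  assert(!DimInRange(9, {1, 8}));
  return 0;
}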


+ 2
- 1
ge/hybrid/executor/hybrid_model_pipeline_executor.cc

@@ -8,6 +8,7 @@ namespace ge {
namespace hybrid {
namespace {
constexpr int kNumExecutors = 2;
const int kMinLoopCount = 2;
const int kIntBase = 10;
const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL";
}
@@ -208,7 +209,7 @@ Status HybridModelPipelineExecutor::InitStageExecutors() {

Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
int loop_count = args.num_loops;
GE_CHECK_GE(loop_count, 2);
GE_CHECK_GE(loop_count, kMinLoopCount);

auto &inputs = args.inputs;
auto &input_desc = args.input_desc;


+ 1
- 1
ge/hybrid/executor/node_state.h

@@ -30,7 +30,7 @@ class NodeTask;
struct GraphExecutionContext;
class SubgraphContext;
class TaskContext;
class NodeState;
struct NodeState;

class ShapeFuture {
public:


+ 2
- 2
ge/hybrid/executor/subgraph_executor.cc

@@ -275,10 +275,10 @@ Status SubgraphExecutor::PrepareNodes(int group) {
Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
GetContext().SetSessionId(context_->context_id);
HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
"[%s] Failed to InferShape.", node_state.GetName().c_str());
"[%s] Failed to InferShape.", node_state.GetName().c_str());
GetContext().SetSessionId(context_->session_id);
HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
"[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
"[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
return SUCCESS;
}



+ 9
- 53
ge/hybrid/executor/worker/execution_engine.cc

@@ -70,8 +70,6 @@ class NodeDoneCallback {
Status PrepareConstInputs(const NodeItem &node_item);
Status DumpDynamicNode();
Status ProfilingReport();
Status GetGraphDescInfo(const NodePtr node, const HybridModel *model,
std::vector<ComputeGraphDescInfo> &compute_graph_info);
Status GetTaskDescInfo(const NodePtr node, const HybridModel *model,
std::vector<TaskDescInfo> &task_desc_info);
GraphExecutionContext *graph_context_;
@@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
}

GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
auto &prof_mgr = ProfilingManager::Instance();
task_desc_info = context_->GetProfilingTaskDescInfo();
context_->ClearProfilingTaskDescInfo();

return SUCCESS;
}

Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model,
std::vector<ComputeGraphDescInfo> &compute_graph_info) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(model);

GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str());
compute_graph_info = context_->GetProfilingGraphDescInfo();
context_->ClearProfilingGraphDescInfo();

auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
for (auto &tmp_compute_graph_info : compute_graph_info) {
// default
if (op_desc->GetAllInputsSize() == 0) {
tmp_compute_graph_info.input_format = { FORMAT_NULL };
tmp_compute_graph_info.input_shape = { {0} };
tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
}
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
if (input_desc == nullptr) {
continue;
}
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
}

if (op_desc->GetOutputsSize() == 0) {
tmp_compute_graph_info.output_format = { FORMAT_NULL };
tmp_compute_graph_info.output_shape = { {0} };
tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
}
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
}
for (auto &tmp_task_desc : task_desc_info) {
// save op input and output info
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc);
}

return SUCCESS;
@@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() {
return profiling_ret;
}

std::vector<ComputeGraphDescInfo> compute_graph_info;
profiling_ret = GetGraphDescInfo(node, model, compute_graph_info);
if (profiling_ret != RT_ERROR_NONE) {
GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str());
return profiling_ret;
}

auto &profiling_manager = ProfilingManager::Instance();
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info);
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info);
return SUCCESS;
}

@@ -323,6 +277,8 @@ Status NodeDoneCallback::OnNodeDone() {
node_item.NodeName().c_str());
}

// release workspace
context_->ReleaseWorkspace();
// release inputs
for (int i = 0; i < context_->NumInputs(); ++i) {
context_->ReleaseInput(i);


+ 3
- 1
ge/hybrid/model/hybrid_model_builder.cc

@@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() {
op_index = task_def.kernel_ex().op_index();
} else if (task_type == RT_MODEL_TASK_HCCL) {
op_index = task_def.kernel_hccl().op_index();
} else if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
op_index = task_def.kernel_with_handle().context().op_index();
} else {
GELOGD("Skip task type: %d", static_cast<int>(task_type));
continue;
@@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() {
}

auto &node = iter->second;
if (task_type == RT_MODEL_TASK_KERNEL) {
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
}
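
A rough sketch of the dispatch this hunk extends, using much-reduced stand-in types rather than the real domi protobufs: the op index is read from a different sub-message per task type, and kernel-with-handle tasks are now treated like plain kernel tasks when binding TBE kernel binaries.

#include <cstdint>

enum class TaskType { kKernel, kKernelEx, kHccl, kAllKernel, kOther };  // stand-ins for RT_MODEL_TASK_*

struct TaskDef {  // heavily simplified stand-in for domi::TaskDef
  TaskType type;
  uint32_t kernel_op_index;
  uint32_t kernel_ex_op_index;
  uint32_t hccl_op_index;
  uint32_t with_handle_op_index;
};

int64_t GetOpIndex(const TaskDef &task) {  // returns -1 for task types that are skipped
  switch (task.type) {
    case TaskType::kKernel:    return task.kernel_op_index;
    case TaskType::kKernelEx:  return task.kernel_ex_op_index;
    case TaskType::kHccl:      return task.hccl_op_index;
    case TaskType::kAllKernel: return task.with_handle_op_index;  // the branch added in this change
    default:                   return -1;                         // unknown task types are skipped
  }
}

bool NeedsTbeKernelBin(TaskType type) {
  return type == TaskType::kKernel || type == TaskType::kAllKernel;
}

int main() {
  TaskDef task{TaskType::kAllKernel, 0, 0, 0, 7};
  return (GetOpIndex(task) == 7 && NeedsTbeKernelBin(task.type)) ? 0 : 1;
}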



+ 2
- 3
ge/hybrid/node_executor/aicore/aicore_node_executor.cc

@@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return FAILED;
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim());
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
}


+ 158
- 28
ge/hybrid/node_executor/aicore/aicore_op_task.cc

@@ -33,6 +33,20 @@ constexpr char const *kAttrOpParamSize = "op_para_size";
constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
} // namespace

TbeHandleHolder::TbeHandleHolder(void *bin_handle)
: bin_handle_(bin_handle) {}

TbeHandleHolder::~TbeHandleHolder() {
if (bin_handle_ != nullptr) {
GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_));
}
}

bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) {
auto ret = registered_handles_.emplace(std::move(holder));
return ret.second;
}

Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def));
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc));
@@ -69,7 +83,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
if (rt_ret != RT_ERROR_NONE || is_single_op_) {
void *bin_handle = nullptr;
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());
rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string),
@@ -96,7 +110,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
} else {
GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str());
GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str());
kernel_store.ReferTBEHandle(stub_name_.c_str());
}
std::string kernel_name;
@@ -108,25 +122,63 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
return SUCCESS;
}

Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[%s] Failed to validate task def: [%s]",
op_desc.GetName().c_str(),
task_def.DebugString().c_str());
Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) {
TbeHandleRegistry &registry = TbeHandleRegistry::GetInstance();
auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str());
return INTERNAL_ERROR;
}

void *bin_handle = nullptr;
GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str());
rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string),
GELOGI("Get original type of session_graph_id."));
if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
} else {
GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
return PARAM_INVALID;
}
binary.version = 0;
binary.data = tbe_kernel->GetBinData();
binary.length = tbe_kernel->GetBinDataSize();
GELOGI("TBE: binary.length: %lu", binary.length);
GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle));
handle_ = bin_handle;
auto holder = std::unique_ptr<TbeHandleHolder>(new (std::nothrow) TbeHandleHolder(handle_));
if (holder == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
if (!registry.AddHandle(std::move(holder))) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc.GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
return SUCCESS;
}

Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
stub_name_ = kernel_def.stub_func();

GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc));

GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_));
args_size_ = kernel_def.args_size();
block_dim_ = kernel_def.block_dim();

// malloc args memory
args_.reset(new(std::nothrow) uint8_t[args_size_]);
GE_CHECK_NOTNULL(args_);
if (kernel_def.args().size() < args_size_) {
GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_");
return INTERNAL_ERROR;
}
errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_);
if (err != EOK) {
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
@@ -157,19 +209,75 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef
block_dim_,
arg_base_,
args_size_);
return SUCCESS;
}

Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) {
const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle();
const domi::KernelContext &context = kernel_with_handle.context();

GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc));
original_kernel_key_ = kernel_with_handle.original_kernel_key() + "_";
node_info_ = kernel_with_handle.node_info() + "/";
args_size_ = kernel_with_handle.args_size();
block_dim_ = kernel_with_handle.block_dim();
// malloc args memory
args_.reset(new(std::nothrow) uint8_t[args_size_]);
GE_CHECK_NOTNULL(args_);
if (kernel_with_handle.args().size() < args_size_) {
GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_");
return INTERNAL_ERROR;
}
errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_);

if (err != EOK) {
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
return INTERNAL_ERROR;
}

if (context.args_offset().size() < sizeof(uint16_t)) {
GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size());
return INTERNAL_ERROR;
}

const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint32_t offset = *args_offset_buffer;
if (offset > args_size_) {
GELOGE(INTERNAL_ERROR,
"[%s] Arg offset out of range. offset = %u, arg size = %u",
GetName().c_str(),
offset,
args_size_);
return INTERNAL_ERROR;
}

arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
max_arg_count_ = (args_size_ - offset) / sizeof(void *);
return SUCCESS;
}

Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[%s] Failed to validate task def: [%s]",
op_desc.GetName().c_str(),
task_def.DebugString().c_str());

if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) {
GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def));
} else {
GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def));
}
return SUCCESS;
}

Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type != RT_MODEL_TASK_KERNEL) {
if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) {
GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type));
return INTERNAL_ERROR;
}

const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();
auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type != ccKernelType::TE) {
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
@@ -180,10 +288,9 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {
}

Status AiCoreOpTask::PrepareWithShape(TaskContext &context) {
if (tiling_buffer_ != nullptr) {
if (is_dynamic_) {
return UpdateTilingInfo(context);
}

return SUCCESS;
}

@@ -212,8 +319,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
clear_atomic_ = tiling_info.clear_atomic;

tiling_data_ = tiling_info.tiling_data.str();
tiling_key_ = tiling_info.tiling_key;
GELOGD("Successfully getting [tiling_key] : %u", tiling_key_);
if (tiling_data_.empty()) {
GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str());
GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str());
return SUCCESS;
}
if (tiling_buffer_ == nullptr) {
GELOGE(INTERNAL_ERROR, "tiling_buffer is nullptr while tiling_data is not empty!");
return INTERNAL_ERROR;
}

@@ -238,6 +351,9 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info)
GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info),
"Failed calc tiling data of node %s.",
node->GetName().c_str());
if (is_single_op_) {
tiling_info.clear_atomic = false;
}
GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str());
return SUCCESS;
}
@@ -296,16 +412,26 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
}

Status AiCoreOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
if (handle_ != nullptr) {
std::string dev_func = original_kernel_key_ + std::to_string(tiling_key_);
std::string kernel_info = node_info_ + std::to_string(tiling_key_);
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).", dev_func.c_str(),
block_dim_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), args_size_, nullptr,
stream, kernel_info.c_str()));
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).", dev_func.c_str(),
block_dim_);
} else {
GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
}
return SUCCESS;
}

Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) {
bool dynamic_supported = false;
(void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported);
if (!dynamic_supported) {
(void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, is_dynamic_);
if (!is_dynamic_) {
GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str());
return SUCCESS;
}
@@ -314,22 +440,26 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) {
int64_t max_size = -1;
(void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size);
GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size);
if (max_size <= 0) {
if (max_size < 0) {
GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size);
return PARAM_INVALID;
}

auto allocator = NpuMemoryAllocator::GetAllocator();
GE_CHECK_NOTNULL(allocator);
tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size));
GE_CHECK_NOTNULL(tiling_buffer_);
if (max_size > 0) {
tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size));
GE_CHECK_NOTNULL(tiling_buffer_);
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size);
} else {
GELOGD("op_param_size is 0, no need to create tiling buffer.");
}

GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size);
return SUCCESS;
}

bool AiCoreOpTask::IsDynamicShapeSupported() {
return tiling_buffer_ != nullptr;
return is_dynamic_;
}

const std::string &AiCoreOpTask::GetName() const {
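
A condensed, hypothetical sketch of the launch selection added above (the launch functions below are stand-ins; the real calls are rtKernelLaunch and rtKernelLaunchWithHandle): when a binary handle has been registered, the device function name is rebuilt from the original kernel key plus the tiling key, otherwise the task falls back to the stub-function path.

#include <cstdint>
#include <iostream>
#include <string>

// Stand-ins for the runtime launch APIs; signatures are illustrative only.
void LaunchByStub(const std::string &stub_name) { std::cout << "stub launch: " << stub_name << std::endl; }
void LaunchByHandle(const std::string &dev_func, const std::string &kernel_info) {
  std::cout << "handle launch: " << dev_func << " (" << kernel_info << ")" << std::endl;
}

void Launch(bool has_handle, const std::string &stub_name, const std::string &original_kernel_key,
            const std::string &node_info, uint32_t tiling_key) {
  if (has_handle) {
    // dev_func and kernel_info are keyed by tiling_key, matching the "_" / "/" suffixes appended in Init.
    LaunchByHandle(original_kernel_key + std::to_string(tiling_key), node_info + std::to_string(tiling_key));
  } else {
    LaunchByStub(stub_name);
  }
}

int main() {
  Launch(true, "model/relu_tvmbin", "relu_kernel_", "graph/relu/", 3);  // handle-based path
  Launch(false, "model/relu_tvmbin", "", "", 0);                        // legacy stub path
  return 0;
}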


+ 34
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h

@@ -28,6 +28,32 @@

namespace ge {
namespace hybrid {
class TbeHandleHolder {
public:
TbeHandleHolder(void *bin_handle);
~TbeHandleHolder();

void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; }
void *GetBinHandle() { return bin_handle_; }

private:
friend class TbeHandleRegistry;
void *bin_handle_ = nullptr;
};

class TbeHandleRegistry {
public:
static TbeHandleRegistry &GetInstance() {
static TbeHandleRegistry instance;
return instance;
}

bool AddHandle(std::unique_ptr<TbeHandleHolder> &&holder);

private:
std::set<std::unique_ptr<TbeHandleHolder>> registered_handles_;
};

class AiCoreOpTask {
public:
AiCoreOpTask() = default;
@@ -67,6 +93,9 @@ class AiCoreOpTask {
Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def);
Status InitTilingInfo(const OpDesc &op_desc);
Status RegisterTbeHandle(const OpDesc &op_desc);
Status RegisterKernelHandle(const OpDesc &op_desc);
Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def);
Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def);

std::string stub_name_;
void *stub_func_ = nullptr;
@@ -76,6 +105,11 @@ class AiCoreOpTask {
bool clear_atomic_ = true;
bool is_single_op_ = false;
std::vector<int> output_indices_to_skip_;
string original_kernel_key_;
string node_info_;
uint32_t tiling_key_ = 0;
void *handle_ = nullptr;
bool is_dynamic_ = false;
};

class AtomicAddrCleanOpTask : public AiCoreOpTask {
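
The holder/registry pair declared above is essentially an RAII wrapper: ownership of each device binary handle moves into a unique_ptr kept in a process-wide set, and the holder's destructor unregisters the binary. A generic sketch of the same pattern, with plain C++ in place of the runtime calls:

#include <iostream>
#include <memory>
#include <set>

class HandleHolder {
 public:
  explicit HandleHolder(void *handle) : handle_(handle) {}
  ~HandleHolder() { std::cout << "unregister " << handle_ << std::endl; }  // real code calls rtDevBinaryUnRegister
  void *Get() const { return handle_; }

 private:
  void *handle_ = nullptr;
};

class HandleRegistry {
 public:
  static HandleRegistry &GetInstance() {
    static HandleRegistry instance;
    return instance;
  }
  bool Add(std::unique_ptr<HandleHolder> &&holder) {
    return holders_.emplace(std::move(holder)).second;  // the registry now owns the handle
  }

 private:
  std::set<std::unique_ptr<HandleHolder>> holders_;
};

int main() {
  static int dummy = 0;
  HandleRegistry::GetInstance().Add(std::unique_ptr<HandleHolder>(new HandleHolder(&dummy)));
  return 0;
}  // holders are released when the registry's static storage is torn down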


+ 2
- 3
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc

@@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return FAILED;
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0);
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
auto callback = [=, &context]() {
GELOGD("Node[%s] callback start.", node_name_.c_str());
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start");


+ 11
- 34
ge/hybrid/node_executor/task_context.cc

@@ -36,10 +36,6 @@ TaskContext::TaskContext(GraphExecutionContext *execution_context,

TaskContext::~TaskContext() {
GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str());
for (auto ws_addr : workspaces_) {
execution_context_->allocator->Deallocate(ws_addr);
}

// release output
for (int i = 0; i < NumOutputs(); ++i) {
auto output_tensor = MutableOutput(i);
@@ -49,6 +45,13 @@ TaskContext::~TaskContext() {
}
}

void TaskContext::ReleaseWorkspace() {
GELOGD("[%s] Start ReleaseWorkspace.", node_item_->NodeName().c_str());
for (auto ws_addr : workspaces_) {
execution_context_->allocator->Deallocate(ws_addr);
}
}

std::unique_ptr<TaskContext> TaskContext::Create(NodeState *node_state,
GraphExecutionContext *execution_context,
SubgraphContext *subgraph_context) {
@@ -512,21 +515,21 @@ Status TaskContext::Synchronize() {
}

Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
uint32_t task_type, uint32_t block_dim) {
const std::string &task_type, uint32_t block_dim) {
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
const NodeItem &node_item = GetNodeItem();
auto op_desc = node_item.GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
const GraphExecutionContext * graph_context = GetExecutionContext();
const GraphExecutionContext *graph_context = GetExecutionContext();
GE_CHECK_NOTNULL(graph_context);
const HybridModel *model = graph_context->model;
GE_CHECK_NOTNULL(model);

std::string op_name = op_desc->GetName();
std::string dynamic_model_name = model->GetModelName();
TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = dynamic_model_name;
tmp_task_desc_info.op_name = op_name;
tmp_task_desc_info.op_name = op_desc->GetName();
tmp_task_desc_info.op_type = op_desc->GetType();
tmp_task_desc_info.block_dim = block_dim;
tmp_task_desc_info.task_type = task_type;
tmp_task_desc_info.task_id = task_id;
@@ -543,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const {
return node_state_;
}

Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) {
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
const NodeItem &node_item = GetNodeItem();
auto op_desc = node_item.GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
const GraphExecutionContext * graph_context = GetExecutionContext();
GE_CHECK_NOTNULL(graph_context);
const HybridModel *model = graph_context->model;
GE_CHECK_NOTNULL(model);

std::string dynamic_model_name = model->GetModelName();
auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) {
ComputeGraphDescInfo tmp_compute_graph_info;
tmp_compute_graph_info.model_name = dynamic_model_name;
tmp_compute_graph_info.op_name = op_desc->GetName();
tmp_compute_graph_info.op_type = op_desc->GetType();
tmp_compute_graph_info.task_id = task_id;
tmp_compute_graph_info.stream_id = stream_id;
compute_graph_info.emplace_back(tmp_compute_graph_info);
}
}
return SUCCESS;
}

} // namespace hybrid
} // namespace ge

+ 3
- 6
ge/hybrid/node_executor/task_context.h

@@ -56,6 +56,7 @@ class TaskContext {
void ReleaseInputsAndOutputs();
bool NeedCallback();
void ReleaseInput(int index);
void ReleaseWorkspace();
const TensorValue *GetInput(int index) const;
const TensorValue *GetOutput(int index) const;
TensorValue *MutableOutput(int index);
@@ -112,13 +113,10 @@ class TaskContext {
void *handle_ = nullptr;

const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; }
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim);
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
const std::string &task_type, uint32_t block_dim);
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); }

const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; }
Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id);
void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); }

private:
TaskContext(GraphExecutionContext *execution_context,
NodeState *node_state,
@@ -140,7 +138,6 @@ class TaskContext {
uint32_t task_id_ = 0;
uint32_t stream_id_ = 0;
std::vector<TaskDescInfo> task_desc_info;
std::vector<ComputeGraphDescInfo> compute_graph_info;
};
} // namespace hybrid
} // namespace ge


+ 6
- 6
ge/offline/main.cc

@@ -62,19 +62,18 @@ using std::shared_ptr;
using std::string;
using std::vector;

namespace {
static bool is_dynamic_input = false;

const char *const kModeSupport = "only support 0(model to framework model), "
"1(framework model to json), 3(only pre-check), "
"5(pbtxt to json), 6(display model info)";
const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)";

static const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model";
static const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model";
static const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model";

const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model";
const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model";
const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model";
// limit available mem size 2G
const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024
} // namespace

DEFINE_string(model, "", "The model file.");
DEFINE_string(output, "", "The output file path&name.");
@@ -1326,6 +1325,7 @@ int init(int argc, char* argv[]) {
return ret;
}

ErrorManager::GetInstance().GenWorkStreamIdDefault();
return 0;
}



+ 3
- 2
ge/session/omg.cc

@@ -23,7 +23,7 @@
#include "common/debug/memory_dumper.h"
#include "common/ge/ge_util.h"
#include "common/helper/model_helper.h"
#include "common/model_parser/base.h"
#include "common/model_parser/model_parser.h"
#include "common/model_saver.h"
#include "common/properties_manager.h"
#include "common/string_util.h"
@@ -965,7 +965,8 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js
} else {
ErrorManager::GetInstance().ATCReportErrMessage("E10003",
{"parameter", "value", "reason"}, {"om", model_file, "invalid om file"});
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param.");
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"ParseModelContent failed because of invalid om file. Please check --om param.");
}

if (model.model_data != nullptr) {


+ 7
- 23
ge/single_op/single_op.cc

@@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
return SUCCESS;
}

string model_name;
string op_name;
TaskDescInfo tmp_task_desc_info;
uint32_t model_id;
uint32_t block_dim;
if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) {
if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed");
return ACL_ERROR_GE_PARAM_INVALID;
}
GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str());
std::vector<TaskDescInfo> task_desc_info;
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("ProfilingReport of op[%s] model[%s] start.",
tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str());

TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = model_name;
tmp_task_desc_info.op_name = op_name;
tmp_task_desc_info.block_dim = block_dim;
tmp_task_desc_info.task_id = task_id;
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = shape_type;
tmp_task_desc_info.cur_iter_num = 0;
tmp_task_desc_info.task_type = op_task->GetTaskType();
GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
task_desc_info.emplace_back(tmp_task_desc_info);

std::vector<ComputeGraphDescInfo> compute_graph_info;
std::vector<TaskDescInfo> task_desc_info;
task_desc_info.emplace_back(tmp_task_desc_info);

auto &profiling_manager = ProfilingManager::Instance();
profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info);
profiling_manager.ReportProfilingData(model_id, task_desc_info);
return SUCCESS;
}
} // namespace


+ 6
- 3
ge/single_op/single_op_manager.cc

@@ -30,8 +30,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManag
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFromModel(const std::string &model_name,
const ModelData &model_data,
void *stream,
SingleOp **single_op) {
GELOGI("GetOpFromModel in. model name = %s", model_name.c_str());
SingleOp **single_op,
const uint64_t model_id) {
GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id);
if (single_op == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "single op is null");
return ACL_ERROR_GE_INTERNAL_ERROR;
@@ -99,7 +100,9 @@ StreamResource *SingleOpManager::TryGetResource(uintptr_t resource_id) {
Status SingleOpManager::GetDynamicOpFromModel(const string &model_name,
const ModelData &model_data,
void *stream,
DynamicSingleOp **single_op) {
DynamicSingleOp **single_op,
const uint64_t model_id) {
GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id);
if (!tiling_func_registered_) {
RegisterTilingFunc();
}


+ 4
- 2
ge/single_op/single_op_manager.h

@@ -37,12 +37,14 @@ class SingleOpManager {
Status GetOpFromModel(const std::string &model_name,
const ge::ModelData &model_data,
void *stream,
SingleOp **single_op);
SingleOp **single_op,
const uint64_t model_id);

Status GetDynamicOpFromModel(const std::string &model_name,
const ge::ModelData &model_data,
void *stream,
DynamicSingleOp **dynamic_single_op);
DynamicSingleOp **dynamic_single_op,
const uint64_t model_id);

StreamResource *GetResource(uintptr_t resource_id, rtStream_t stream);



+ 13
- 9
ge/single_op/single_op_model.cc

@@ -190,7 +190,7 @@ Status SingleOpModel::LoadAllNodes() {
auto node = nodes.at(i);
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
op_list_[i] = node;
op_list_[op_desc->GetId()] = node;
auto op_type = op_desc->GetType();
GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, node->GetName().c_str(), op_type.c_str());

@@ -261,7 +261,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
auto ret = BuildKernelTask(task_def.kernel(), &tbe_task);
auto ret = BuildKernelTask(task_def, &tbe_task);
if (ret != SUCCESS) {
return ret;
}
@@ -332,9 +332,11 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) {
}
}

Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) {
Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) {
GE_CHECK_NOTNULL(task);
const auto &context = kernel_def.context();
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();
auto iter = op_list_.find(context.op_index());
if (iter == op_list_.end()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index());
@@ -347,7 +349,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def);
auto builder = TbeTaskBuilder(model_name_, iter->second, task_def);
auto ret = builder.BuildTask(*tbe_task, model_params_);
if (ret != SUCCESS) {
delete tbe_task;
@@ -418,13 +420,15 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
}

Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
const domi::KernelDef &kernel_def = task_def.kernel();
const auto &context = kernel_def.context();
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();

auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
tbe_task->SetModelArgs(model_name_, model_id_);
single_op.op_task_.reset(tbe_task);
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
@@ -453,7 +457,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
task_def.DebugString().c_str());
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type == RT_MODEL_TASK_KERNEL) {
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
if (single_op.op_task_ != nullptr) {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks.");
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;


+ 1
- 2
ge/single_op/single_op_model.h

@@ -24,7 +24,6 @@
#include <vector>

#include "common/helper/model_helper.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "single_op/single_op.h"
#include "single_op/stream_resource.h"

@@ -67,7 +66,7 @@ class SingleOpModel {

Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op);
Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op);
Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task);
Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);


+ 53
- 15
ge/single_op/task/op_task.cc

@@ -23,6 +23,7 @@
#include "aicpu/common/aicpu_task_struct.h"
#include "common/dump/dump_manager.h"
#include "common/dump/dump_op.h"
#include "common/profiling/profiling_manager.h"
#include "common/formats/formats.h"
#include "common/math/math_util.h"
#include "framework/common/debug/log.h"
@@ -93,6 +94,14 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size
op_desc_ = op_desc;
}

void TbeOpTask::SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc,
const domi::KernelDefWithHandle &kernel_def_with_handle) {
SetKernelArgs(std::move(args), arg_size, block_dim, op_desc);
original_kernel_key_ = kernel_def_with_handle.original_kernel_key();
node_info_ = kernel_def_with_handle.node_info();
}

void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; }

void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) {
@@ -100,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) {
model_id_ = model_id;
}

Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id,
uint32_t &block_dim) {
model_name = model_name_;
model_id = model_id_;
block_dim = block_dim_;
Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) {
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_CHECK_NOTNULL(op_desc_);
op_name = op_desc_->GetName();
string op_name = op_desc_->GetName();
GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
model_id = model_id_;
task_desc_info.model_name = model_name_;
task_desc_info.block_dim = block_dim_;
task_desc_info.task_id = task_id;
task_desc_info.stream_id = stream_id;
task_desc_info.op_name = op_name;
task_desc_info.op_type = op_desc_->GetType();
auto &prof_mgr = ProfilingManager::Instance();
prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info);
return SUCCESS;
}

Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
return UNSUPPORTED;
}
@@ -145,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
return UNSUPPORTED;
}

uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; }
const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; }

TbeOpTask::~TbeOpTask() {
if (sm_desc_ != nullptr) {
@@ -163,7 +186,11 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; }

const std::string &TbeOpTask::GetStubName() const { return stub_name_; }

uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }
const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }

void TbeOpTask::SetHandle(void *handle) {
this->handle_ = handle;
}

Status TbeOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_);
@@ -204,8 +231,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve
}
block_dim_ = run_info.block_dim;
tiling_data_ = run_info.tiling_data.str();
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_,
tiling_data_.size());
tiling_key_ = run_info.tiling_key;
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
tiling_data_.size(), tiling_key_);

GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces");
return SUCCESS;
@@ -329,8 +357,17 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
}

GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str());
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
if (handle_ == nullptr) {
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
} else {
std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_);
std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr,
stream, kernel_info.c_str()));
GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str());
}

return SUCCESS;
}

@@ -363,7 +400,8 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint
num_inputs_,
num_outputs_,
unknown_type_));
GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!");
GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION,
"Malloc aicpu_ext_handle mem failed!");

Status ret = aicpu_ext_handle_->Parse(kernel_ext_info);
if (ret != SUCCESS) {
@@ -401,7 +439,7 @@ Status AiCpuBaseTask::SetInputConst() {
return SUCCESS;
}

Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
std::vector<GeTensorDesc> &output_desc,
rtStream_t stream) {
GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_);
@@ -811,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) {
return DoUpdateArgTable(param, false);
}

uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }
const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }

void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data());
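
A trimmed illustration of the profiling refactor above, with a stand-in record type and a fake id source (the real code queries rtGetTaskIdAndStreamID and the profiling manager): instead of handing back loose strings, the task now packs everything the profiler needs into one descriptor.

#include <cstdint>
#include <iostream>
#include <string>

struct TaskRecord {  // stand-in for TaskDescInfo
  std::string model_name;
  std::string op_name;
  std::string op_type;
  uint32_t block_dim = 0;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
};

// Hypothetical id source; the real implementation asks the runtime right after kernel launch.
void GetIds(uint32_t &task_id, uint32_t &stream_id) {
  task_id = 42;
  stream_id = 1;
}

TaskRecord CollectProfilingArgs(const std::string &model, const std::string &op, const std::string &type,
                                uint32_t block_dim) {
  TaskRecord record;
  record.model_name = model;
  record.op_name = op;
  record.op_type = type;
  record.block_dim = block_dim;
  GetIds(record.task_id, record.stream_id);
  return record;  // the caller appends this to the report instead of rebuilding it field by field
}

int main() {
  TaskRecord record = CollectProfilingArgs("resnet50", "conv1", "Conv2D", 32);
  std::cout << record.op_name << " task_id=" << record.task_id << std::endl;
  return 0;
}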


+ 12
- 4
ge/single_op/task/op_task.h

@@ -43,7 +43,7 @@ class OpTask {
const vector<GeTensorDesc> &output_desc);
virtual Status UpdateArgTable(const SingleOpModelParam &param);
void SetModelArgs(std::string model_name, uint32_t model_id);
Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim);
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id);
const OpDescPtr &GetOpdesc() const {return op_desc_;}
Status OpenDump(rtStream_t stream);
virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0;
@@ -52,7 +52,7 @@ class OpTask {
std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &output_buffers,
rtStream_t stream);
virtual uint32_t GetTaskType() const;
virtual const std::string &GetTaskType() const;

protected:
Status DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace);
@@ -78,6 +78,8 @@ class TbeOpTask : public OpTask {
void SetSmDesc(void *sm_desc);
void SetStubFunc(const std::string &name, const void *stub_func);
void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc);
void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle);

Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc) override;
@@ -86,7 +88,8 @@ class TbeOpTask : public OpTask {
size_t GetArgSize() const;
const std::string &GetStubName() const;
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
uint32_t GetTaskType() const override;
const std::string &GetTaskType() const override;
void SetHandle(void *handle);

private:
friend class SingleOpModel;
@@ -107,6 +110,11 @@ class TbeOpTask : public OpTask {
std::string tiling_data_;
std::vector<void *> workspaces_;
NodePtr node_;

uint32_t tiling_key_ = 0;
void* handle_ = nullptr;
std::string original_kernel_key_;
std::string node_info_;
};

class AiCpuBaseTask : public OpTask {
@@ -115,7 +123,7 @@ class AiCpuBaseTask : public OpTask {
~AiCpuBaseTask() override;
UnknowShapeOpType GetUnknownType() const { return unknown_type_; }
Status UpdateArgTable(const SingleOpModelParam &param) override;
uint32_t GetTaskType() const override;
const std::string &GetTaskType() const override;

protected:
Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);


+ 144
- 44
ge/single_op/task/tbe_task_builder.cc

@@ -49,6 +49,15 @@ KernelHolder::~KernelHolder() {
}
}

HandleHolder::HandleHolder(void *bin_handle)
: bin_handle_(bin_handle) {}

HandleHolder::~HandleHolder() {
if (bin_handle_ != nullptr) {
GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_));
}
}

const char *KernelBinRegistry::GetUnique(const string &stub_func) {
std::lock_guard<std::mutex> lock(mutex_);
auto it = unique_stubs_.find(stub_func);
@@ -76,10 +85,17 @@ bool KernelBinRegistry::AddKernel(const std::string &stub_name, std::unique_ptr<
return ret.second;
}

TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def)
bool HandleRegistry::AddHandle(std::unique_ptr<HandleHolder> &&holder) {
auto ret = registered_handles_.emplace(std::move(holder));
return ret.second;
}

TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def)
: node_(node),
op_desc_(node->GetOpDesc()),
kernel_def_(kernel_def),
task_def_(task_def),
kernel_def_(task_def.kernel()),
kernel_def_with_handle_(task_def.kernel_with_handle()),
stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {}

Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle,
@@ -89,9 +105,14 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi
binary.data = kernel_bin.GetBinData();
binary.length = kernel_bin.GetBinDataSize();
binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC;
auto ret = rtDevBinaryRegister(&binary, bin_handle);
Status ret = 0;
if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) {
ret = rtRegisterAllKernel(&binary, bin_handle);
} else {
ret = rtDevBinaryRegister(&binary, bin_handle);
}
if (ret != RT_ERROR_NONE) {
GELOGE(ret, "rtDevBinaryRegister failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(),
GELOGE(ret, "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(),
param.core_type, static_cast<int>(ret));
return ret;
}
@@ -128,14 +149,15 @@ Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_nam

Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle,
const SingleOpModelParam &param) {
std::string kernel_name;
GetKernelName(op_desc_, kernel_name);

void *handle = nullptr;
auto ret = DoRegisterBinary(tbe_kernel, &handle, param);
if (ret != SUCCESS) {
return ret;
}
if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) {
*bin_handle = handle;
return SUCCESS;
}

ret = DoRegisterMeta(handle);
if (ret != SUCCESS) {
@@ -143,6 +165,8 @@ Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const
return ret;
}

std::string kernel_name;
GetKernelName(op_desc_, kernel_name);
ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str());
if (ret != SUCCESS) {
GE_CHK_RT(rtDevBinaryUnRegister(handle));
@@ -186,13 +210,15 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam

void *bin_handle = nullptr;
auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param);
if (ret == SUCCESS) {
holder->SetBinHandle(bin_handle);
if (!registry.AddKernel(stub_name_, std::move(holder))) {
// should not happen. only one thread can reach here
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
holder->SetBinHandle(bin_handle);
if (!registry.AddKernel(stub_name_, std::move(holder))) {
// should not happen. only one thread can reach here
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
}

@@ -200,6 +226,35 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam
return SUCCESS;
}

Status TbeTaskBuilder::RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam &param) {
GELOGD("RegisterKernelWithHandle begin.");
HandleRegistry &registry = HandleRegistry::GetInstance();
auto tbe_kernel = GetTbeKernel(op_desc_);
if (tbe_kernel == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s",
op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
void *bin_handle = nullptr;
auto ret = DoRegisterKernel(*tbe_kernel, nullptr, &bin_handle, param);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. node name = %s", op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
handle_ = bin_handle;
auto holder = std::unique_ptr<HandleHolder>(new (std::nothrow) HandleHolder(handle_));
if (holder == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
if (!registry.AddHandle(std::move(holder))) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}

return SUCCESS;
}

Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param) const {
const std::string &sm_desc_str = kernel_def_.sm_desc();
if (sm_desc_str.empty()) {
@@ -217,17 +272,17 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param
}
}

auto rtRet = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM);
if (rtRet != RT_ERROR_NONE) {
GELOGE(rtRet, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rtRet));
return rtRet;
auto rt_ret = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rt_ret));
return rt_ret;
}

rtRet = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rtRet != RT_ERROR_NONE) {
rt_ret = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
(void)rtMemFreeManaged(*sm_desc);
GELOGE(rtRet, "rtMemcpy, ret: %d", static_cast<int>(rtRet));
return rtRet;
GELOGE(rt_ret, "rtMemcpy, ret: %d", static_cast<int>(rt_ret));
return rt_ret;
}
}

@@ -239,10 +294,10 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
GE_CHECK_NOTNULL(args);

auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rtRet != RT_ERROR_NONE) {
GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rtRet));
return RT_ERROR_TO_GE_STATUS(rtRet);
auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

const domi::KernelContext &context = kernel_def_.context();
@@ -258,39 +313,83 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rtRet != RT_ERROR_NONE) {
GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rtRet));
return RT_ERROR_TO_GE_STATUS(rtRet);
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc);

return SUCCESS;
}

Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param,
const OpDescPtr &op_desc) {
size_t arg_size = kernel_def_with_handle_.args_size();
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
GE_CHECK_NOTNULL(args);

auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
return rt_ret;
}

const domi::KernelContext &context = kernel_def_with_handle_.context();
const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint16_t offset = *args_offset_tmp;

bool is_dynamic = false;
(void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic);
if (is_dynamic) {
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task));
} else {
// copy args
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
return rt_ret;
}
}
task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc,
kernel_def_with_handle_);

return SUCCESS;
}

Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam &param) {
GELOGD("Build tbe task begin");
auto ret = SetKernelArgs(task, param, op_desc_);
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) :
SetKernelArgs(task, param, op_desc_);
if (ret != SUCCESS) {
return ret;
}

ret = RegisterKernel(task, param);
ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) :
RegisterKernel(task, param);
task.SetHandle(handle_);
if (ret != SUCCESS) {
return ret;
}

auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_);
GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str());

void *stub_func = nullptr;
auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func);
if (rtRet != SUCCESS) {
GELOGE(rtRet, "rtGetFunctionByName failed.");
return RT_ERROR_TO_GE_STATUS(rtRet);
if (task_type != RT_MODEL_TASK_ALL_KERNEL) {
void *stub_func = nullptr;
auto rt_ret = rtGetFunctionByName(stub_name_.c_str(), &stub_func);
if (rt_ret != SUCCESS) {
GELOGE(rt_ret, "rtGetFunctionByName failed.");
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
task.SetStubFunc(stub_name_, stub_func);
}

task.SetStubFunc(stub_name_, stub_func);
return SUCCESS;
}
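The rewritten BuildTask branches on the task type carried in the TaskDef: RT_MODEL_TASK_ALL_KERNEL tasks go through the new handle-based registration (rtRegisterAllKernel / rtKernelLaunchWithHandle, stubbed further down in runtime_stub.cc), while ordinary kernel tasks keep the stub-name path. A hedged sketch of the two launch paths, assuming the rt* declarations from the runtime headers; LaunchTbeTask itself is hypothetical:

// Sketch only: assumes the runtime header that declares the rt* API.
rtError_t LaunchTbeTask(bool all_kernel, void *handle, const void *dev_func, const char *stub_name,
                        uint32_t block_dim, void *args, uint32_t args_size, rtStream_t stream) {
  if (all_kernel) {
    // handle-based path: the kernel binary was registered once via rtRegisterAllKernel
    return rtKernelLaunchWithHandle(handle, dev_func, block_dim, args, args_size, nullptr, stream, nullptr);
  }
  // legacy path: resolve the stub function by name, then launch it
  void *stub_func = nullptr;
  rtError_t rt_ret = rtGetFunctionByName(stub_name, &stub_func);
  if (rt_ret != RT_ERROR_NONE) {
    return rt_ret;
  }
  return rtKernelLaunch(stub_func, block_dim, args, args_size, nullptr, stream);
}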

@@ -299,15 +398,16 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) {
int64_t max_size = -1;
(void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size);
GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size);
if (max_size <= 0) {
if (max_size < 0) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size);
return ACL_ERROR_GE_PARAM_INVALID;
}

void *tiling_buffer = nullptr;
GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM));
GE_CHECK_NOTNULL(tiling_buffer);
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size);
if (max_size > 0) {
GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM));
GE_CHECK_NOTNULL(tiling_buffer);
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size);
}

task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size));
return SUCCESS;
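With this change an op_param_size attribute of 0 is no longer an error: only a negative value is rejected, and the HBM tiling buffer is allocated only when the size is positive, so a dynamic-shape op without tiling data simply records a null buffer of size 0. A standalone restatement of the rule, with malloc standing in for rtMalloc and InitTilingBuffer being a hypothetical name:

#include <cstdint>
#include <cstdlib>

int InitTilingBuffer(int64_t max_size, void **tiling_buffer) {
  *tiling_buffer = nullptr;
  if (max_size < 0) {    // only a negative attribute is invalid now
    return -1;
  }
  if (max_size > 0) {    // 0 means "no tiling data needed": keep the null buffer
    *tiling_buffer = std::malloc(static_cast<size_t>(max_size));
    if (*tiling_buffer == nullptr) {
      return -1;
    }
  }
  return 0;
}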


+ 32
- 1
ge/single_op/task/tbe_task_builder.h View File

@@ -42,6 +42,19 @@ class KernelHolder {
std::shared_ptr<ge::OpKernelBin> kernel_bin_;
};

class HandleHolder {
public:
HandleHolder(void *bin_handle);
~HandleHolder();

void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; }
void *GetBinHandle() { return bin_handle_; }

private:
friend class HandleRegistry;
void *bin_handle_ = nullptr;
};

class KernelBinRegistry {
public:
static KernelBinRegistry &GetInstance() {
@@ -61,9 +74,22 @@ class KernelBinRegistry {
std::mutex mutex_;
};

class HandleRegistry {
public:
static HandleRegistry &GetInstance() {
static HandleRegistry instance;
return instance;
}

bool AddHandle(std::unique_ptr<HandleHolder> &&holder);

private:
std::set<std::unique_ptr<HandleHolder>> registered_handles_;
};
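HandleRegistry mirrors KernelBinRegistry but owns the binary handles produced by the handle-based registration path; since registered_handles_ is a set of unique_ptr, AddHandle presumably just move-inserts the holder so its lifetime (and any cleanup in ~HandleHolder) is tied to the process-wide singleton. One plausible implementation, sketched rather than copied from tbe_task_builder.cc:

bool HandleRegistry::AddHandle(std::unique_ptr<HandleHolder> &&holder) {
  auto result = registered_handles_.emplace(std::move(holder));
  return result.second;  // false if the same holder was already registered
}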

class TbeTaskBuilder {
public:
TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def);
TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def);
~TbeTaskBuilder() = default;

Status BuildTask(TbeOpTask &task, const SingleOpModelParam &param);
@@ -71,9 +97,11 @@ class TbeTaskBuilder {
private:
Status InitTilingInfo(TbeOpTask &task);
Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
Status GetSmDesc(void **sm_desc, const SingleOpModelParam &param) const;

Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam &param);
Status RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam &param);
Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle,
const SingleOpModelParam &param);
Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam &param) const;
@@ -83,8 +111,11 @@ class TbeTaskBuilder {

const NodePtr node_;
const OpDescPtr op_desc_;
const domi::TaskDef &task_def_;
const domi::KernelDef &kernel_def_;
const domi::KernelDefWithHandle &kernel_def_with_handle_;
const std::string stub_name_;
void *handle_ = nullptr;
};
} // namespace ge



+ 4
- 0
inc/external/ge/ge_api.h View File

@@ -42,6 +42,10 @@ GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString
// Finalize GE, release all resources
GE_FUNC_VISIBILITY Status GEFinalize();

GE_FUNC_VISIBILITY std::string GEGetErrorMsg();

GE_FUNC_VISIBILITY std::string GEGetWarningMsg();

class GE_FUNC_VISIBILITY Session {
public:
ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &))
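GEGetErrorMsg and GEGetWarningMsg expose the text presumably collected by the error manager, so a caller can report why a GE call failed without scraping logs. A hedged usage sketch; the include path follows the inc/external layout and the options map is left empty for brevity:

#include <iostream>
#include <map>
#include "ge/ge_api.h"

int main() {
  std::map<ge::AscendString, ge::AscendString> options;
  if (ge::GEInitialize(options) != ge::SUCCESS) {
    std::cerr << "GEInitialize failed: " << ge::GEGetErrorMsg() << std::endl;
    return -1;
  }
  std::cout << "warnings: " << ge::GEGetWarningMsg() << std::endl;
  return ge::GEFinalize() == ge::SUCCESS ? 0 : -1;
}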


+ 5
- 13
inc/framework/common/ge_types.h View File

@@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM";
const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement";

// profiling data
const uint32_t kTaskTypeAicore = 0;
const uint32_t kTaskTypeAicpu = 1;
const uint32_t kTaskTypeInvalid = 0xFFFF;
const std::string kTaskTypeAicore = "AI_CORE";
const std::string kTaskTypeAicpu = "AI_CPU";
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";
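The profiling task type is now carried as a string rather than a numeric code, so consumers compare against the named constants instead of magic numbers. A minimal standalone sketch; the constants are restated locally so the snippet compiles on its own, and IsAiCoreTask is a hypothetical helper:

#include <string>

const std::string kTaskTypeAicore = "AI_CORE";            // mirrors inc/framework/common/ge_types.h
const std::string kTaskTypeAicpu = "AI_CPU";
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";

bool IsAiCoreTask(const std::string &task_type) {
  return task_type == kTaskTypeAicore;                     // previously: task_type == 0
}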

// Data cache, including data address and length
struct DataBuffer {
@@ -251,27 +251,19 @@ struct Options {
struct TaskDescInfo {
std::string model_name;
std::string op_name;
std::string op_type;
uint32_t block_dim;
uint32_t task_id;
uint32_t stream_id;
std::string shape_type;
int64_t cur_iter_num;
uint32_t task_type;
};

// Profiling info of graph
struct ComputeGraphDescInfo {
std::string model_name;
std::string op_name;
std::string op_type;
std::string task_type;
std::vector<Format> input_format;
std::vector<std::vector<int64_t>> input_shape;
std::vector<DataType> input_data_type;
std::vector<Format> output_format;
std::vector<std::vector<int64_t>> output_shape;
std::vector<DataType> output_data_type;
uint32_t task_id;
uint32_t stream_id;
};

struct OpDescInfo {


+ 6
- 0
inc/framework/executor/ge_executor.h View File

@@ -260,12 +260,18 @@ class GE_FUNC_VISIBILITY GeExecutor {
static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream,
SingleOp **single_op);

static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream,
SingleOp **single_op, const uint64_t model_id);

static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
std::vector<DataBuffer> &outputs);

static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op);

static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op, const uint64_t model_id);

static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &outputs);
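The V2 loaders add a caller-supplied model_id that is threaded through to the single-op executor (useful when attributing profiling or dump data to a particular load); the original overloads remain for compatibility. A hedged usage sketch using the same include path as the new executor unit test; the model id, name, and LoadMySingleOp wrapper are illustrative:

#include "executor/ge_executor.h"

ge::Status LoadMySingleOp(const ge::ModelData &model_data, void *stream, ge::SingleOp **op) {
  const uint64_t model_id = 1U;  // caller-chosen id propagated to the executor
  return ge::GeExecutor::LoadSingleOpV2("my_single_op", model_data, stream, op, model_id);
}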


+ 2
- 1
inc/framework/generator/generator_api.h View File

@@ -55,7 +55,8 @@ typedef void *OpTensor_t;
/// @return 0 for success / others for fail
///
GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num,
const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr,
const char *om_file);

///
/// @ingroup ge


+ 2
- 1
inc/framework/memory/memory_api.h View File

@@ -52,7 +52,8 @@ GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_ME
/// \param var_info [in] host variable addr infos.
/// \param mem_type [in] memory type for rdma pool.
/// \return Status result of function
GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info,
rtMemType_t mem_type = RT_MEMORY_HBM);

///
/// \param tensor_info [in] description for tensor stored shared memory.


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit a2b80cb22a62a6757c7dd31e684ca632e0b79268
Subproject commit 4a9bfd772cad72ff281a2e21d59b8d225a26789c

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit cfabf622b803d5957563a73652a0ce5086aab99d
Subproject commit 86162f60807c063f7344f902e443fc99657be637

+ 0
- 1
tests/CMakeLists.txt View File

@@ -19,7 +19,6 @@ add_subdirectory(depends/cce)
add_subdirectory(depends/slog)
add_subdirectory(depends/mmpa)
add_subdirectory(depends/runtime)
add_subdirectory(depends/omg)
add_subdirectory(depends/hccl)
add_subdirectory(depends/profiler)
add_subdirectory(depends/error_manager)


+ 5
- 0
tests/depends/mmpa/CMakeLists.txt View File

@@ -29,6 +29,11 @@ include_directories(${GE_CODE_DIR}/inc/framework)
include_directories(${GE_CODE_DIR}/metadef/inc/external)

add_library(mmpa_stub SHARED ${SRCS})

target_compile_options(mmpa_stub PRIVATE
-g
)

target_link_libraries(mmpa_stub PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed


+ 7
- 3
tests/depends/mmpa/src/mmpa_stub.cc View File

@@ -231,8 +231,12 @@ INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone)
INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen)
{
INT32 ret = EN_OK;
char *pRet = realpath(path, realPath);
if (pRet == NULL) {
if (path == nullptr || realPath == nullptr || realPathLen < MMPA_MAX_PATH) {
return EN_INVALID_PARAM;
}

char *ptr = realpath(path, realPath);
if (ptr == nullptr) {
ret = EN_ERROR;
}
return ret;
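The stub now validates its arguments before calling realpath, matching the mmpa contract that the output buffer must hold at least MMPA_MAX_PATH bytes. A short usage sketch against this stub; the path is illustrative:

CHAR resolved[MMPA_MAX_PATH] = {};
INT32 rc = mmRealPath("./build.sh", resolved, MMPA_MAX_PATH);
if (rc == EN_INVALID_PARAM) {
  // null argument or a buffer smaller than MMPA_MAX_PATH
} else if (rc == EN_ERROR) {
  // realpath itself failed (e.g. the file does not exist)
}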
@@ -260,7 +264,7 @@ INT32 mmDlclose(VOID *handle)

CHAR *mmDlerror()
{
return "";
return dlerror();
}

INT32 mmDladdr(VOID *addr, mmDlInfo *info)


+ 0
- 59
tests/depends/omg/CMakeLists.txt View File

@@ -1,59 +0,0 @@
# Copyright 2019-2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

#cmake_minimum_required(VERSION 2.8)

project(OMG_CCE)

set(CMAKE_CXX_STANDARD 11)

include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc)
include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/cce)
include_directories(${GE_CODE_DIR}/inc)
include_directories(${GE_CODE_DIR}/metadef/inc)
include_directories(${GE_CODE_DIR}/inc/framework)
include_directories(${GE_CODE_DIR}/metadef/inc/graph)
include_directories(${GE_CODE_DIR}/inc/external)
include_directories(${GE_CODE_DIR}/metadef/inc/external)
include_directories(${GE_CODE_DIR}/metadef/inc/external/graph)
include_directories(${GE_CODE_DIR}/ge)
include_directories(${CMAKE_BINARY_DIR})
include_directories(${CMAKE_BINARY_DIR}/proto/ge)
set(PROTO_LIST
"${GE_CODE_DIR}/metadef/proto/om.proto"
"${GE_CODE_DIR}/metadef/proto/task.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})

set(SRCS
# "${GE_CODE_DIR}/src/ge/common/util.cc"
"src/omg_stub.cc"
)

add_library(omg_stub SHARED ${SRCS} ${PROTO_SRCS} ${PROTO_HDRS})

target_compile_definitions(omg_stub PRIVATE
google=ascend_private
)

target_link_libraries(omg_stub PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
ascend_protobuf
-Wl,--as-needed
c_sec
json
)

+ 0
- 878
tests/depends/omg/src/omg_stub.cc View File

@@ -1,878 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <map>
#include <fstream>
#include <unordered_map>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>

#include "mmpa/mmpa_api.h"
#include "common/debug/log.h"
#include "common/debug/memory_dumper.h"
#include "common/types.h"
#include "common/util.h"
#include "common/string_util.h"
#include "common/properties_manager.h"
#include "common/model_parser/base.h"
#include "graph/model.h"
#include "cce/dnn.h"
#include "ge/ge_api_types.h"
#include "framework/common/ge_types.h"
#include "graph/utils/op_desc_utils.h"
#include "common/profiling/profiling_manager.h"

using domi::domiTensorFormat_t;
using namespace cce;
using namespace ge;

struct PROC_PARAM {
uint8_t *model_name;

// ISV Ek buffer
uint8_t *model_key;
uint32_t model_key_len;

// ISV root certificate buffer
uint8_t *root_cert;
uint32_t root_cert_len;

// ISV private key buffer
uint8_t *pri_key;
uint32_t pri_key_len;

// Raw AI Module Image buffer
uint8_t *ai_image;
uint32_t ai_image_len;

// ISV HW key buffer
uint8_t *hw_key;
uint32_t hw_key_len;
};

#ifdef __cplusplus
extern "C" {
#endif
using namespace ge;
namespace {
const char FMK_STATUS_FILE_DIR_ENV[] = "FMK_STATUS_FILE_DIR";
const char JOBSTATE_FILE_NAME[] = "jobstateupdate_framework";
const char HCOM_DETECT_FILE_NAME[] = "hcom_detection_result";
const char FILE_SEPARATE[] = "/";
} // namespace

#ifdef __cplusplus
}
#endif

namespace ge {
struct GeModelPartition {
ModelPartitionType type_ = MODEL_DEF;
uint8_t *data_ = nullptr;
size_t size_ = 0;

GeModelPartition() = default;

GeModelPartition(const GeModelPartition &partition){};

GeModelPartition &operator=(const GeModelPartition &partition) = delete;

~GeModelPartition() {
if (data_ != nullptr) {
delete[] data_;
data_ = nullptr;
}
}

Status SetData(uint8_t *data, size_t size) {
size_ = size;
data_ = new (std::nothrow) uint8_t[size]();
errno_t err;
err = memcpy_s(data_, size_, data, size);
if (err) {
GELOGE(ge::FAILED, "[GeModel Partition] Error occur when copy GeModel Partition data.");
return FAILED;
}
return SUCCESS;
}

Status SetType(ModelPartitionType type) {
type_ = type;
return SUCCESS;
}
};
struct OmFileContext {
vector<GeModelPartition> partition_datas_;
vector<char> partition_table_;
uint32_t model_data_len_;
};

class SubGraphInfo;
using SubGraphInfoPtr = std::shared_ptr<ge::SubGraphInfo>;

using GeModelPartitionPtr = std::shared_ptr<GeModelPartition>;
using ModelPtr = std::shared_ptr<ge::Model>;
class GeModel {
public:
explicit GeModel(const ModelPtr &model_ptr);
~GeModel() = default;
GeModel(const GeModel &other) = delete;
GeModel &operator=(const GeModel &other) = delete;

ModelPtr GetModelPtr() const;
Status AddPartition(uint8_t *data, size_t size, ModelPartitionType type);
Status GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition);
uint8_t GetPlatformType() const;
void SetPlatformType(const uint8_t platform_type) { platform_type_ = platform_type; }

private:
std::map<ModelPartitionType, GeModelPartitionPtr> partitions_;
ModelPtr model_ = nullptr;
uint8_t platform_type_ = {0};
};
using GeModelPtr = std::shared_ptr<ge::GeModel>;

GeModel::GeModel(const ModelPtr &model_ptr) { this->model_ = model_ptr; }

ModelPtr GeModel::GetModelPtr() const { return this->model_; }

uint8_t GeModel::GetPlatformType() const { return platform_type_; }

Status GeModel::AddPartition(uint8_t *data, size_t size, ModelPartitionType type) {
if (size == 0) {
return FAILED;
}

if (data == nullptr) {
return FAILED;
}

auto iter = partitions_.find(type);
if (iter != partitions_.end()) {
return FAILED;
}

GeModelPartitionPtr partition = nullptr;
GE_MAKE_SHARED(partition = std::make_shared<ge::GeModelPartition>(), return FAILED);
Status ret = partition->SetType(type);
if (ret != SUCCESS) {
return FAILED;
}
ret = partition->SetData(data, size);
if (ret != SUCCESS) {
return FAILED;
}

partitions_.insert(std::pair<ModelPartitionType, GeModelPartitionPtr>(type, partition));
return SUCCESS;
}

Status GeModel::GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition) {
auto iter = partitions_.find(type);
if (iter == partitions_.end()) {
return FAILED;
}

partition = iter->second;
return SUCCESS;
}
class OmFileSaveHelper {
public:
OmFileSaveHelper();
~OmFileSaveHelper();
vector<GeModelPartition> &GetModelPartitions();
ModelPartitionTable *GetPartitionTable();
ModelFileHeader model_header_;
ModelFileHeader &GetModelFileHeader() { return model_header_; }
void AddPartition(GeModelPartition &partition);

private:
OmFileContext context_;
};

OmFileSaveHelper::OmFileSaveHelper() {}

OmFileSaveHelper::~OmFileSaveHelper() {}

vector<GeModelPartition> &OmFileSaveHelper::GetModelPartitions() {
static std::vector<GeModelPartition> tmp;
return tmp;
}

ModelPartitionTable *OmFileSaveHelper::GetPartitionTable() { return nullptr; }

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OmFileSaveHelper::AddPartition(GeModelPartition &partition) {
context_.partition_datas_.push_back(partition);
context_.model_data_len_ += partition.size_;
}
class ModelBuilder {
public:
ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector<SubGraphInfoPtr> &subgraphs,
const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, int mode);
virtual ~ModelBuilder();
Status BuildModel(ge::Model &model_def);
Status SaveWeightsToModel(ge::Model &model);
Status SaveDataToModel(ge::Model &model, ge::GeModel &ge_model);
Status PreBuildModel();
Status BuildModelForGetTask(ge::Model &model_def);
ge::Buffer GetWeightBuffer() const;
void SetModelVersion(ge::Model &model_def);

public:
ge::Buffer weight_buffer_;
};

ModelBuilder::ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector<SubGraphInfoPtr> &subgraphs,
const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, int mode) {
weight_buffer_ = ge::Buffer(4100000);
}

ModelBuilder::~ModelBuilder() {}

Status ModelBuilder::SaveWeightsToModel(ge::Model &model) { return SUCCESS; }

Status ModelBuilder::BuildModel(ge::Model &model_def) { return SUCCESS; }

Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { return SUCCESS; }

Status ModelBuilder::PreBuildModel() { return SUCCESS; }

Status ModelBuilder::BuildModelForGetTask(ge::Model &model_def) { return SUCCESS; }

void ModelBuilder::SetModelVersion(ge::Model &model_def) { return; }

ge::Buffer ModelBuilder::GetWeightBuffer() const { return ge::Buffer(4100000); }

} // namespace ge

using ProcParam = struct PROC_PARAM;

namespace ge {
#include <iostream>
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_N = 0;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_C = 1;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_H = 2;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_W = 3;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_N = 0;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_H = 1;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_W = 2;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_C = 3;

const uint32_t MODEL_FILE_MAGIC_NUM = 0x444F4D49;
const uint32_t MODEL_FILE_HEAD_LEN = 256;
const uint32_t MODEL_VERSION = 0x10000000;
const int MAX_FILE_SIZE_LIMIT = INT_MAX;
bool FC_WEIGHT_COMPRESS_FLAG = false;

bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length) {
length = 10;
*buffer = new (std::nothrow) char[10]();
GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(*buffer == nullptr, false, "new an object failed.");
return true;
}
bool ReadProtoFromText(const char *file, google::protobuf::Message *message) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((nullptr == file || nullptr == message), return false,
"incorrect parameter. nullptr == file || nullptr == message");
string real_path = RealPath(file);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "proto file path '%s' not valid", file);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path.c_str()) == -1, return false, "file size not valid.");
std::ifstream fs(real_path.c_str(), std::ifstream::in);

if (!fs.is_open()) {
GELOGE(ge::FAILED, "proto file '%s' open fail.", file);
return false;
}
google::protobuf::io::IstreamInputStream input(&fs);
bool ret = google::protobuf::TextFormat::Parse(&input, message);
GE_IF_BOOL_EXEC(ret != true,
GELOGI("call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file."));
fs.close();
return ret;
}

uint64_t GetCurrentTimestap() { return 0; }

// get length of file
long GetFileLength(const std::string &input_file) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(input_file.empty(), return -1, "input_file path is null.");
string real_path = RealPath(input_file.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str());
unsigned long long file_length = 0;
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, return -1,
"open file failed.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length <= 0), return -1, "file length <= 0, not valid.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %llu is out of limit: %d.",
file_length, MAX_FILE_SIZE_LIMIT);
return file_length;
}
string RealPath(const char *path) {
string s = path;
if (s.size() >= PATH_MAX) {
return "";
}
if (s == "." || s == "1") {
return path;
// for insert_aipp_op unittest
} else if (s.substr(0, 3) == "llt") {
return path;
} else {
return "22";
}
}

bool CheckInputPathValid(const string &file_path) { return true; }
bool ReadProtoFromArray(const void *data, int size, Message *proto) { return true; }

struct ModelPartition {
ModelPartitionType type;
uint8_t *data = 0;
uint32_t size = 0;
};

class InsertNewOpUtil {
public:
InsertNewOpUtil();
~InsertNewOpUtil();
Status InsertNewOps(const ComputeGraphPtr &graph);
Status InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path);
Status Parse(const char *conf_path);
};

InsertNewOpUtil::InsertNewOpUtil() {}

Status InsertNewOpUtil::InsertNewOps(const ComputeGraphPtr &graph) { return SUCCESS; }

Status InsertNewOpUtil::InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path) { return SUCCESS; }

Status InsertNewOpUtil::Parse(const char *conf_path) { return SUCCESS; }

Status InitOME() { return SUCCESS; }
class GraphOptimizer {
public:
Status Optimize();
Status OptimizeAfterCal();
Status AdjustDataOpDesc();
Status InsertTransOp();
Status FusionFmkop();
Status Optimize4Cloud();
Status Optimize4FlowCtrl();
Status OptimizeBeforeBuild();
};
Status GraphOptimizer::Optimize() { return SUCCESS; }

Status Init(Options options) { return SUCCESS; }

Status Shutdown(Options options) { return SUCCESS; }

class Session {
public:
// singleton
static Session *Instance();
const uint32_t &DeviceId() const;
};

const uint32_t &Session::DeviceId() const { return 0; }

Session *Session::Instance() {
static Session instance;
return &instance;
}
struct OmgContext {
domiTensorFormat_t format;

// get input format from cmd
std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map;
std::vector<domiTensorFormat_t> output_formats;

// user-designated input dims
std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims;
// global input dims
std::map<std::string, std::vector<int64_t>> input_dims;

// solve rename op e.g: Detectionoutput:SsdDetectiontOutput
std::map<std::string, std::string> op_conf_map;
// save output node of network: key is op name, value = index, index is the output index of op
std::map<std::string, std::vector<int32_t>> out_nodes_map;
// user-designated output nodes (used for determining the order)
std::vector<std::pair<std::string, int32_t>> user_out_nodes;
// save the path of custom_aicpu
std::vector<std::string> aicpu_op_run_paths;
// save ddk
std::string ddk_version;
// save format
domiTensorFormat_t net_format;

FrameworkType type;
// RunMode run_mode;
bool train_flag = false;

std::string output_type;

/// save the name of network
/// eg:faster-rcnn, based on FirstStageProcessor after scope_fusion is faster-rcnn
/// then reorder conv+reshape of FirstStageBoxPredictor/BoxEncodingPredictor
/// need to delete op of reshape
std::string net_name;
};
} // namespace ge

namespace domi {
ge::OmgContext &GetContext() {
static ge::OmgContext tmp;
return tmp;
}
} // namespace domi

namespace ge {
class OpUtils {
public:
static Status InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor);
static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim,
ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt);
static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor);
};
Status OpUtils::InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor) {
ccCreatePoolingMaskDescriptor(&cc_tensor);
return SUCCESS;
}
Status OpUtils::InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim,
ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt) {
Status ret = SUCCESS;
return ret;
}

class FileSaver {
public:
Status SaveToFile(const string &file_path, ModelFileHeader &model_file_header,
ModelPartitionTable &model_partition_table, const std::vector<ModelPartition> &partition_datas);
Status SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param,
const ModelFileHeader *model_file_header, bool check_sum);
};

Status FileSaver::SaveToFile(const string &file_path, ModelFileHeader &model_file_header,
ModelPartitionTable &model_partition_table,
const std::vector<ModelPartition> &partition_datas) {
return SUCCESS;
}

Status FileSaver::SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param,
const ModelFileHeader *model_file_header, bool check_sum) {
return SUCCESS;
}

class ModelSaver : public FileSaver {};

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::DestroyTensorDescriptor(
ccTensorDescriptor_t &cc_tensor) {
if (nullptr != cc_tensor) {
ccStatus_t ret = ccDestroyTensorDescriptor(&cc_tensor);
GE_LOGE_IF(CC_STATUS_SUCCESS != ret, "ccDestroyTensorDescriptor failed. ret = %d", ret);
cc_tensor = nullptr;
}
}

} // namespace ge

namespace domi {
class OpRegistrationData {};

class OpRegistry {
public:
static OpRegistry *Instance();
std::vector<OpRegistrationData> registration_datas;

ImplyType GetImplyType(const std::string &op_type);
void GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const ImplyType &imply_type);
};

OpRegistry *OpRegistry::Instance() {
static OpRegistry instance;
return &instance;
}

void OpRegistry::GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const ImplyType &imply_type) {
if (imply_type == ImplyType::AI_CPU) {
vec_op_type.push_back("square");
}
}

class OpRegistrationTbe {
public:
static OpRegistrationTbe *Instance();

bool Finalize(OpRegistrationData &reg_data, bool is_train);
};

OpRegistrationTbe *OpRegistrationTbe::Instance() {
static OpRegistrationTbe instance;
return &instance;
}

bool OpRegistrationTbe::Finalize(OpRegistrationData &reg_data, bool is_train) { return true; }
} // namespace domi

namespace ge {
class GraphPrepare {
private:
Status OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph);
};

Status GraphPrepare::OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph) { return SUCCESS; }
} // namespace ge

namespace ge {

Status GetOriginalType(const ge::NodePtr &node, string &type) {
type = node->GetType();
GE_IF_BOOL_EXEC(type != FRAMEWORKOP, return SUCCESS);
ge::AttrUtils::GetStr(node->GetOpDesc(), "original_type", type);
return SUCCESS;
}

Status SetCycleEvent(const ge::NodePtr &node) { return SUCCESS; }

Status SetStreamLabel(const ge::NodePtr &node, const std::string &label) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = AttrUtils::CloneOpDesc(node->GetOpDesc());
GE_CHECK_NOTNULL(tmp_desc);

if (!AttrUtils::SetStr(tmp_desc, "_stream_label", label)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_STREAM_LABEL failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetActiveLabelList(const ge::NodePtr &node, const std::vector<std::string> &label) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// add list of active_label
if (!AttrUtils::SetListStr(tmp_desc, "_active_label", label)) {
GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_ACTIVE_LABEL_LIST failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetSwitchBranchNodeLabel(const ge::NodePtr &node, const std::string &branch_label) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// add branch_label of switch
if (!AttrUtils::SetStr(tmp_desc, "_switch_branch_node_label", branch_label)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_BRANCH_NODE_LABEL failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetSwitchTrueBranchFlag(const ge::NodePtr &node, bool value) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// add switch_true_branch_flag
if (!AttrUtils::SetBool(tmp_desc, "_switch_true_branch_flag", value)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetOriginalNodeName(const ge::NodePtr &node, const std::string &orig_name) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// record original_node_name
if (!AttrUtils::SetStr(tmp_desc, "_original_node_name", orig_name)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_ORIG_NODE_NAME failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetCyclicDependenceFlag(const ge::NodePtr &node) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);
// add cyclic_dependence_flag
if (!AttrUtils::SetBool(tmp_desc, "_cyclic_dependence_flag", true)) {
GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_CYCLIC_DEPENDENCE_FLAG failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

Status SetNextIteration(const ge::NodePtr &node, const std::string &next) {
GE_CHECK_NOTNULL(node);
OpDescPtr tmp_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(tmp_desc);

if (!AttrUtils::SetStr(tmp_desc, "_next_iteration_node", next)) {
GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_NEXT_ITERATION failed", node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}
} // namespace ge

namespace cce {
bool ccGetFuncState(ccFuncParamType_t type) { return true; }
} // namespace cce

namespace ge {
Status UnloadModel(uint32_t model_id) { return SUCCESS; }

Status GetInputOutputDescInfo(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc) {
return SUCCESS;
}

Status DataInput(const InputData *input_data, OutputData *output_data) { return SUCCESS; }
/*
class ModelManager {
public:
static std::shared_ptr<ModelManager> GetInstance();
static void FinalizeForPtr(ModelManager *) {}
Status DataInputTensor(uint32_t model_id, const std::vector<ge::TensorInfo> &inputs,
std::vector<ge::TensorInfo> &outputs);
Status DataInput(const InputData &input_data, OutputData &output_data);
Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc);
Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats);
Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats, std::vector<uint32_t> &output_formats);
Status Stop(uint32_t model_id);
Status Unload(uint32_t model_id);
Status LoadModelOnline(uint32_t &model_id, std::shared_ptr<ge::Model> &model,
std::shared_ptr<ModelListener> listener);
Status Start(uint32_t model_id);
Status GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size);
Status LoadModelOffline(uint32_t &model_id, const ModelData &model, std::shared_ptr<ModelListener> listener = nullptr,
void *dev_ptr = nullptr, size_t mem_size = 0, void *weight_ptr = nullptr,
size_t weight_size = 0);
Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids,
const std::vector<uint32_t> &output_queue_ids);

Status HandleCommand(const Command &command);
Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
OutputData &output_data);
void DestroyAicpuSession(uint64_t session_id);
};
void ModelManager::DestroyAicpuSession(uint64_t session_id) {}
std::shared_ptr<ModelManager> ModelManager::GetInstance() {
static std::shared_ptr<ModelManager> instance_ptr =
shared_ptr<ModelManager>(new ModelManager(), ModelManager::FinalizeForPtr);
return instance_ptr;
}

Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<ge::TensorInfo> &inputs,
std::vector<ge::TensorInfo> &outputs) {
return SUCCESS;
}

Status ModelManager::DataInput(const InputData &input_data, OutputData &output_data) { return SUCCESS; }

Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats) {
return SUCCESS;
}

Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc) {
return SUCCESS;
}

Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
std::vector<InputOutputDescInfo> &input_desc,
std::vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats) {
return SUCCESS;
}

Status ModelManager::Stop(uint32_t model_id) { return SUCCESS; }

Status ModelManager::Unload(uint32_t model_id) { return SUCCESS; }

Status ModelManager::LoadModelOnline(uint32_t &model_id, std::shared_ptr<ge::Model> &model,
std::shared_ptr<ModelListener> listener) {
return SUCCESS;
}

Status ModelManager::Start(uint32_t model_id) { return SUCCESS; }

Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { return SUCCESS; }

Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener,
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
return SUCCESS;
}

Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data,
const std::vector<uint32_t> &input_queue_ids,
const std::vector<uint32_t> &output_queue_ids) {
return SUCCESS;
}

Status ModelManager::HandleCommand(const Command &command) { return SUCCESS; }

Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
OutputData &output_data) {
return SUCCESS;
}

*/

} // namespace ge

namespace ge {

enum JobState {
JOBSTATE_WAITING = 1,
JOBSTATE_RUNNING,
JOBSTATE_KILLING,
JOBSTATE_SUCCEED,
JOBSTATE_FAILED,
JOBSTATE_KILLED,
JOBSTATE_UNKOWN
};

enum JobSubState {
JOBSUBSTATE_ENV_INIT = 201,
JOBSUBSTATE_ENV_FIN,
JOBSUBSTATE_RESOUCE_ALLOC,
JOBSUBSTATE_MODEL_COMPILE,
JOBSUBSTATE_GRAPH_PREPARE,
JOBSUBSTATE_GRAPH_SPLIT,
JOBSUBSTATE_GRAPH_OPTIMIZE,
JOBSUBSTATE_GRAPH_BUILD,
JOBSUBSTATE_GRAPH_LOAD,
JOBSUBSTATE_GRAPH_EXEC,
JOBSUBSTATE_GRAPH_UNLOAD,
JOBSUBSTATE_OTHER
};

enum ErrorModule {
ERROR_MODULE_DRIVER = 0x01,
ERROR_MODULE_RUNTIME = 0x04,
ERROR_MODULE_CCE = 0x06,
ERROR_MODULE_FMK = 0x08,
ERROR_MODULE_HCCL = 0x12
};

class CsaInteract {
public:
CsaInteract &GetInstance();
void WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state);
void Init(int32_t dev_index, int64_t job_id);
Status WriteJobState(JobState job_state, JobSubState job_sub_state = JOBSUBSTATE_OTHER,
uint32_t module_ret_errcode = SUCCESS, ErrorModule error_module = ERROR_MODULE_FMK);
// device index
int32_t dev_index_;
// job id
int64_t job_id_;
// is initialization complete
bool is_init_;
// current job state
JobState curr_state_;
// job state file
std::string job_state_file_;
// network connectivity detect file
std::string hcom_detect_file_;
// identification of internal errors that occurred during the training
bool is_have_internal_error_;
};

CsaInteract &CsaInteract::GetInstance() {
static CsaInteract instance;
return instance;
}

void CsaInteract::Init(int32_t dev_index, int64_t job_id) {
if (!is_init_) {
dev_index_ = dev_index;
job_id_ = job_id;
string csa_path_prefix;
if (std::getenv(FMK_STATUS_FILE_DIR_ENV) != nullptr) {
csa_path_prefix = std::getenv(FMK_STATUS_FILE_DIR_ENV);
}
if (!csa_path_prefix.empty()) {
std::string job_state_file = csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + JOBSTATE_FILE_NAME;
std::string hcom_detect_file =
csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + HCOM_DETECT_FILE_NAME;
job_state_file_ = RealPath(job_state_file.c_str());
hcom_detect_file_ = RealPath(hcom_detect_file.c_str());
}
is_init_ = true;
}
}

void CsaInteract::WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state) {}

} // namespace ge

Status ModelParserBase::LoadFromFile(const char *model_path, const char *key, int32_t priority,
ge::ModelData &model_data) {
return SUCCESS;
}

Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode,
ErrorModule error_module) {
return SUCCESS;
}

namespace ge {

static std::map<ge::DataType, uint32_t> data_type_to_length = {
{DT_BOOL, sizeof(bool)}, {DT_INT64, sizeof(int64_t)}, {DT_UINT64, sizeof(int64_t)}, {DT_FLOAT, sizeof(float)},
{DT_INT32, sizeof(int32_t)}, {DT_UINT32, sizeof(int32_t)}, {DT_INT8, sizeof(char)}, {DT_UINT8, sizeof(char)},
{DT_INT16, sizeof(int16_t)}, {DT_UINT16, sizeof(int16_t)}, {DT_FLOAT16, sizeof(int16_t)}, {DT_DOUBLE, sizeof(double)},
};

class TypeUtils {
public:
static bool GetDataTypeLength(ge::DataType data_type, uint32_t &length);
static bool CheckUint64MulOverflow(uint64_t a, uint32_t b);
};

bool TypeUtils::GetDataTypeLength(ge::DataType data_type, uint32_t &length) {
auto it = data_type_to_length.find(data_type);
if (it != data_type_to_length.end()) {
length = it->second;
return true;
} else {
return false;
}
}

bool TypeUtils::CheckUint64MulOverflow(uint64_t a, uint32_t b) {
// Not overflow
if (a == 0) {
return false;
}
if ((ULLONG_MAX / a) >= b) {
return false;
}
return true;
}
} // namespace ge

+ 12
- 4
tests/depends/runtime/src/runtime_stub.cc View File

@@ -27,8 +27,8 @@ rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) {
}

rtError_t rtCtxGetCurrent(rtContext_t *ctx) {
int x = 1;
*ctx = (void *)x;
uintptr_t x = 1;
*ctx = (rtContext_t *)x;
return RT_ERROR_NONE;
}

@@ -131,8 +131,15 @@ rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char

rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }

rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }

rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; }

rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo) {
return RT_ERROR_NONE;
}

rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc,
rtStream_t stream) {
return RT_ERROR_NONE;
@@ -156,7 +163,7 @@ rtError_t rtSetKernelReportCallback(rtKernelReportCallback callback) {
rt_kernel_info.module_addr = (void *)100;
rt_kernel_info.module_size = 100;

rtStream_t stream;
rtStream_t stream = nullptr;
callback(stream, &rt_kernel_info);
return RT_ERROR_NONE;
}
@@ -193,7 +200,8 @@ rtError_t rtModelCreate(rtModel_t *model, uint32_t flag) {
}

rtError_t rtModelDestroy(rtModel_t model) {
delete model;
uint32_t *stub = static_cast<uint32_t *>(model);
delete stub;
return RT_ERROR_NONE;
}
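The old delete model; deleted through an opaque handle, which is undefined behaviour because the compiler cannot determine the object's real type or pick the matching deallocation; casting back to the uint32_t* that the stub's rtModelCreate presumably allocates makes the delete well-defined. The same pattern in isolation:

#include <cstdint>

void DestroyOpaqueHandle(void *handle) {
  // deleting a void* is undefined behaviour; restore the concrete type first
  uint32_t *stub = static_cast<uint32_t *>(handle);
  delete stub;
}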



+ 44
- 28
tests/ut/ge/CMakeLists.txt View File

@@ -18,23 +18,23 @@ project(ut_ge)
set(CMAKE_CXX_STANDARD 11)

set(PROTO_LIST
"${GE_CODE_DIR}/metadef/proto/om.proto"
"${GE_CODE_DIR}/metadef/proto/ge_ir.proto"
"${GE_CODE_DIR}/metadef/proto/ge_api.proto"
"${GE_CODE_DIR}/metadef/proto/insert_op.proto"
"${GE_CODE_DIR}/metadef/proto/dump_task.proto"
"${GE_CODE_DIR}/metadef/proto/fwk_adapter.proto"
"${GE_CODE_DIR}/metadef/proto/op_mapping_info.proto"
"${GE_CODE_DIR}/metadef/proto/optimizer_priority.proto"
"${GE_CODE_DIR}/metadef/proto/ge_api.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/attr_value.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/tensor.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/resource_handle.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/tensor_shape.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/types.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/node_def.proto"
"${GE_CODE_DIR}/metadef/proto/proto_inner/ge_onnx.proto"
)
"${GE_CODE_DIR}/metadef/proto/om.proto"
"${GE_CODE_DIR}/metadef/proto/ge_ir.proto"
"${GE_CODE_DIR}/metadef/proto/ge_api.proto"
"${GE_CODE_DIR}/metadef/proto/insert_op.proto"
"${GE_CODE_DIR}/metadef/proto/dump_task.proto"
"${GE_CODE_DIR}/metadef/proto/fwk_adapter.proto"
"${GE_CODE_DIR}/metadef/proto/op_mapping_info.proto"
"${GE_CODE_DIR}/metadef/proto/optimizer_priority.proto"
"${GE_CODE_DIR}/metadef/proto/ge_api.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/attr_value.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/tensor.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/resource_handle.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/tensor_shape.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/types.proto"
"${GE_CODE_DIR}/metadef/proto/tensorflow/node_def.proto"
"${GE_CODE_DIR}/metadef/proto/proto_inner/ge_onnx.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})

@@ -135,6 +135,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/common/types.cc"
"${GE_CODE_DIR}/ge/common/fmk_error_codes.cc"
"${GE_CODE_DIR}/ge/common/op/ge_op_utils.cc"
"${GE_CODE_DIR}/ge/common/context/ctx.cc"
"${GE_CODE_DIR}/ge/graph/manager/util/variable_accelerate_ctrl.cc"
"${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc"
"${GE_CODE_DIR}/ge/generator/ge_generator.cc"
@@ -163,7 +164,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/common/dump/dump_manager.cc"
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
"${GE_CODE_DIR}/ge/model/ge_root_model.cc"
"${GE_CODE_DIR}/ge/common/model_parser/base.cc"
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc"
"${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc"
"${GE_CODE_DIR}/ge/common/dump/dump_server.cc"
@@ -266,8 +267,8 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc"
"${GE_CODE_DIR}/ge/model/ge_model.cc"
"${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc"
@@ -393,14 +394,13 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES
"${GE_CODE_DIR}/ge/graph/manager/util/debug.cc"
"${GE_CODE_DIR}/ge/common/properties_manager.cc"
"${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc"
"${GE_CODE_DIR}/ge/common/model_parser/base.cc"
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
"${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc"
"${GE_CODE_DIR}/ge/common/util.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc"
@@ -458,7 +458,7 @@ set(GRAPH_BUILD_COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
"${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc"
"${GE_CODE_DIR}/ge/common/thread_pool.cc"
"${GE_CODE_DIR}/ge/common/model_parser/base.cc"
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
"${GE_CODE_DIR}/ge/graph/build/run_context.cc"
"${GE_CODE_DIR}/ge/graph/common/local_context.cc"
)
@@ -627,7 +627,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES
#"graph/load/new_model_manager_davinci_model_unittest.cc"
"graph/load/model_manager_unittest.cc"
#"graph/load/new_model_manager_task_build_unittest.cc"
"graph/load/new_model_manager_model_manager_aicpu_unittest.cc"
"graph/load/new_model_manager_model_manager_aicpu_unittest.cc"
"graph/load/end_graph_task_unittest.cc"
"graph/load/new_model_manager_event_manager_unittest.cc"
#"graph/load/output_net_output_unittest.cc"
@@ -638,7 +638,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES
"graph/load/kernel_task_info_unittest.cc"
"graph/load/memcpy_addr_async_task_info_unittest.cc"
"graph/load/memcpy_async_task_info_unittest.cc"
"graph/load/cpu_queue_schedule_unittest.cc"
"graph/load/cpu_queue_schedule_unittest.cc"
#"graph/graph_load_unittest.cc"
"graph/ge_executor_unittest.cc"
"graph/load/model_helper_unittest.cc"
@@ -671,7 +671,7 @@ set(PASS_TEST_FILES
"graph/passes/trans_op_depth_fusion_pass_unittest.cc"
"graph/passes/transop_nearby_allreduce_fusion_pass_unittest.cc"
"graph/passes/constant_folding_pass_unittest.cc"
"graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc"
"graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc"
"graph/passes/stop_gradient_pass_unittest.cc"
"graph/passes/prevent_gradient_pass_unittest.cc"
"graph/passes/identity_pass_unittest.cc"
@@ -752,25 +752,38 @@ set(MULTI_PARTS_TEST_FILES
"graph/build/mem_assigner_unittest.cc"
"graph/preprocess/graph_preprocess_unittest.cc"
"graph/manager/hcom_util_unittest.cc"
"graph/manager/graph_caching_allocator_unittest.cc"
"session/omg_omg_unittest.cc"
)

set(GENERATOR_TEST_FILES
"generator/ge_generator_unittest.cc"
)

set(EXECUTOR_TEST_FILES
"executor/ge_executor_unittest.cc"
)

set(SINGLE_OP_TEST_FILES
#"single_op/single_op_model_unittest.cc"
"single_op/single_op_model_unittest.cc"
"single_op/single_op_manager_unittest.cc"
"single_op/stream_resource_unittest.cc"
"single_op/single_op_task_unittest.cc"
)

set(PROFILING_MNG_TEST_FILES
"profiling/ge_profiling_manager_unittest.cc"
)

set(HYBRID_TEST_FILES
"hybrid/ge_hybrid_unittest.cc"
)

set(OTHERS_TEST_FILES
"plugin_manager/ge_util_unittest.cc"
)

list(APPEND COMMON_SHARED_LIBRARIES
omg_stub
c_sec
slog_stub
cce_ge_stub
@@ -1055,10 +1068,13 @@ target_link_libraries(ut_libge_kernel_utest
# libge_distinct_load_utest
add_executable(ut_libge_distinct_load_utest
${COMMON_TEST_FILES}
${GENERATOR_TEST_FILES}
${EXECUTOR_TEST_FILES}
${DISTINCT_GRAPH_LOAD_TEST_FILES}
${DISTINCT_GRAPH_LOAD_SRC_FILES}
${SINGLE_OP_TEST_FILES}
${PROFILING_MNG_TEST_FILES}
${HYBRID_TEST_FILES}
)

target_compile_options(ut_libge_distinct_load_utest PRIVATE


+ 42
- 0
tests/ut/ge/executor/ge_executor_unittest.cc View File

@@ -0,0 +1,42 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#define private public
#define protected public
#include "executor/ge_executor.h"
#include "graph/utils/tensor_utils.h"

using namespace std;

namespace ge {
class UtestGeExecutor : public testing::Test {
protected:
void SetUp() {}

void TearDown() {}
};

TEST_F(UtestGeExecutor, test_single_op_exec) {
GeExecutor executor;
ModelData model_data;
string model_name = "1234";

EXPECT_EQ(executor.LoadSingleOp(model_name, model_data, nullptr, nullptr), ACL_ERROR_GE_INTERNAL_ERROR);
EXPECT_EQ(executor.LoadDynamicSingleOp(model_name, model_data, nullptr, nullptr), PARAM_INVALID);
}
} // namespace ge

+ 78
- 0
tests/ut/ge/generator/ge_generator_unittest.cc View File

@@ -0,0 +1,78 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#define private public
#define protected public
#include "generator/ge_generator.h"
#include "graph/utils/tensor_utils.h"

using namespace std;

namespace ge {
class UtestGeGenerator : public testing::Test {
protected:
void SetUp() {}

void TearDown() {}
};

TEST_F(UtestGeGenerator, test_build_single_op_offline) {
GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor_desc, 512);

shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add");
EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS);

GeTensor tensor(tensor_desc);
const vector<GeTensor> inputs = { tensor, tensor };
const vector<GeTensor> outputs = { tensor };

// Not initialized yet, so impl is null.
GeGenerator generator;
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), PARAM_INVALID);

// const map<string, string> &options
generator.Initialize({});
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED);
}

/*
TEST_F(UtestGeGenerator, test_build_single_op_online) {
GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
TensorUtils::SetSize(tensor_desc, 512);

shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add");
EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS);

GeTensor tensor(tensor_desc);
const vector<GeTensor> inputs = { tensor, tensor };
const vector<GeTensor> outputs = { tensor };

// Not initialized yet, so impl is null.
GeGenerator generator;
generator.Initialize({});
ModelBufferData model_buffer;
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_SYS, model_buffer), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED);
}
*/

} // namespace ge

+ 95
- 32
tests/ut/ge/graph/build/mem_assigner_unittest.cc View File

@@ -25,10 +25,12 @@
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "omg/omg_inner_types.h"
#include "../passes/graph_builder_utils.h"

#define protected public
#define private public
#include "graph/build/memory/binary_block_mem_assigner.h"
#include "graph/build/memory/graph_mem_assigner.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/max_block_mem_assigner.h"
#undef protected
@@ -41,7 +43,7 @@ using domi::GetContext;

class UtestMemoryAssignerTest : public testing::Test {
public:
ge::OpDescPtr createOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
auto desc_temp_ptr = make_shared<ge::GeTensorDesc>();
auto desc_temp = *desc_temp_ptr;
@@ -55,26 +57,46 @@ class UtestMemoryAssignerTest : public testing::Test {
op_def->SetWorkspaceBytes(workspace_bytes);
return op_def;
}
void make_graph(ge::ComputeGraphPtr graph) {
ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000);
ge::OpDescPtr CreateRefOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
auto desc_temp_ptr = make_shared<ge::GeTensorDesc>();
auto desc_temp = *desc_temp_ptr;

TensorUtils::SetSize(desc_temp, 1024);
op_def->AddInputDesc(desc_temp);

auto desc_output_ptr = make_shared<ge::GeTensorDesc>();
auto desc_output = *desc_output_ptr;
TensorUtils::SetSize(desc_output, 6500);
ge::TensorUtils::SetReuseInput(desc_output, true);
ge::TensorUtils::SetReuseInputIndex(desc_output, 0);
op_def->AddOutputDesc(desc_output);

std::vector<int64_t> workspace_bytes;
workspace_bytes.push_back(wsByte);
op_def->SetWorkspaceBytes(workspace_bytes);
return op_def;
}
void MakeGraph(ge::ComputeGraphPtr &graph) {
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000);
op_def_a->SetStreamId(0);
ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000);
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000);
op_def_b->SetStreamId(0);
ge::OpDescPtr op_def_c = createOpWithWsSize("C", 16000);
ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 16000);
op_def_c->SetStreamId(1);
ge::OpDescPtr op_def_d = createOpWithWsSize("D", 24000);
ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 24000);
op_def_d->SetStreamId(2);
ge::OpDescPtr op_def_e = createOpWithWsSize("E", 24000);
ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 24000);
op_def_e->SetStreamId(3);
ge::OpDescPtr op_def_f = createOpWithWsSize("F", 30000);
ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 30000);
op_def_f->SetStreamId(2);
ge::OpDescPtr op_def_g = createOpWithWsSize("G", 32000);
ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 32000);
op_def_g->SetStreamId(3);
ge::OpDescPtr op_def_h = createOpWithWsSize("H", 48000);
ge::OpDescPtr op_def_h = CreateOpWithWsSize("H", 48000);
op_def_h->SetStreamId(2);
ge::OpDescPtr op_def_i = createOpWithWsSize("I", 60000);
ge::OpDescPtr op_def_i = CreateOpWithWsSize("I", 60000);
op_def_i->SetStreamId(2);
ge::OpDescPtr op_def_j = createOpWithWsSize("J", 256000, NETOUTPUT);
ge::OpDescPtr op_def_j = CreateOpWithWsSize("J", 256000, NETOUTPUT);
op_def_j->SetStreamId(3);

// add node
@@ -108,24 +130,10 @@ class UtestMemoryAssignerTest : public testing::Test {
graph->TopologicalSorting();
}

void make_reuse_graph(ge::ComputeGraphPtr graph) {
ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000);
ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000);

ge::OpDescPtr op_def_c = make_shared<ge::OpDesc>("C", "Some");
auto desc_input_ptr = make_shared<ge::GeTensorDesc>();
auto desc_input = *desc_input_ptr;

TensorUtils::SetSize(desc_input, 1024);
op_def_c->AddInputDesc(desc_input);

auto desc_output_ptr = make_shared<ge::GeTensorDesc>();
auto desc_output = *desc_output_ptr;
TensorUtils::SetSize(desc_output, 6500);
ge::TensorUtils::SetReuseInput(desc_output, true);
ge::TensorUtils::SetReuseInputIndex(desc_output, 0);
op_def_c->AddOutputDesc(desc_output);

void MakeReuseGraph(ge::ComputeGraphPtr graph) {
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000);
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000);
ge::OpDescPtr op_def_c = CreateRefOpWithWsSize("C", 120000);
ge::OpDescPtr op_def_d = make_shared<ge::OpDesc>("D", "CONSTANT");

ge::NodePtr node_a = graph->AddNode(op_def_a);
@@ -141,6 +149,47 @@ class UtestMemoryAssignerTest : public testing::Test {
graph->TopologicalSorting();
}

ComputeGraphPtr MakeCascadeContinuousMemoryGraph() {
ge::ut::GraphBuilder builder("graph");
auto data = builder.AddNode("data", "Data", 1, 1);
auto addn1 = builder.AddNode("addn1", "AddN", 1, 1);
auto addn2 = builder.AddNode("addn2", "AddN", 1, 1);
auto addn3 = builder.AddNode("addn3", "AddN", 1, 1);
auto concat1 = builder.AddNode("concat1", "Concat", 2, 1);
auto concat2 = builder.AddNode("concat2", "Concat", 2, 1);
auto netoutput = builder.AddNode("netoutput", "NetOutput", 2, 0);

ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true);
ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true);

ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true);
ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true);

addn1->GetOpDesc()->SetOutputOffset({100});
addn2->GetOpDesc()->SetOutputOffset({200});
concat1->GetOpDesc()->SetOutputOffset({100});
addn3->GetOpDesc()->SetOutputOffset({700});
concat2->GetOpDesc()->SetOutputOffset({500});

ge::AttrUtils::SetListInt(addn1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100});
ge::AttrUtils::SetListInt(addn2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100});
ge::AttrUtils::SetListInt(addn3->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100});
ge::AttrUtils::SetListInt(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {200});
ge::AttrUtils::SetListInt(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {300});


builder.AddDataEdge(data, 0, addn1, 0);
builder.AddDataEdge(data, 0, addn2, 0);
builder.AddDataEdge(addn1, 0, concat1, 0);
builder.AddDataEdge(addn2, 0, concat1, 1);
builder.AddDataEdge(concat1, 0, concat2, 0);
builder.AddDataEdge(addn3, 0, concat2, 1);

return builder.GetGraph();
}

protected:
void SetUp() {}

@@ -150,7 +199,7 @@ class UtestMemoryAssignerTest : public testing::Test {
/*
TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) {
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000);
ge::NodePtr node_a = graph->AddNode(op_def_a);
MemoryBlock* memory_block = new MemoryBlock(0);
memory_block->Init(1, kOutput, node_a, 0, 1);
@@ -178,7 +227,7 @@ class MockBlockMemAssigner : public BlockMemAssigner {
// when check GetMemoryRanges return fail, Assign return fail
TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) {
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
MakeGraph(graph);
std::map<std::string, std::string> anchor_to_symbol;
std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
EXPECT_EQ(GraphUtils::GetRefMapping(graph, symbol_to_anchors, anchor_to_symbol), GRAPH_SUCCESS);
@@ -186,3 +235,17 @@ TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) {
MockBlockMemAssigner mock_assigner(graph, anchor_to_symbol, symbol_to_anchors);
EXPECT_EQ(mock_assigner.Assign(), FAILED);
}

TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) {
ge::ComputeGraphPtr graph = MakeCascadeContinuousMemoryGraph();
auto addn1 = graph->FindNode("addn1");
auto addn2 = graph->FindNode("addn2");
EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 100);
EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 200);
GraphMemoryAssigner memoryAssigner(graph);
MemoryOffset memory_offset(RT_MEMORY_HBM, 0);
memoryAssigner.memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
EXPECT_EQ(memoryAssigner.ReAssignContinuousMemory(false), GRAPH_SUCCESS);
EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 500);
EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600);
}

+20 -1  tests/ut/ge/graph/ge_executor_unittest.cc

@@ -34,7 +34,6 @@
#include "common/types.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/load/model_manager/task_info/kernel_task_info.h"
#include "graph/load/model_manager/task_info/kernel_ex_task_info.h"
@@ -109,6 +108,26 @@ static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
ge::AttrUtils::SetInt(op_desc, ge::ATTR_NAME_STREAM_SWITCH_COND, 0);
return op_desc;
}

TEST_F(UtestGeExecutor, load_data_from_file) {
GeExecutor ge_executor;
ge_executor.isInit_ = true;

string test_smap = "/tmp/" + std::to_string(getpid()) + "_maps";
string self_smap = "/proc/" + std::to_string(getpid()) + "/maps";
string copy_smap = "cp " + self_smap + " " + test_smap;
EXPECT_EQ(system(copy_smap.c_str()), 0);

ModelData model_data;
EXPECT_EQ(ge_executor.LoadDataFromFile(test_smap, model_data), SUCCESS);

EXPECT_NE(model_data.model_data, nullptr);
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;

ge_executor.isInit_ = false;
}

/*
TEST_F(UtestGeExecutor, fail_UnloadModel_model_manager_stop_unload_error) {
uint32_t model_id = 1;


+0 -1  tests/ut/ge/graph/graph_load_unittest.cc

@@ -24,7 +24,6 @@
#include "common/helper/model_helper.h"
#include "common/op/ge_op_utils.h"
#include "common/types.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/op_desc.h"
#include "graph/types.h"
#include "graph/utils/attr_utils.h"


+7 -0  tests/ut/ge/graph/load/davinci_model_unittest.cc

@@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) {
model.SinkModelProfile();
}

TEST_F(UtestDavinciModel, Sink_time_profile) {
ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
DavinciModel model(0, nullptr);
InputData current_data;
model.SinkTimeProfile(current_data);
}

} // namespace ge

+0 -1  tests/ut/ge/graph/load/model_manager_unittest.cc

@@ -25,7 +25,6 @@
#include "common/op/ge_op_utils.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model_parser.h"

using namespace std;
using namespace testing;


+1 -2  tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc

@@ -21,7 +21,7 @@

#include "common/debug/log.h"
#include "common/l2_cache_optimize.h"
#include "common/model_parser/base.h"
#include "common/model_parser/model_parser.h"
#include "common/properties_manager.h"
#include "common/types.h"

@@ -31,7 +31,6 @@
#include "common/op/ge_op_utils.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/load/model_manager/davinci_model_parser.h"
#include "graph/load/model_manager/model_manager.h"
//#include "new_op_test_utils.h"
#undef private


+87 -0  tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc

@@ -0,0 +1,87 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <memory>
#include "graph/anchor.h"
#include "graph/attr_value.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "omg/omg_inner_types.h"
#define protected public
#define private public
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/graph_mem_allocator.h"
#undef protected
#undef private
using namespace std;
using namespace testing;
using namespace ge;
using domi::GetContext;
class UtestGraphCachingAllocatorTest : public testing::Test {
protected:
void SetUp() {}
void TearDown() { GetContext().out_nodes_map.clear(); }
};
TEST_F(UtestGraphCachingAllocatorTest, initialize_success) {
std::vector<rtMemType_t> mem_type;
mem_type.push_back(RT_MEMORY_HBM);
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
MemManager::Instance().Finalize();
}
TEST_F(UtestGraphCachingAllocatorTest, malloc_success) {
std::vector<rtMemType_t> mem_type;
mem_type.push_back(RT_MEMORY_HBM);
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize);
EXPECT_NE(nullptr, ptr);
MemManager::Instance().Finalize();
}
TEST_F(UtestGraphCachingAllocatorTest, extend_malloc_success) {
std::vector<rtMemType_t> mem_type;
mem_type.push_back(RT_MEMORY_HBM);
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize);
EXPECT_NE(nullptr, ptr);
ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kBinSizeUnit32*kMByteSize);
EXPECT_NE(nullptr, ptr);
MemManager::Instance().Finalize();
}
TEST_F(UtestGraphCachingAllocatorTest, malloc_statics) {
std::vector<rtMemType_t> mem_type;
mem_type.push_back(RT_MEMORY_HBM);
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize);
EXPECT_NE(nullptr, ptr);
uint8_t *ptr1 = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kKByteSize);
EXPECT_NE(nullptr, ptr1);
EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr), SUCCESS);
EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr1), SUCCESS);
MemManager::Instance().CachingInstance(RT_MEMORY_HBM).FreeCachedBlocks();
MemManager::Instance().Finalize();
}

+113 -0  tests/ut/ge/hybrid/ge_hybrid_unittest.cc

@@ -0,0 +1,113 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <vector>

#include "runtime/rt.h"

#define protected public
#define private public
#include "hybrid/model/hybrid_model_builder.h"
#include "hybrid/model/hybrid_model.h"
#include "model/ge_model.h"
#include "model/ge_root_model.h"

#include "hybrid/node_executor/aicore/aicore_op_task.h"
#include "framework/common/taskdown_common.h"
#include "framework/common/debug/log.h"
#include "graph/ge_context.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/node_executor/aicore/aicore_task_builder.h"
#include "graph/load/model_manager/tbe_handle_store.h"
#include "graph/types.h"

#undef private
#undef protected

using namespace std;
using namespace testing;
using namespace ge;

class UtestGeHybrid : public testing::Test {
protected:
void SetUp() {}

void TearDown() {}
};

static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
auto op_desc = std::make_shared<ge::OpDesc>(name, type);
op_desc->SetStreamId(0);
op_desc->SetId(0);

op_desc->SetWorkspace({});
op_desc->SetWorkspaceBytes({});
op_desc->SetInputOffset({});
op_desc->SetOutputOffset({});

ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC");
bool support_dynamic = true;
ge::AttrUtils::GetBool(op_desc, "support_dynamicshape", support_dynamic);
return op_desc;
}

TEST_F(UtestGeHybrid, aicore_op_task_init_success) {
// build aicore task
auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
domi::TaskDef task_def;
task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
kernel_with_handle->set_original_kernel_key("");
kernel_with_handle->set_node_info("");
kernel_with_handle->set_block_dim(32);
kernel_with_handle->set_args_size(64);
string args(64, '1');
kernel_with_handle->set_args(args.data(), 64);
domi::KernelContext *context = kernel_with_handle->mutable_context();
context->set_op_index(1);
context->set_kernel_type(2); // ccKernelType::TE
uint16_t args_offset[9] = {0};
context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

OpDescPtr op_desc = CreateOpDesc("Add", "Add");
std::vector<char> kernelBin;
TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
std::string kernel_name("kernel/Add");
AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);
ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS);
rtStream_t stream = nullptr;
rtStreamCreate(&stream, 0);
ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
char handle[] = "";
aicore_task->handle_ = handle;
aicore_task->tiling_key_ = 1;
ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
}

TEST_F(UtestGeHybrid, task_update_tiling_info) {
auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
aicore_task->is_single_op_ = true;
auto graph = make_shared<ComputeGraph>("graph");
OpDescPtr op_desc = CreateOpDesc("Add", "Add");
ge::AttrUtils::SetStr(op_desc, "compile_info_key", "key");
ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json");
auto node = graph->AddNode(op_desc);
optiling::OpRunInfo tiling_info;
ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS);
}

+28 -4  tests/ut/ge/single_op/single_op_model_unittest.cc

@@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test {
void TearDown() {}
};

// rt api stub
rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) {
return RT_ERROR_NONE;
}
/*
TEST_F(UtestSingleOpModel, test_init_model) {
string model_data_str = "123456789";
@@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) {

std::mutex stream_mu_;
rtStream_t stream_ = nullptr;
// SingleOp single_op(&stream_mu_, stream_);
//
// ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS);
}
/*
TEST_F(UtestSingleOpModel, test_build_kernel_task) {
@@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) {
ASSERT_EQ(op_model.Init(), FAILED);
}
*/
/*
TEST_F(UtestSingleOpModel, test_parse_arg_table) {
string model_data_str = "123456789";
SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size());
@@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) {
ASSERT_EQ(op.arg_table_[1].size(), 1);
ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]);
}
*/
TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) {
string name = "relu";
string type = "relu";
auto op_desc = std::make_shared<ge::OpDesc>(name, type);
op_desc->SetStreamId(0);
op_desc->SetId(0);
TbeOpTask task;
task.op_desc_ = op_desc;
task.model_name_ = "resnet_50";
task.model_id_ = 1;
TaskDescInfo task_desc_info;
uint32_t model_id;
task.GetProfilingArgs(task_desc_info, model_id);

ASSERT_EQ(task_desc_info.model_name, "resnet_50");
ASSERT_EQ(model_id, 1);
}



+117 -0  tests/ut/ge/single_op/single_op_task_unittest.cc

@@ -0,0 +1,117 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <vector>

#include "graph/load/model_manager/model_utils.h"
#include "graph/utils/graph_utils.h"
#include "runtime/rt.h"

#define protected public
#define private public
#include "single_op/single_op_model.h"
#include "single_op/task/tbe_task_builder.h"
#include "single_op/task/op_task.h"
#include "single_op/task/tbe_task_builder.h"
#include "external/register/op_tiling_registry.h"
#undef private
#undef protected

using namespace std;
using namespace testing;
using namespace ge;
using namespace optiling;

class UtestSingleOpTask : public testing::Test {
protected:
void SetUp() {}

void TearDown() {}
};

TEST_F(UtestSingleOpTask, test_build_kernel_task) {
string model_data_str = "123456789";
SingleOpModel model("model", model_data_str.c_str(), model_data_str.size());
model.input_offset_list_.push_back(0);
model.input_sizes_.push_back(16);

model.output_offset_list_.push_back(0);
model.output_sizes_.push_back(16);

auto graph = make_shared<ComputeGraph>("graph");
auto op_desc = make_shared<OpDesc>("Add", "Add");
std::vector<char> kernelBin;
TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
std::string kernel_name("kernel/Add");
AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);

vector<int64_t> shape{16, 16};
GeShape ge_shape(shape);
GeTensorDesc desc(ge_shape);
op_desc->AddInputDesc(desc);
op_desc->AddOutputDesc(desc);
auto node = graph->AddNode(op_desc);

std::mutex stream_mu_;
rtStream_t stream_ = nullptr;
StreamResource stream_resource(0);
SingleOp single_op(&stream_resource, &stream_mu_, stream_);

domi::TaskDef task_def;
task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
kernel_with_handle->set_original_kernel_key("");
kernel_with_handle->set_node_info("");
kernel_with_handle->set_block_dim(32);
kernel_with_handle->set_args_size(64);
string args(64, '1');
kernel_with_handle->set_args(args.data(), 64);
domi::KernelContext *context = kernel_with_handle->mutable_context();
context->set_op_index(1);
context->set_kernel_type(2); // ccKernelType::TE
uint16_t args_offset[9] = {0};
context->set_args_offset(args_offset, 9 * sizeof(uint16_t));
model.op_list_[1] = node;

TbeOpTask task_tmp;
TbeOpTask *task = &task_tmp;
ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS);
vector<GeTensorDesc> input_desc;
vector<DataBuffer> input_buffers;
vector<GeTensorDesc> output_desc;
vector<DataBuffer> output_buffers;
task->node_ = node;
OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool {return true;};
OpTilingRegistryInterf("Add", op_tiling_func);
ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key");
ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json");
char c = '0';
char* buffer = &c;
task->tiling_buffer_ = buffer;
task->max_tiling_size_ = 64;
task->tiling_data_ = "tiling_data";
task->arg_size_ = 64;
uint8_t task_args{0};
task->args_.reset(&task_args);

ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);
char handle_tmp = '0';
char *handle = &handle_tmp;
task->SetHandle(handle);
ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);
}

+35 -0  third_party/fwkacllib/inc/runtime/kernel.h

@@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData);
#define RT_FUSION_KERNEL_DUMPFLAG (0x04)
#define RT_KERNEL_CUSTOM_AICPU (0x08)

/**
* @ingroup rt_kernel
* @brief kernel mode
*/
#define RT_DEFAULT_KERNEL_MODE (0x00)
#define RT_NORMAL_KERNEL_MODE (0x01)
#define RT_ALL_KERNEL_MODE (0x02)

/**
* @ingroup rt_kernel
* @brief kernel L1 Fusion Dump bit flags
@@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData);
*/
RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle);

/**
* @ingroup rt_kernel
* @brief register device binary
* @param [in] bin device binary description
* @param [out] handle device binary handle
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle);
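A minimal registration sketch (editorial, not part of kernel.h): it assumes a compiled kernel binary already sits in bin_data/bin_size, both placeholder names, and uses an ELF magic value; pick the magic that matches the actual binary type.

rtDevBinary_t binary = {};
binary.magic = RT_DEV_BINARY_MAGIC_ELF;  // assumed magic; depends on the kernel binary type
binary.version = 0;
binary.data = bin_data;                  // placeholder: pointer to the compiled kernel binary
binary.length = bin_size;                // placeholder: binary size in bytes
void *handle = nullptr;
if (rtRegisterAllKernel(&binary, &handle) != RT_ERROR_NONE) {
  // registration failed; handle stays null
}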

/**
* @ingroup rt_kernel
* @brief register fast memory device binary
@@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u
RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream);

/**
* @ingroup rt_kernel
* @brief launch kernel with handle to device
* @param [in] handle program
* @param [in] devFunc device function description
* @param [in] blockDim block dimensions
* @param [in] args arguments address for kernel function
* @param [in] argsSize arguments size
* @param [in] smDesc shared memory description
* @param [in] stream associated stream
* @param [in] kernelInfo kernel info
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo);
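And a matching launch sketch (again editorial, not from the header) using the handle registered above; the device function key, block dim, and argument buffer below are placeholders for illustration only.

uint8_t args[64] = {0};                  // placeholder argument buffer; layout depends on the kernel's arg table
rtStream_t stream = nullptr;
rtStreamCreate(&stream, 0);
rtError_t ret = rtKernelLaunchWithHandle(handle, "kernel/Add" /* assumed devFunc key */, 32,
                                         args, sizeof(args), nullptr /* smDesc */, stream,
                                         nullptr /* kernelInfo */);
if (ret == RT_ERROR_NONE) {
  rtStreamSynchronize(stream);           // wait for the kernel to finish
}
rtStreamDestroy(stream);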

/**
* @ingroup rt_kernel
* @brief launch kernel to device


+13 -0  third_party/fwkacllib/inc/runtime/rt_model.h

@@ -50,6 +50,7 @@ typedef enum tagModelTaskType {
RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX,
RT_MODEL_TASK_STREAM_LABEL_GOTO,
RT_MODEL_TASK_MODEL_EXIT,
RT_MODEL_TASK_ALL_KERNEL,
} rtModelTaskType_t;

typedef enum tagModelStreamType {
@@ -127,6 +128,17 @@ typedef struct tagKernelTaskInfo {
uint16_t *argsOffset;
} rtKernelTaskInfo_t;

typedef struct tagAllKernelTaskInfo {
uint16_t blockDim;
uint16_t argsCount;
uint16_t argsSize;
uint16_t reserved;
const void *dev_func;
void *handle;
uint8_t *smDesc;
uint8_t *args;
uint16_t *argsOffset;
} rtAllKernelTaskInfo_t;
typedef struct tagKernelTaskInfoEx {
uint32_t flags;
uint32_t argsSize;
@@ -251,6 +263,7 @@ typedef struct tagTaskInfo {
union {
rtKernelTaskInfoEx_t kernelTaskEx;
rtKernelTaskInfo_t kernelTask;
rtAllKernelTaskInfo_t allkernelTask;
rtEventTaskInfo_t eventTask;
rtStreamSwitchTaskInfo_t streamSwitchTask;
rtStreamActiveTaskInfo_t streamActiveTask;

