@@ -76,9 +76,7 @@ if (ENABLE_OPEN_SRC)
 find_module(runtime libruntime.so ${GE_LIB_PATH})
 find_module(runtime_compile libruntime_compile.so ${GE_LIB_PATH})
 find_module(resource libresource.so ${GE_LIB_PATH})
-find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
 find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
-find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
 find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH})
 #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
 elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
@@ -86,11 +84,9 @@ if (ENABLE_OPEN_SRC)
 else()
 find_module(slog libalog.so ${ASCEND_ATC_DIR})
 find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
-find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
 if(PLATFORM STREQUAL "train")
 find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
 find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
-find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
 find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
 find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
 if(PRODUCT STREQUAL "flr3")
@@ -100,8 +96,6 @@ if (ENABLE_OPEN_SRC)
 find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
 find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
 find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
-find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
-find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
 find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
 if(PRODUCT STREQUAL "flr3")
 elseif(PRODUCT STREQUAL "flr1")
@@ -114,11 +108,9 @@ if (ENABLE_OPEN_SRC)
 elseif(PLATFORM STREQUAL "all")
 find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
 find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
-find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
 find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
 find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
 find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
-find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
 find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
 else()
 message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
@@ -144,7 +136,6 @@ elseif (ENABLE_D OR ENABLE_ACL)
 # common libraries
 find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
-find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
 find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
 if (ENABLE_D)
@@ -164,7 +155,6 @@ elseif(ENABLE_MS_TESTCASES)
 # common libraries
 find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
-find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
 find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
 set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)
@@ -76,8 +76,8 @@ checkopts()
 ENABLE_GE_ST="on"
 ;;
 t)
-ENABLE_GE_UT="on"
-;;
+ENABLE_GE_UT="on"
+;;
 c)
 ENABLE_GE_COV="on"
 ;;
@@ -185,7 +185,7 @@ build_graphengine()
 # build all the target
 TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}"
 fi
 make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install
 if [ $? -ne 0 ]
 then
@@ -214,13 +214,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
 cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH}
 cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH}
-${OUTPUT_PATH}/ut_libgraph &&
-${OUTPUT_PATH}/ut_libge_multiparts_utest &&
-${OUTPUT_PATH}/ut_libge_distinct_load_utest &&
-${OUTPUT_PATH}/ut_libge_others_utest &&
-${OUTPUT_PATH}/ut_libge_kernel_utest
+RUN_TEST_CASE=${OUTPUT_PATH}/ut_libgraph && ${RUN_TEST_CASE} &&
+RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_multiparts_utest && ${RUN_TEST_CASE} &&
+RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_distinct_load_utest && ${RUN_TEST_CASE} &&
+RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_others_utest && ${RUN_TEST_CASE} &&
+RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_kernel_utest && ${RUN_TEST_CASE}
 if [[ "$?" -ne 0 ]]; then
 echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!"
+echo -e "\033[31m${RUN_TEST_CASE}\033[0m"
 exit 1;
 fi
 echo "Generating coverage statistics, please wait..."
@@ -249,8 +250,8 @@ generate_package()
 NNENGINE_PATH="plugin/nnengine/ge_config"
 OPSKERNEL_PATH="plugin/opskernel"
-ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so")
-FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so")
+ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so" "liberror_manager.so")
+FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so" "liberror_manager.so")
 PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "libhost_cpu_engine.so" "libhost_cpu_opskernel_builder.so" "optimizer_priority.pbtxt")
 PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so")
@@ -269,7 +270,7 @@ generate_package()
 mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}"
 mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}"
 mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}"
 cd "${OUTPUT_PATH}"
 find ./ -name graphengine_lib.tar -exec rm {} \;
@@ -133,7 +133,6 @@ set(TRAIN_SRC_LIST
 "graph/load/model_manager/data_dumper.cc"
 "graph/load/model_manager/data_inputer.cc"
 "graph/load/model_manager/davinci_model.cc"
-"graph/load/model_manager/davinci_model_parser.cc"
 "graph/load/model_manager/model_manager.cc"
 "graph/load/model_manager/model_utils.cc"
 "graph/load/model_manager/aipp_utils.cc"
@@ -613,7 +612,6 @@ set(INFER_SRC_LIST
 "graph/load/model_manager/model_manager.cc"
 "graph/load/model_manager/data_inputer.cc"
 "graph/load/model_manager/davinci_model.cc"
-"graph/load/model_manager/davinci_model_parser.cc"
 "graph/load/model_manager/model_utils.cc"
 "graph/load/model_manager/aipp_utils.cc"
 "graph/load/model_manager/tbe_handle_store.cc"
@@ -32,6 +32,7 @@
 #include "graph/common/ge_call_wrapper.h"
 #include "register/op_registry.h"
 #include "common/ge/tbe_plugin_manager.h"
+#include "common/util/error_manager/error_manager.h"
 #include "toolchain/plog.h"
 using domi::OpRegistry;
@@ -79,6 +80,8 @@ Status CheckOptionsValid(const std::map<string, string> &options) {
 // Initialize GE, prepare for execution, call GELib::Initialize
 Status GEInitializeImpl(const std::map<string, string> &options) {
 GELOGT(TRACE_INIT, "GEInitialize start");
+ErrorManager::GetInstance().GenWorkStreamIdDefault();
 // 0.check init status
 if (g_ge_initialized) {
 GELOGW("GEInitialize is called more than once");
@@ -157,6 +160,8 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
 // GE finalize, releasing all resources
 Status GEFinalize() {
 GELOGT(TRACE_INIT, "GEFinalize start");
+ErrorManager::GetInstance().GenWorkStreamIdDefault();
 // check init status
 if (!g_ge_initialized) {
 GELOGW("GEFinalize is called before GEInitialize");
@@ -202,9 +207,19 @@ Status GEFinalize() {
 return ret;
 }
+std::string GEGetErrorMsg() {
+return ErrorManager::GetInstance().GetErrorMessage();
+}
+std::string GEGetWarningMsg() {
+return ErrorManager::GetInstance().GetWarningMessage();
+}
 // Initialize session,which calls innerSession
 Session::Session(const std::map<string, string> &options) {
 GELOGT(TRACE_INIT, "Session Constructor start");
+ErrorManager::GetInstance().GenWorkStreamIdDefault();
 // check init status
 sessionId_ = 0;
 if (!g_ge_initialized) {
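Note for reviewers: the sketch below is a hypothetical caller-side illustration of the new GEGetErrorMsg/GEGetWarningMsg entry points added in the hunk above, assuming they are exported through the public ge_api header; it is not part of this change.

    #include <iostream>
    #include <map>
    #include <string>
    #include "ge/ge_api.h"  // assumed public header declaring GEInitialize/GEGetErrorMsg

    int main() {
      std::map<std::string, std::string> options;  // empty options, just for the sketch
      if (ge::GEInitialize(options) != ge::SUCCESS) {
        // With this change, the text recorded by ErrorManager on the current work stream
        // can be fetched directly instead of being scraped from logs.
        std::cerr << "GEInitialize failed: " << ge::GEGetErrorMsg() << std::endl;
        std::cerr << "warnings: " << ge::GEGetWarningMsg() << std::endl;
        return -1;
      }
      (void)ge::GEFinalize();
      return 0;
    }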
@@ -235,6 +250,8 @@ Session::Session(const std::map<string, string> &options) {
 Session::Session(const std::map<AscendString, AscendString> &options) {
 GELOGT(TRACE_INIT, "Session Constructor start");
+ErrorManager::GetInstance().GenWorkStreamIdDefault();
 // check init status
 sessionId_ = 0;
 if (!g_ge_initialized) {
@@ -311,11 +328,13 @@ Session::~Session() {
 Status Session::AddGraph(uint32_t graph_id, const Graph &graph) {
 std::map<std::string, std::string> options;
+ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
 return AddGraph(graph_id, graph, options);
 }
 Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) {
 GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
+ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
 std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
 if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
 GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@@ -334,6 +353,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<s
 Status Session::AddGraph(uint32_t graph_id, const Graph &graph,
 const std::map<AscendString, AscendString> &options) {
 GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
+ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
 std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
 if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
 GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@@ -360,6 +380,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph,
 }
 Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
+ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
 std::map<AscendString, AscendString> options;
 return AddGraphWithCopy(graph_id, graph, options);
 }
@@ -367,6 +388,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
 Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
 const std::map<AscendString, AscendString> &options) {
 GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
+ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
 std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
 if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
 GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@@ -389,6 +411,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
 Status Session::RemoveGraph(uint32_t graph_id) {
 GELOGT(TRACE_INIT, "Session RemoveGraph start");
+ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
 // call RemoveGraph
 std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
 if (!instance_ptr || !instance_ptr->InitFlag()) {
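Every public Session entry point in the hunks above now calls GenWorkStreamIdBySessionGraph(sessionId_, graph_id) (or GenWorkStreamIdDefault() when no graph is involved) before doing real work, so later error records can be attributed to the right session/graph. The hypothetical helper below is only a sketch of that repeated pattern, using nothing beyond the ErrorManager calls already visible in this diff.

    #include <cstdint>
    #include "common/util/error_manager/error_manager.h"  // same header ge_api.cc now includes

    // Hypothetical helper mirroring the pattern above: tag the error-manager work
    // stream with (session_id, graph_id) first, then run the actual graph call.
    template <typename Fn>
    auto WithWorkStream(uint64_t session_id, uint32_t graph_id, Fn &&fn) -> decltype(fn()) {
      ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(session_id, graph_id);
      return fn();
    }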
@@ -457,6 +480,7 @@ void PrintOutputResult(std::vector<Tensor> &outputs) {
 Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs) {
 GELOGT(TRACE_INIT, "Session RunGraph start");
+ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
 std::vector<Tensor> graph_inputs = inputs;
 // call RunGraph
 std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
@@ -483,10 +507,12 @@ Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, s
 }
 Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) {
+ErrorManager::GetInstance().GenWorkStreamIdDefault();
 return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback);
 }
 Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback) {
+ErrorManager::GetInstance().GenWorkStreamIdDefault();
 std::string str_key;
 if (key != nullptr) {
 str_key = key;
@@ -495,6 +521,7 @@ Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFu
 }
 Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
+ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
 std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
 if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
 GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -511,6 +538,7 @@ Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo>
 Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs,
 RunAsyncCallback callback) {
+ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
 std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
 if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
 GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -529,6 +557,7 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorIn
 }
 Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) {
+ErrorManager::GetInstance().GenWorkStreamIdDefault();
 auto instance_ptr = ge::GELib::GetInstance();
 if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
 GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -544,6 +573,7 @@ Status Session::GetVariables(const std::vector<std::string> &var_names, std::vec
 }
 Status Session::GetVariables(const std::vector<AscendString> &var_names, std::vector<Tensor> &var_values) {
+ErrorManager::GetInstance().GenWorkStreamIdDefault();
 auto instance_ptr = ge::GELib::GetInstance();
 if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
 GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@@ -54,7 +54,7 @@ set(SRC_LIST
 "util.cc"
 "properties_manager.cc"
 "types.cc"
-"model_parser/base.cc"
+"model_parser/model_parser.cc"
 "kernel_store.cc"
 "tbe_kernel_store.cc"
 "cust_aicpu_kernel_store.cc"
@@ -53,6 +53,7 @@ string PluginManager::GetPath() {
 GELOGW("Failed to read the shared library file path!");
 return string();
 } else {
+GE_IF_BOOL_EXEC(dl_info.dli_fname == nullptr, return string());
 std::string so_path = dl_info.dli_fname;
 char path[MMPA_MAX_PATH] = {0};
 if (so_path.length() >= MMPA_MAX_PATH) {
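The added GE_IF_BOOL_EXEC guard above covers the case where dladdr() succeeds but leaves dli_fname null, which would otherwise crash the std::string construction on the next line. Below is a standalone sketch of the same guarded lookup using plain dlfcn (no GE macros); link with -ldl on older glibc.

    #include <dlfcn.h>
    #include <iostream>
    #include <string>

    // Returns the path of the shared object containing this function, or an empty
    // string if dladdr() fails or reports no file name (the case the new guard handles).
    static std::string GetSelfSoPath() {
      Dl_info dl_info = {};
      if (dladdr(reinterpret_cast<void *>(&GetSelfSoPath), &dl_info) == 0) {
        return std::string();  // lookup failed entirely
      }
      if (dl_info.dli_fname == nullptr) {
        return std::string();  // lookup succeeded but no file name is available
      }
      return std::string(dl_info.dli_fname);
    }

    int main() {
      std::cout << GetSelfSoPath() << std::endl;
      return 0;
    }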
@@ -14,22 +14,15 @@
 * limitations under the License.
 */
-#include <climits>
+#include "common/helper/model_cache_helper.h"
 #include <cstdio>
 #include <fstream>
 #include <functional>
-#include "common/ge/ge_util.h"
-#include "common/helper/model_cache_helper.h"
-#include "common/types.h"
-#include "framework/common/debug/ge_log.h"
-#include "framework/common/ge_types.h"
+#include "common/model_parser/model_parser.h"
 #include "framework/common/helper/model_helper.h"
-#include "framework/common/util.h"
-#include "graph/detail/attributes_holder.h"
 #include "graph/detail/model_serialize_imp.h"
-#include "graph/load/model_manager/davinci_model_parser.h"
-#include "graph/model.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "init/gelib.h"
@@ -1682,7 +1675,7 @@ Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const {
 string key_path;
 int32_t priority = 0;
 ModelData model_data;
-ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
+ret = ModelParserBase::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
 if (ret != SUCCESS) {
 GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret);
 return ret;
@@ -16,16 +16,10 @@
 #include "framework/common/helper/model_helper.h"
-#include "common/ge/ge_util.h"
-#include "common/util/error_manager/error_manager.h"
-#include "framework/common/debug/log.h"
-#include "framework/common/util.h"
-#include "framework/common/debug/ge_log.h"
+#include "common/model_parser/model_parser.h"
 #include "framework/omg/model_tool.h"
 #include "framework/omg/version.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/model_manager/davinci_model_parser.h"
-#include "graph/utils/attr_utils.h"
 #include "graph/utils/graph_utils.h"
 using std::string;
@@ -465,7 +459,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c
 return ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA;
 }
-Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
+Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
 if (status != SUCCESS) {
 GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
 return ACL_ERROR_GE_PARAM_INVALID;
@@ -514,7 +508,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
 return INTERNAL_ERROR;
 }
-Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
+Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
 if (status != SUCCESS) {
 GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!");
 return ACL_ERROR_GE_PARAM_INVALID;
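The hunks above switch ModelHelper/ModelCacheHelper from DavinciModelParser to ModelParserBase. The sketch below is a hedged illustration of the replacement call sequence, using only the LoadFromFile/ParseModelContent signatures visible in this diff; the include paths are indicative, not authoritative.

    #include <cstdint>
    #include <string>
    #include "common/model_parser/model_parser.h"  // new home of ModelParserBase
    #include "framework/common/ge_types.h"         // assumed location of ge::ModelData

    // Load an .om file into ModelData, then let ModelParserBase strip the file header
    // and expose the raw model payload, mirroring LoadOmModelFromCache/LoadModel above.
    ge::Status LoadOmSketch(const std::string &om_path) {
      ge::ModelData model_data;
      int32_t priority = 0;
      ge::Status ret = ge::ModelParserBase::LoadFromFile(om_path.c_str(), "", priority, model_data);
      if (ret != ge::SUCCESS) {
        return ret;
      }
      uint8_t *model_addr = nullptr;
      uint32_t model_len = 0;
      // On success, model_addr/model_len describe the payload that follows the file header.
      return ge::ModelParserBase::ParseModelContent(model_data, model_addr, model_len);
    }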
@@ -165,7 +165,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
 return ACL_ERROR_GE_PARAM_INVALID;
 }
 size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
-GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
+GELOGD("ModelPartitionTable num:%u, ModelFileHeader length:%zu, ModelPartitionTable length:%zu",
 partition_table->num, sizeof(ModelFileHeader), mem_offset);
 if (model_data_size <= mem_offset) {
 GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
@@ -207,7 +207,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
 "ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
 index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
 if (model_data_size <= cur_offset) {
-GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
+GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID,
+"invalid model data, partition_table->num:%u, model data size %u",
 partition_table->num, model_data_size);
 return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;
 }
@@ -14,16 +14,13 @@
 * limitations under the License.
 */
-#include "common/model_parser/base.h"
-#include "common/helper/model_helper.h"
-#include <securec.h>
+#include "common/model_parser/model_parser.h"
 #include <fstream>
-#include <memory>
 #include <string>
-#include "framework/common/debug/ge_log.h"
-#include "framework/common/debug/log.h"
-#include "framework/common/util.h"
+#include "securec.h"
+#include "common/helper/model_helper.h"
 namespace ge {
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelParserBase::ModelParserBase() {}
@@ -20,6 +20,8 @@
 #include "framework/common/debug/log.h"
 #include "framework/common/string_util.h"
 #include "graph/ge_context.h"
+#include "graph/utils/type_utils.h"
+#include "graph/types.h"
 #include "runtime/base.h"
 #include "graph/load/model_manager/davinci_model.h"
@@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point";
 #ifdef DAVINCI_SUPPORT_PROFILING
 const size_t kReportMaxLen = 2048;
 const int32_t kMaxDeviceNum = 256;
+const uint32_t kInteval = 2;
 const std::string kConfigNumsdev = "devNums";
 const std::string kConfigDevIdList = "devIdList";
 const std::string kProfStart = "prof_start";
 const std::string kProfStop = "prof_stop";
 const std::string kProfModelSubscribe = "prof_model_subscribe";
 const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
+const std::string kModelName = "model_name";
+const std::string kModelId = "model_id";
+const std::string kOpNmae = "op_name";
+const std::string kOptype = "op_type";
+const std::string kBlockDim = "block_dims";
+const std::string kTaskId = "task_id";
+const std::string kStreamId = "stream_id";
+const std::string kShapeType = "shape_type";
+const std::string kCurIterNum = "cur_iter_num";
+const std::string kTaskType = "task_type";
+const std::string kInput = "input";
+const std::string kOutput = "output";
+const std::string kFormat = "format";
+const std::string kDataType = "data_type";
+const std::string kShape = "shape";
+const std::string kIdx = "idx";
 #endif
 } // namespace
@@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
 #endif
 }
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
-uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo(
+const TaskDescInfo &task, Json &task_json) {
 #ifdef DAVINCI_SUPPORT_PROFILING
-std::string data;
-for (const auto &task : task_desc_info) {
-std::string model_name = task.model_name;
-std::string op_name = task.op_name;
-uint32_t block_dim = task.block_dim;
-uint32_t task_id = task.task_id;
-uint32_t stream_id = task.stream_id;
-std::string shape_type = task.shape_type;
-int64_t cur_iter_num = task.cur_iter_num;
-uint32_t task_type = task.task_type;
-data = model_name.append(" ")
-.append(op_name).append(" ")
-.append(std::to_string(block_dim)).append(" ")
-.append(std::to_string(task_id)).append(" ")
-.append(std::to_string(stream_id)).append(" ")
-.append(std::to_string(model_id)).append(" ")
-.append(shape_type).append(" ")
-.append(std::to_string(cur_iter_num)).append(" ")
-.append(std::to_string(task_type)).append("\n");
-ReporterData reporter_data{};
-reporter_data.deviceId = device_id;
-reporter_data.data = (unsigned char *)data.c_str();
-reporter_data.dataLen = data.size();
-int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info"));
-if (ret != EOK) {
-GELOGE(ret, "Report data tag of task_desc_info memcpy error!");
-return;
-}
-int32_t cb_ret = CallMsprofReport(reporter_data);
-if (cb_ret != 0) {
-GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret);
-return;
-}
+for (size_t i = 0; i < task.input_format.size(); i++) {
+Json tmp_input;
+tmp_input[kIdx] = i;
+Format format = task.input_format[i];
+tmp_input[kFormat] = TypeUtils::FormatToSerialString(format);
+DataType data_type = task.input_data_type[i];
+tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
+tmp_input[kShape] = task.input_shape[i];
+task_json[kInput] += tmp_input;
+}
+for (size_t i = 0; i < task.output_format.size(); i++) {
+Json tmp_output;
+tmp_output[kIdx] = i;
+Format format = task.output_format[i];
+tmp_output[kFormat] = TypeUtils::FormatToSerialString(format);
+DataType data_type = task.output_data_type[i];
+tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
+tmp_output[kShape] = task.output_shape[i];
+task_json[kOutput] += tmp_output;
 }
-data.clear();
 #endif
 }
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
-uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
+uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
 #ifdef DAVINCI_SUPPORT_PROFILING
-std::string data;
-for (const auto &graph : compute_graph_desc_info) {
-data.append("model_name:")
-.append(graph.model_name)
-.append(" op_name:")
-.append(graph.op_name)
-.append(" op_type:")
-.append(graph.op_type);
-for (size_t i = 0; i < graph.input_format.size(); ++i) {
-data.append(" input_id:")
-.append(std::to_string(i))
-.append(" input_format:")
-.append(std::to_string(graph.input_format.at(i)))
-.append(" input_data_type:")
-.append(std::to_string(graph.input_data_type.at(i)))
-.append(" input_shape:\"");
-size_t input_shape_len = graph.input_shape.at(i).size();
-if (input_shape_len == 0) {
-data.append("");
-} else if (input_shape_len == 1) {
-data.append(std::to_string(graph.input_shape.at(i).at(0)));
-} else {
-for (size_t j = 0; j < input_shape_len - 1; ++j) {
-data.append(std::to_string(graph.input_shape.at(i).at(j))).append(",");
-}
-data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1)));
-}
-data.append("\"");
-}
-for (size_t i = 0; i < graph.output_format.size(); ++i) {
-data.append(" output_id:")
-.append(std::to_string(i))
-.append(" output_format:")
-.append(std::to_string(graph.output_format.at(i)))
-.append(" output_data_type:")
-.append(std::to_string(graph.output_data_type.at(i)))
-.append(" output_shape:\"");
-size_t output_shape_len = graph.output_shape.at(i).size();
-if (output_shape_len == 0) {
-data.append("");
-} else if (output_shape_len == 1) {
-data.append(std::to_string(graph.output_shape.at(i).at(0)));
-} else {
-for (size_t j = 0; j < output_shape_len - 1; ++j) {
-data.append(std::to_string(graph.output_shape.at(i).at(j))).append(",");
-}
-data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1)));
-}
-data.append("\"");
+for (const auto &task : task_desc_info) {
+Json task_info;
+task_info[kModelName] = task.model_name;
+task_info[kModelId] = model_id;
+task_info[kOpNmae] = task.op_name;
+task_info[kOptype] = task.op_type;
+task_info[kBlockDim] = task.block_dim;
+task_info[kTaskType] = task.task_type;
+task_info[kTaskId] = task.task_id;
+task_info[kStreamId] = task.stream_id;
+task_info[kCurIterNum] = task.cur_iter_num;
+task_info[kShapeType] = task.shape_type;
+ProfilingOpInputOutInfo(task, task_info);
+std::string reported_data;
+try {
+reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
+} catch (std::exception &e) {
+GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
+return ;
+} catch (...) {
+GELOGE(FAILED, "Failed to convert JSON to string.");
+return;
 }
-data.append(" model_id:").append(std::to_string(model_id));
-data.append(" task_id:").append(std::to_string(graph.task_id));
-data.append(" stream_id:").append(std::to_string(graph.stream_id));
-data.append("\n");
-GraphDescReport(device_id, data);
-data.clear();
+reported_data.append(",")
+.append("\n");
+ReportData(device_id, reported_data, "task_desc_info");
 }
 #endif
 }
-void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) {
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData(
+const int32_t &device_id, const string &data, const string &tag_name) {
 #ifdef DAVINCI_SUPPORT_PROFILING
 ReporterData reporter_data{};
 int ret = -1;
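The rewritten ProfilingTaskDescInfo above emits one JSON object per task instead of a space-separated line. The standalone sketch below reproduces that shape with nlohmann::json, which the local Json alias is assumed to wrap; the field values are hypothetical samples.

    #include <iostream>
    #include <nlohmann/json.hpp>

    using Json = nlohmann::json;  // assumption: matches the alias used in profiling_manager.cc

    int main() {
      Json task_info;
      task_info["model_name"] = "resnet50";  // hypothetical sample values
      task_info["model_id"] = 1;
      task_info["op_name"] = "conv1";
      task_info["op_type"] = "Conv2D";
      task_info["block_dims"] = 32;
      task_info["task_id"] = 7;
      task_info["stream_id"] = 0;

      Json input;
      input["idx"] = 0;
      input["format"] = "NCHW";
      input["data_type"] = "DT_FLOAT16";
      input["shape"] = {1, 3, 224, 224};
      task_info["input"] += input;  // operator+= appends to a JSON array, as the loops above do

      // Mirrors task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore).
      std::cout << task_info.dump(2, ' ', false, Json::error_handler_t::ignore) << std::endl;
      return 0;
    }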
@@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d
 size_t index = data.size() / kReportMaxLen;
 if (index >= 1) {
 reporter_data.deviceId = device_id;
-ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
-GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
+ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
+GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);
 for (size_t i = 0; i < index; ++i) {
 reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i;
 reporter_data.dataLen = kReportMaxLen;
 cb_ret = CallMsprofReport(reporter_data);
-GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
+GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
+return;);
 }
 reporter_data.dataLen = data.size() - kReportMaxLen * index;
 if (reporter_data.dataLen != 0) {
 reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index;
 cb_ret = CallMsprofReport(reporter_data);
-GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
+GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
+return;);
 }
 } else {
 reporter_data.deviceId = device_id;
 reporter_data.data = (unsigned char *)data.c_str();
 reporter_data.dataLen = data.size();
-ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
-GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
+ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
+GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);
 cb_ret = CallMsprofReport(reporter_data);
-GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
+GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
+return;);
 }
 #endif
 }
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
-uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
-const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
+uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) {
 #ifdef DAVINCI_SUPPORT_PROFILING
 int32_t logic_device_id = 0;
 rtError_t rt_ret = rtGetDevice(&logic_device_id);
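ReportData above (GraphDescReport renamed and given a tag parameter) still splits payloads larger than kReportMaxLen (2048 bytes) into fixed-size chunks before handing them to the msprof reporter callback. Below is a minimal standalone sketch of just that chunking logic, with a stand-in for CallMsprofReport.

    #include <cstddef>
    #include <iostream>
    #include <string>

    // Stand-in for CallMsprofReport: report a single chunk, return 0 on success.
    static int FakeReport(const char *data, size_t len) {
      (void)data;
      std::cout << "report chunk of " << len << " bytes" << std::endl;
      return 0;
    }

    // Same shape as ReportData: full max_len chunks first, then the remainder, if any.
    static void ReportInChunks(const std::string &data, size_t max_len) {
      const size_t full_chunks = data.size() / max_len;
      for (size_t i = 0; i < full_chunks; ++i) {
        if (FakeReport(data.c_str() + max_len * i, max_len) != 0) {
          return;
        }
      }
      const size_t tail = data.size() - max_len * full_chunks;
      if (tail != 0) {
        (void)FakeReport(data.c_str() + max_len * full_chunks, tail);
      }
    }

    int main() {
      ReportInChunks(std::string(5000, 'x'), 2048);  // two 2048-byte chunks plus a 904-byte tail
      return 0;
    }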
@@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
 GELOGD("current logic_device_id:%d", logic_device_id);
 GELOGD("start ProfilingTaskDescInfo.");
 ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
-GELOGD("start ProfilingGraphDescInfo.");
-ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id);
 GELOGD("Report profiling data for GE end.");
 #endif
 }
@@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs
 static_cast<void *>(&reporter_data), sizeof(ReporterData));
 }
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo(
+const OpDescPtr &op, TaskDescInfo &task_desc_info) const {
+std::vector<Format> input_format;
+std::vector<std::vector<int64_t>> input_shape;
+std::vector<DataType> input_data_type;
+for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
+GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
+if (input_tensor_desc == nullptr) {
+continue;
+}
+input_format.emplace_back(input_tensor_desc->GetFormat());
+input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
+input_data_type.emplace_back(input_tensor_desc->GetDataType());
+}
+std::vector<Format> output_format;
+std::vector<std::vector<int64_t>> output_shape;
+std::vector<DataType> output_data_type;
+for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
+GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
+if (output_tensor_desc == nullptr) {
+continue;
+}
+output_format.emplace_back(output_tensor_desc->GetFormat());
+output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
+output_data_type.emplace_back(output_tensor_desc->GetDataType());
+}
+std::vector<Format> format_default = { FORMAT_NULL };
+std::vector<std::vector<int64_t>> shape_default = { {0} };
+std::vector<DataType> data_type_default = { DT_UNDEFINED };
+task_desc_info.input_format = input_format.empty() ? format_default : input_format;
+task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape;
+task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type;
+task_desc_info.output_format = output_format.empty() ? format_default : output_format;
+task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape;
+task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type;
+}
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint(
 std::string &fp_point, std::string &bp_point) {
 // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init
@@ -54,6 +54,8 @@ namespace {
 } // namespace
 namespace ge {
+class OpDesc;
+using OpDescPtr = std::shared_ptr<OpDesc>;
 struct DeviceSubsInfo {
 uint64_t module;
 uint32_t subscribe_count;
@@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
 bool ProfilingModelExecuteOn() const;
 // is_execute_profiling_ only used by ge option and env
 bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
-void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
-const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
+void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info);
 void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
 const int32_t &device_id);
-void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
-const int32_t &device_id);
+void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json);
 Status PluginInit() const;
 void PluginUnInit() const;
 Status CallMsprofReport(ReporterData &reporter_data) const;
@@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
 void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; }
 void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
 void GetFpBpPoint(std::string &fp_point, std::string &bp_point);
+void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const;
+void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name);
 private:
 Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf);
 Status ParseOptions(const std::string &options);
@@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
 Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para,
 vector<int32_t> &device_list);
 uint64_t GetProfilingModule();
-void GraphDescReport(const int32_t &device_id, const string &data);
 void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list);
 void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);
@@ -33,7 +33,6 @@ set(SRC_LIST
 "../model/ge_model.cc"
 "../model/ge_root_model.cc"
 "../graph/load/model_manager/davinci_model.cc"
-"../graph/load/model_manager/davinci_model_parser.cc"
 "../graph/load/model_manager/model_manager.cc"
 "../graph/load/model_manager/tbe_handle_store.cc"
 "../graph/load/model_manager/cpu_queue_schedule.cc"
@@ -250,15 +249,14 @@ target_link_options(ge_executor_shared PRIVATE
 target_link_libraries(ge_executor_shared PRIVATE
 $<BUILD_INTERFACE:intf_pub>
 msprofiler
+static_mmpa
 -Wl,--no-as-needed
 ge_common
 runtime
 slog
-mmpa
 graph
 register
 error_manager
-ascend_hal_stub
 ascend_protobuf
 c_sec
 -Wl,--as-needed
| @@ -16,7 +16,6 @@ | |||||
| #include "executor/ge_executor.h" | #include "executor/ge_executor.h" | ||||
| #include <cce/cce.h> | #include <cce/cce.h> | ||||
| #include <cce/compiler_stub.h> | |||||
| #include <ctime> | #include <ctime> | ||||
| #include <iostream> | #include <iostream> | ||||
| #include "common/debug/log.h" | #include "common/debug/log.h" | ||||
| @@ -24,19 +23,11 @@ | |||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
| #include "common/dump/dump_manager.h" | #include "common/dump/dump_manager.h" | ||||
| #include "common/util.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "graph/execute/graph_execute.h" | #include "graph/execute/graph_execute.h" | ||||
| #include "graph/load/graph_loader.h" | #include "graph/load/graph_loader.h" | ||||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||||
| #include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
| #include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
| #include "graph/model.h" | |||||
| #include "graph/utils/graph_utils.h" | |||||
| #include "mmpa/mmpa_api.h" | |||||
| #include "single_op/single_op_manager.h" | #include "single_op/single_op_manager.h" | ||||
| #include "graph/manager/graph_var_manager.h" | |||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | #include "opskernel_manager/ops_kernel_builder_manager.h" | ||||
| @@ -454,7 +445,8 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||||
| if (all_data_dims[i] < 0) { | if (all_data_dims[i] < 0) { | ||||
| cur_dynamic_dims.push_back(dynamic_dims[i]); | cur_dynamic_dims.push_back(dynamic_dims[i]); | ||||
| } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | ||||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld", | |||||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | |||||
| "Static dims should be same, index: %zu value: %lu should be %ld", | |||||
| i, dynamic_dims[i], all_data_dims[i]); | i, dynamic_dims[i], all_data_dims[i]); | ||||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | ||||
| } | } | ||||
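For orientation, here is a hedged, self-contained illustration of the dimension check in the hunk above; the concrete shapes are invented for the example:

```cpp
// Minimal sketch of the dynamic-dims filtering shown above, with made-up shapes.
#include <cstddef>
#include <cstdint>
#include <vector>

int main() {
  std::vector<int64_t> all_data_dims = {-1, 224, 224, 3};  // -1 marks the dynamic dimension
  std::vector<uint64_t> dynamic_dims = {8, 224, 224, 3};   // user-supplied concrete shape
  std::vector<uint64_t> cur_dynamic_dims;

  for (size_t i = 0; i < all_data_dims.size(); ++i) {
    if (all_data_dims[i] < 0) {
      cur_dynamic_dims.push_back(dynamic_dims[i]);         // collect only the dynamic dims
    } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) {
      return -1;  // static dims must match exactly; the real code returns
                  // ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID here
    }
  }
  // cur_dynamic_dims now holds {8}
  return 0;
}
```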
| @@ -930,12 +922,22 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size | |||||
| Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | ||||
| SingleOp **single_op) { | SingleOp **single_op) { | ||||
| return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op); | |||||
| return LoadSingleOpV2(model_name, modelData, stream, single_op, 0); | |||||
| } | |||||
| Status GeExecutor::LoadSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||||
| SingleOp **single_op, const uint64_t model_id) { | |||||
| return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op, model_id); | |||||
| } | } | ||||
| Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | ||||
| DynamicSingleOp **single_op) { | DynamicSingleOp **single_op) { | ||||
| return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op); | |||||
| return LoadDynamicSingleOpV2(model_name, modelData, stream, single_op, 0); | |||||
| } | |||||
| Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||||
| DynamicSingleOp **single_op, const uint64_t model_id) { | |||||
| return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op, model_id); | |||||
| } | } | ||||
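The two V2 overloads above turn the original entry points into thin wrappers that pass a model id of 0. A hedged caller-side sketch; the op name, stream and id are placeholders:

```cpp
// Hedged sketch of calling the new V2 entry points; only the GeExecutor
// signatures come from this diff, the rest is placeholder setup.
#include "executor/ge_executor.h"

ge::Status LoadExampleOp(const ge::ModelData &model_data, void *stream) {
  ge::GeExecutor executor;  // in real use the executor is initialized beforehand
  ge::SingleOp *single_op = nullptr;

  // Existing call sites keep working: this now forwards to LoadSingleOpV2 with model_id 0.
  ge::Status ret = executor.LoadSingleOp("example_op", model_data, stream, &single_op);
  if (ret != ge::SUCCESS) {
    return ret;
  }

  // New call sites can pass an explicit model id through to SingleOpManager.
  const uint64_t model_id = 1;  // placeholder id
  return executor.LoadSingleOpV2("example_op", model_data, stream, &single_op, model_id);
}
```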
| Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | ||||
| @@ -147,7 +147,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTensorDesc &tensor, int32_t index, | |||||
| static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, | |||||
| bool attr) { | bool attr) { | ||||
| GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | ||||
| GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | ||||
| @@ -671,6 +671,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> | |||||
| Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | ||||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | ||||
| bool is_offline) { | bool is_offline) { | ||||
| GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||||
| impl_->is_offline_ = is_offline; | |||||
| if (!is_offline) { | if (!is_offline) { | ||||
| (void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); | (void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); | ||||
| } | } | ||||
| @@ -709,8 +711,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| GELOGI("ATC parser success in single op build."); | GELOGI("ATC parser success in single op build."); | ||||
| GeRootModelPtr ge_root_model = nullptr; | GeRootModelPtr ge_root_model = nullptr; | ||||
| GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||||
| impl_->is_offline_ = is_offline; | |||||
| GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); | GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); | ||||
| map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | ||||
| GE_CHECK_NOTNULL(ge_root_model); | GE_CHECK_NOTNULL(ge_root_model); | ||||
| @@ -723,7 +723,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| const ComputeGraphPtr root_graph = ge_root_model->GetRootGraph(); | const ComputeGraphPtr root_graph = ge_root_model->GetRootGraph(); | ||||
| GeModelPtr &ge_model = name_to_ge_model.begin()->second; | GeModelPtr &ge_model = name_to_ge_model.begin()->second; | ||||
| GE_CHK_STATUS_RET_NOLOG(CheckDynamicSupport(ge_model, root_graph)); | GE_CHK_STATUS_RET_NOLOG(CheckDynamicSupport(ge_model, root_graph)); | ||||
| GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | |||||
| GELOGI("After build model, The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | |||||
| bool all_shape = false; | bool all_shape = false; | ||||
| (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); | (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); | ||||
| @@ -738,6 +738,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| } else { | } else { | ||||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | ||||
| } | } | ||||
| GELOGI("Start save GeModel to Model buffer"); | |||||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); | GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -753,10 +754,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| */ | */ | ||||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| const vector<GeTensor> &outputs, const string &model_file_name) { | const vector<GeTensor> &outputs, const string &model_file_name) { | ||||
| GELOGI("Start to build single op offline model."); | |||||
| GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | |||||
| ModelBufferData model_buff; | ModelBufferData model_buff; | ||||
| OpEngineType engine_type = ENGINE_SYS; | OpEngineType engine_type = ENGINE_SYS; | ||||
| return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | |||||
| Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | |||||
| GELOGI("Finish build single offline model, status: %u", status); | |||||
| return status; | |||||
| } | } | ||||
| /** | /** | ||||
| @@ -772,8 +775,10 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor | |||||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| const vector<GeTensor> &outputs, OpEngineType engine_type, | const vector<GeTensor> &outputs, OpEngineType engine_type, | ||||
| ModelBufferData &model_buff) { | ModelBufferData &model_buff) { | ||||
| GELOGI("Start to build single op online"); | |||||
| return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); | |||||
| GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | |||||
| Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); | |||||
| GELOGI("Finish build single online model, status: %u", status); | |||||
| return status; | |||||
| } | } | ||||
| Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| @@ -798,8 +803,7 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor | |||||
| } | } | ||||
| } else { | } else { | ||||
| for (const auto &in_desc : inputs) { | for (const auto &in_desc : inputs) { | ||||
| GeTensorDesc input_desc = in_desc.GetTensorDesc(); | |||||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); | |||||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true)); | |||||
| arg_index++; | arg_index++; | ||||
| } | } | ||||
| } | } | ||||
| @@ -157,8 +157,8 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { | |||||
| } | } | ||||
| ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, | ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, | ||||
| int64_t dim_index, int64_t &output_mem_size, | |||||
| int64_t &batch_dim_num, int64_t &out_size) { | |||||
| int64_t dim_index, int64_t &output_mem_size, | |||||
| int64_t &batch_dim_num, int64_t &out_size) { | |||||
| graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); | graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); | ||||
| if (graph_status != GRAPH_SUCCESS) { | if (graph_status != GRAPH_SUCCESS) { | ||||
| GELOGE(FAILED, "Opdesc GetSize failed!"); | GELOGE(FAILED, "Opdesc GetSize failed!"); | ||||
| @@ -430,7 +430,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
| GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str()); | GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second), | |||||
| GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true), | |||||
| "Assign node %s continuous input memory failed.", node->GetName().c_str()) | "Assign node %s continuous input memory failed.", node->GetName().c_str()) | ||||
| } | } | ||||
| for (auto pair : memory_offset_) { | for (auto pair : memory_offset_) { | ||||
| @@ -441,7 +441,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
| } | } | ||||
| Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ||||
| int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { | |||||
| int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) { | |||||
| GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); | GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); | ||||
| auto iter = memory_offset_.find(memory_type); | auto iter = memory_offset_.find(memory_type); | ||||
| if (iter == memory_offset_.end()) { | if (iter == memory_offset_.end()) { | ||||
| @@ -508,12 +508,16 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
| std::map<int32_t, int32_t> out2ins; | std::map<int32_t, int32_t> out2ins; | ||||
| GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str()); | GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str()); | ||||
| // output is beginning offset, set offset for input; only support this case now | // output is beginning offset, set offset for input; only support this case now | ||||
| if (out2ins.size() == 1 && out2ins.begin()->second == 0) { | |||||
| if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { | |||||
| auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); | |||||
| output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); | output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); | ||||
| peer_op_desc->SetOutputOffset(output_list); | peer_op_desc->SetOutputOffset(output_list); | ||||
| GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(), | |||||
| out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), | |||||
| output_list_this.at(out2ins.begin()->first), peer_output_offset); | |||||
| } else { | } else { | ||||
| GELOGW("Node %s out %d ref in %d with total ref numbers %zu", node->GetName().c_str(), out2ins.begin()->first, | |||||
| out2ins.begin()->second, out2ins.size()); | |||||
| GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(), | |||||
| out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size()); | |||||
| } | } | ||||
| // first input is beginning offset | // first input is beginning offset | ||||
| mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); | mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); | ||||
| @@ -1535,6 +1539,11 @@ ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int3 | |||||
| bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( | bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( | ||||
| const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) { | const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) { | ||||
| for (const auto &in_node : input_continuous_node->GetInDataNodes()) { | for (const auto &in_node : input_continuous_node->GetInDataNodes()) { | ||||
| if (in_node->GetType() == VARIABLE) { | |||||
| GELOGI("node %s 's precursor node %s is variable, do not store.", input_continuous_node->GetName().c_str(), | |||||
| in_node->GetName().c_str()); | |||||
| return true; | |||||
| } | |||||
| auto iter = node_2_continuous_type.find(in_node); | auto iter = node_2_continuous_type.find(in_node); | ||||
| // In node's topo order in the front, so function can not be exception | // In node's topo order in the front, so function can not be exception | ||||
| auto continuous_type = iter->second; | auto continuous_type = iter->second; | ||||
| @@ -1560,13 +1569,15 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( | |||||
| } | } | ||||
| ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, | ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, | ||||
| uint32_t continuous_type) { | |||||
| uint32_t continuous_type, | |||||
| bool reverse_refresh) { | |||||
| int64_t mem_clean_start = 0; | int64_t mem_clean_start = 0; | ||||
| int64_t mem_clean_size = 0; | int64_t mem_clean_size = 0; | ||||
| int64_t memory_type = RT_MEMORY_HBM; | int64_t memory_type = RT_MEMORY_HBM; | ||||
| GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed."); | GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed."); | ||||
| auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, continuous_type); | |||||
| auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, | |||||
| continuous_type, reverse_refresh); | |||||
| if (ret != ge::SUCCESS) { | if (ret != ge::SUCCESS) { | ||||
| GELOGE(ret, "Assign continuous input memory failed!"); | GELOGE(ret, "Assign continuous input memory failed!"); | ||||
| return ret; | return ret; | ||||
| @@ -131,13 +131,14 @@ class GraphMemoryAssigner { | |||||
| std::map<NodePtr, uint32_t> &node_2_continuous_type); | std::map<NodePtr, uint32_t> &node_2_continuous_type); | ||||
| ge::Status AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, | ge::Status AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, | ||||
| uint32_t continuous_type); | |||||
| uint32_t continuous_type, bool reverse_refresh=false); | |||||
| ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map, | ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map, | ||||
| map<string, vector<NodePtr>> &connecting_output_atomic_nodes); | map<string, vector<NodePtr>> &connecting_output_atomic_nodes); | ||||
| ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ||||
| int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); | |||||
| int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, | |||||
| bool reverse_refresh = false); | |||||
| ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); | ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); | ||||
| @@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
| // subgraph of dynamic graph no need to find index, has been found in parent graph | // subgraph of dynamic graph no need to find index, has been found in parent graph | ||||
| if (IsSubGraphOfDynamicGraph(graph)) { | if (IsSubGraphOfDynamicGraph(graph)) { | ||||
| GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); | |||||
| GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1042,7 +1042,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||||
| } | } | ||||
| GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", | GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", | ||||
| is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index, | is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index, | ||||
| profiling_point.end_index.size() ); | |||||
| profiling_point.end_index.size()); | |||||
| bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | ||||
| if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) { | if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) { | ||||
| @@ -19,12 +19,8 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "common/model_parser/base.h" | |||||
| #include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
| #include "omm/csa_interact.h" | #include "omm/csa_interact.h" | ||||
| #include "runtime/dev.h" | |||||
| #include "runtime/mem.h" | |||||
| namespace ge { | namespace ge { | ||||
| GraphExecutor::GraphExecutor() | GraphExecutor::GraphExecutor() | ||||
| @@ -20,19 +20,13 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include "common/util.h" | |||||
| #include "common/model_parser/model_parser.h" | |||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||||
| #include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| #include "omm/csa_interact.h" | #include "omm/csa_interact.h" | ||||
| #include "runtime/dev.h" | |||||
| namespace ge { | namespace ge { | ||||
| GraphLoader::GraphLoader() = default; | |||||
| GraphLoader::~GraphLoader() = default; | |||||
| Status GraphLoader::UnloadModel(uint32_t model_id) { | Status GraphLoader::UnloadModel(uint32_t model_id) { | ||||
| auto model_manager = ModelManager::GetInstance(); | auto model_manager = ModelManager::GetInstance(); | ||||
| GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
| @@ -120,7 +114,6 @@ Status GraphLoader::GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size) { | |||||
| Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string &key_path, int32_t priority, | Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string &key_path, int32_t priority, | ||||
| ModelData &model_data) { | ModelData &model_data) { | ||||
| Status ret; | |||||
| if (!CheckInputPathValid(path)) { | if (!CheckInputPathValid(path)) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); | GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); | ||||
| return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | ||||
| @@ -132,16 +125,15 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string | |||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | |||||
| Status ret = ModelParserBase::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret); | GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret); | ||||
| if (model_data.model_data != nullptr) { | if (model_data.model_data != nullptr) { | ||||
| delete[] static_cast<char *>(model_data.model_data); | delete[] static_cast<char *>(model_data.model_data); | ||||
| model_data.model_data = nullptr; | model_data.model_data = nullptr; | ||||
| } | } | ||||
| return ret; | |||||
| } | } | ||||
| return SUCCESS; | |||||
| return ret; | |||||
| } | } | ||||
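A hedged note on the failure contract shown above: `LoadDataFromFile` frees and nulls `model_data.model_data` on a parse failure, so a caller only releases the buffer after a successful load. Sketch with a placeholder path:

```cpp
// Hedged caller sketch for the LoadDataFromFile contract above; the path and
// priority are placeholders, and the buffer is released the same way the
// loader itself does on failure.
#include <string>

ge::Status LoadOmFileExample() {
  ge::ModelData model_data;
  ge::GraphLoader loader;
  ge::Status ret = loader.LoadDataFromFile("/tmp/example.om", "", 0, model_data);
  if (ret != ge::SUCCESS) {
    return ret;  // model_data.model_data has already been freed and nulled
  }
  // ... pass model_data on to model loading, then release the buffer ...
  delete[] static_cast<char *>(model_data.model_data);
  model_data.model_data = nullptr;
  return ge::SUCCESS;
}
```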
| Status GraphLoader::CommandHandle(const Command &command) { | Status GraphLoader::CommandHandle(const Command &command) { | ||||
| @@ -32,9 +32,9 @@ | |||||
| namespace ge { | namespace ge { | ||||
| class GraphLoader { | class GraphLoader { | ||||
| public: | public: | ||||
| GraphLoader(); | |||||
| GraphLoader() = default; | |||||
| virtual ~GraphLoader(); | |||||
| virtual ~GraphLoader() = default; | |||||
| GraphLoader(const GraphLoader &in) = delete; | GraphLoader(const GraphLoader &in) = delete; | ||||
| @@ -92,9 +92,35 @@ const uint32_t kEndOfSequence = 0x0704000a; | |||||
| const uint32_t kEndOfSequenceNew = 507005; | const uint32_t kEndOfSequenceNew = 507005; | ||||
| const int32_t kModelAbortNormal = 0x0704000e; | const int32_t kModelAbortNormal = 0x0704000e; | ||||
| const int32_t kModelAbortNormalNew = 507024; | const int32_t kModelAbortNormalNew = 507024; | ||||
| const uint32_t kInteval = 2; | |||||
| const char *const kModelName = "model_name"; | |||||
| const char *const kModeleId = "model_id"; | |||||
| const char *const kLoadStartTime = "load_start_time"; | |||||
| const char *const kLoadEndTime = "load_end_time"; | |||||
| const char *const kFusionOpInfo = "fusion_op_info"; | |||||
| const char *const kFusionOpName = "fusion_op_name"; | |||||
| const char *const kOriginalOpNum = "origin_op_num"; | |||||
| const char *const kOriginalOpName = "origin_op_name"; | |||||
| const char *const kStreamId = "stream_id"; | |||||
| const char *const kFusionOpMemoryInfo = "memory_info"; | |||||
| const char *const kInputSize = "input_size"; | |||||
| const char *const kOutputSize = "output_size"; | |||||
| const char *const kWeightSize = "weight_size"; | |||||
| const char *const kWorkSpaceSize = "workspace_size"; | |||||
| const char *const kTotalSize = "total_size"; | |||||
| const char *const kTaskCount = "task_count"; | |||||
| const char *const kTaskId = "task_id"; | |||||
| const char *const kRequestId = "request_id"; | |||||
| const char *const kThreadId = "thread_id"; | |||||
| const char *const kInputBeginTime = "input_begin_time"; | |||||
| const char *const kInputEndTime = "input_end_time"; | |||||
| const char *const kInferBeginTime = "infer_begin_time"; | |||||
| const char *const kInferEndTime = "infer_end_time"; | |||||
| const char *const kOutputBeginTime = "output_start_time"; | |||||
| const char *const kOutputEndTime = "output_end_time"; | |||||
| inline bool IsDataOp(const std::string &node_type) { | inline bool IsDataOp(const std::string &node_type) { | ||||
| return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; | |||||
| return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); | |||||
| } | } | ||||
| inline bool IsTbeTask(const OpDescPtr &op_desc) { | inline bool IsTbeTask(const OpDescPtr &op_desc) { | ||||
| @@ -187,12 +213,12 @@ DavinciModel::~DavinciModel() { | |||||
| UnbindTaskSinkStream(); | UnbindTaskSinkStream(); | ||||
| for (size_t i = 0; i < label_list_.size(); ++i) { | for (size_t i = 0; i < label_list_.size(); ++i) { | ||||
| if (label_list_[i] != nullptr) { | if (label_list_[i] != nullptr) { | ||||
| GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index: %zu", i); | |||||
| GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index:%zu", i); | |||||
| } | } | ||||
| } | } | ||||
| for (size_t i = 0; i < stream_list_.size(); ++i) { | for (size_t i = 0; i < stream_list_.size(); ++i) { | ||||
| GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index: %zu", i); | |||||
| GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index:%zu", i); | |||||
| } | } | ||||
| for (size_t i = 0; i < event_list_.size(); ++i) { | for (size_t i = 0; i < event_list_.size(); ++i) { | ||||
| @@ -360,7 +386,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size); | GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size); | ||||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | return ACL_ERROR_GE_MEMORY_ALLOCATION; | ||||
| } | } | ||||
| GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", | |||||
| GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu].", | |||||
| runtime_param_.graph_id, mem_base_, data_size); | runtime_param_.graph_id, mem_base_, data_size); | ||||
| if (!is_inner_weight_base_) { | if (!is_inner_weight_base_) { | ||||
| @@ -381,7 +407,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
| is_inner_p2p_mem_base_ = true; | is_inner_p2p_mem_base_ = true; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); | |||||
| GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed"); | |||||
| runtime_param_.mem_base = mem_base_; | runtime_param_.mem_base = mem_base_; | ||||
| runtime_param_.weight_base = weights_mem_base_; | runtime_param_.weight_base = weights_mem_base_; | ||||
| runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; | runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; | ||||
| @@ -391,7 +417,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
| Status DavinciModel::InitVariableMem() { | Status DavinciModel::InitVariableMem() { | ||||
| // malloc variable memory base | // malloc variable memory base | ||||
| var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM); | var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM); | ||||
| if (TotalVarMemSize() && var_mem_base_ == nullptr) { | |||||
| if (TotalVarMemSize() && (var_mem_base_ == nullptr)) { | |||||
| Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize()); | Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize()); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Malloc variable memory failed."); | GELOGE(ret, "Malloc variable memory failed."); | ||||
| @@ -500,25 +526,25 @@ Status DavinciModel::DoTaskSink() { | |||||
| } | } | ||||
| GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_)); | GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_)); | ||||
| GELOGI("do task_sink. AiCpu deploy type is: %x.", deploy_type_); | |||||
| GELOGI("do task_sink. AiCpu deploy type is: %x", deploy_type_); | |||||
| GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed."); | GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed."); | ||||
| if (known_node_) { | if (known_node_) { | ||||
| GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed."); | |||||
| GE_CHK_STATUS_RET(MallocKnownArgs(), "Malloc known node args failed"); | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed."); | |||||
| GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed"); | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed"); | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed."); | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed"); | |||||
| GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); | |||||
| GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed"); | |||||
| GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed."); | |||||
| GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed"); | |||||
| GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); | |||||
| GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed"); | |||||
| GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); | GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); | ||||
| @@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| } | } | ||||
| Status DavinciModel::ReportProfilingData() { | Status DavinciModel::ReportProfilingData() { | ||||
| std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | |||||
| Status ret = GetComputeGraphInfo(compute_graph_desc_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetComputeGraphInfo failed."); | |||||
| return ret; | |||||
| } | |||||
| ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | |||||
| ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo()); | |||||
| GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() { | |||||
| } | } | ||||
| Status DavinciModel::SinkModelProfile() { | Status DavinciModel::SinkModelProfile() { | ||||
| // profiling plugin must be registered | |||||
| auto &prof_mgr = ProfilingManager::Instance(); | auto &prof_mgr = ProfilingManager::Instance(); | ||||
| ReporterData reporter_data{}; | |||||
| // report model data tag name | |||||
| std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||||
| return FAILED, "Sink model tag memcpy error."); | |||||
| // Model Header | // Model Header | ||||
| std::string name = om_name_.empty() ? name_ : om_name_; | std::string name = om_name_.empty() ? name_ : om_name_; | ||||
| size_t name_len = name.size(); | |||||
| reporter_data.deviceId = device_id_; | |||||
| reporter_data.data = (unsigned char *)&name_len; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| reporter_data.data = (unsigned char *)name.c_str(); | |||||
| reporter_data.dataLen = name.size(); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| uint32_t model_id = this->Id(); | uint32_t model_id = this->Id(); | ||||
| reporter_data.data = (unsigned char *)&model_id; | |||||
| reporter_data.dataLen = sizeof(uint32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // Load Start/End Time | |||||
| int64_t start_time = this->GetLoadBeginTime(); | int64_t start_time = this->GetLoadBeginTime(); | ||||
| reporter_data.data = (unsigned char *)&start_time; | |||||
| reporter_data.dataLen = sizeof(int64_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| int64_t end_time = this->GetLoadEndTime(); | int64_t end_time = this->GetLoadEndTime(); | ||||
| reporter_data.data = (unsigned char *)&end_time; | |||||
| reporter_data.dataLen = sizeof(int64_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| Json model_load_info; | |||||
| model_load_info[kModelName] = name; | |||||
| model_load_info[kModeleId] = model_id; | |||||
| model_load_info[kLoadStartTime] = start_time; | |||||
| model_load_info[kLoadEndTime] = end_time; | |||||
| // fusion op info | |||||
| using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | ||||
| using Range = std::pair<CIT, CIT>; | using Range = std::pair<CIT, CIT>; | ||||
| for (const ProfileInfo &profile : profile_list_) { | for (const ProfileInfo &profile : profile_list_) { | ||||
| // op name after fusion | |||||
| Json fusion_op_info; | |||||
| string fusion_op_name = profile.fusion_info.op_name; | string fusion_op_name = profile.fusion_info.op_name; | ||||
| int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); | |||||
| reporter_data.data = (unsigned char *)&fusion_op_name_len; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| reporter_data.data = (unsigned char *)fusion_op_name.c_str(); | |||||
| reporter_data.dataLen = fusion_op_name_len; | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // original op name before fusion | |||||
| uint32_t op_num = profile.fusion_info.original_op_names.size(); | uint32_t op_num = profile.fusion_info.original_op_names.size(); | ||||
| reporter_data.data = (unsigned char *)&op_num; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| vector<string> original_name; | |||||
| for (uint32_t k = 0; k < op_num; k++) { | for (uint32_t k = 0; k < op_num; k++) { | ||||
| std::string op_name = profile.fusion_info.original_op_names[k]; | |||||
| int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); | |||||
| reporter_data.data = (unsigned char *)&op_name_len; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| reporter_data.data = (unsigned char *)op_name.c_str(); | |||||
| reporter_data.dataLen = op_name_len; | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| } | |||||
| // stream id info | |||||
| original_name.emplace_back(profile.fusion_info.original_op_names[k]); | |||||
| } | |||||
| uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
| auto iter = profiler_report_op_info_.find(fusion_op_name); | auto iter = profiler_report_op_info_.find(fusion_op_name); | ||||
| if (iter != profiler_report_op_info_.end()) { | if (iter != profiler_report_op_info_.end()) { | ||||
| stream_id = iter->second.second; | stream_id = iter->second.second; | ||||
| } | } | ||||
| reporter_data.data = (unsigned char *)&stream_id; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // memory info | |||||
| reporter_data.data = (unsigned char *)&profile.memory_info; | |||||
| reporter_data.dataLen = sizeof(profile.memory_info); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // task info | |||||
| reporter_data.data = (unsigned char *)&profile.task_count; | |||||
| reporter_data.dataLen = sizeof(uint32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| fusion_op_info[kFusionOpName] = fusion_op_name; | |||||
| fusion_op_info[kOriginalOpNum] = op_num; | |||||
| fusion_op_info[kOriginalOpName] = original_name; | |||||
| fusion_op_info[kStreamId] = stream_id; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size; | |||||
| fusion_op_info[kTaskCount] = profile.task_count; | |||||
| vector<uint32_t> task_id; | |||||
| Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | ||||
| for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | ||||
| uint32_t task_id = idx->second; | |||||
| reporter_data.data = (unsigned char *)&task_id; | |||||
| reporter_data.dataLen = sizeof(uint32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| task_id.push_back(idx->second); | |||||
| } | } | ||||
| fusion_op_info[kTaskId] = task_id; | |||||
| model_load_info[kFusionOpInfo] += fusion_op_info; | |||||
| } | } | ||||
| std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||||
| std::string reported_data; | |||||
| try { | |||||
| reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||||
| } catch (std::exception &e) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||||
| } catch (...) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||||
| } | |||||
| reported_data.append(",") | |||||
| .append("\n"); | |||||
| prof_mgr.ReportData(device_id_, reported_data, tag_name); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
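SinkModelProfile now builds one JSON object per model instead of streaming raw ReporterData chunks. A hedged sketch of the payload shape, assuming the `Json` alias resolves to nlohmann::json (which the `dump(...)` arguments and `operator+=` usage suggest); all values are invented:

```cpp
// Rough shape of the model_load_info payload assembled above. Field names are
// the k* constants introduced earlier in this diff; values are made up.
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

using Json = nlohmann::json;  // assumption about what the codebase's Json alias is

std::string BuildExampleLoadInfo() {
  Json model_load_info;
  model_load_info["model_name"] = "example.om";
  model_load_info["model_id"] = 1u;
  model_load_info["load_start_time"] = 1000;
  model_load_info["load_end_time"] = 1200;

  Json fusion_op_info;
  fusion_op_info["fusion_op_name"] = "conv_bn_relu";
  fusion_op_info["origin_op_num"] = 3u;
  fusion_op_info["origin_op_name"] = std::vector<std::string>{"conv", "bn", "relu"};
  fusion_op_info["stream_id"] = 0u;
  fusion_op_info["memory_info"]["input_size"] = 1024;
  fusion_op_info["memory_info"]["output_size"] = 512;
  fusion_op_info["task_count"] = 1u;
  fusion_op_info["task_id"] = std::vector<uint32_t>{7};
  model_load_info["fusion_op_info"] += fusion_op_info;  // += on a null value creates the array

  // Two-space indented dump plus a trailing ",\n", matching what ReportData receives.
  return model_load_info.dump(2, ' ', false, Json::error_handler_t::ignore) + ",\n";
}
```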
| Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | ||||
| // profiling plugin must be registered | |||||
| auto &prof_mgr = ProfilingManager::Instance(); | auto &prof_mgr = ProfilingManager::Instance(); | ||||
| ReporterData reporter_data{}; | |||||
| string name = om_name_.empty() ? name_ : om_name_; | |||||
| Json model_time_info; | |||||
| model_time_info[kModelName] = name; | |||||
| model_time_info[kModeleId] = this->Id(); | |||||
| model_time_info[kRequestId] = current_data.request_id; | |||||
| model_time_info[kThreadId] = GetDataInputTid(); | |||||
| model_time_info[kInputBeginTime] = time_info_.processBeginTime; | |||||
| model_time_info[kInputEndTime] = time_info_.processEndTime; | |||||
| model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime; | |||||
| model_time_info[kInferEndTime] = time_info_.inferenceEndTime; | |||||
| model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime; | |||||
| model_time_info[kOutputEndTime] = time_info_.dumpEndTime; | |||||
| // report model data tag name | // report model data tag name | ||||
| std::string tag_name; | std::string tag_name; | ||||
| tag_name.append("model_time_info_") | tag_name.append("model_time_info_") | ||||
| .append(std::to_string(this->Id())) | |||||
| .append("_") | |||||
| .append(std::to_string(current_data.index)); | |||||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||||
| return FAILED, "Sink model tag memcpy error."); | |||||
| // device id | |||||
| reporter_data.deviceId = device_id_; | |||||
| // Model Header | |||||
| string name; | |||||
| if (!om_name_.empty()) { | |||||
| name = om_name_; | |||||
| } else { | |||||
| name = name_; | |||||
| } | |||||
| size_t name_len = name.size(); | |||||
| reporter_data.data = (unsigned char *)&name_len; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| reporter_data.data = (unsigned char *)name.c_str(); | |||||
| reporter_data.dataLen = name.size(); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // request id | |||||
| uint64_t request_id = current_data.request_id; | |||||
| reporter_data.data = (unsigned char *)&request_id; | |||||
| reporter_data.dataLen = sizeof(uint32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||||
| // thread id | |||||
| int32_t thread_id = GetDataInputTid(); | |||||
| reporter_data.data = (unsigned char *)&thread_id; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||||
| // time info | |||||
| time_info_.modelId = this->Id(); | |||||
| reporter_data.data = (unsigned char *)&time_info_; | |||||
| reporter_data.dataLen = sizeof(struct timeInfo); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||||
| .append(std::to_string(this->Id())) | |||||
| .append("_") | |||||
| .append(std::to_string(current_data.index)); | |||||
| std::string reported_data; | |||||
| try { | |||||
| reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||||
| } catch (std::exception &e) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||||
| } catch (...) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||||
| } | |||||
| reported_data.append(",") | |||||
| .append("\n"); | |||||
| prof_mgr.ReportData(device_id_, reported_data, tag_name); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -2641,6 +2589,7 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| bool seq_end_flag = false; | bool seq_end_flag = false; | ||||
| uint32_t model_id = model->Id(); | uint32_t model_id = model->Id(); | ||||
| uint32_t device_id = model->GetDeviceId(); | uint32_t device_id = model->GetDeviceId(); | ||||
| GetContext().SetWorkStreamId(model->GetWorkStreamId()); | |||||
| GELOGI("Model Run thread start, model_id:%u.", model_id); | GELOGI("Model Run thread start, model_id:%u.", model_id); | ||||
| rtError_t rt_ret = rtSetDevice(static_cast<int32_t>(device_id)); | rtError_t rt_ret = rtSetDevice(static_cast<int32_t>(device_id)); | ||||
| @@ -2807,6 +2756,7 @@ Status DavinciModel::ModelRunStart() { | |||||
| int64_t maxDumpOpNum = std::strtol(opt.c_str(), nullptr, kDecimal); | int64_t maxDumpOpNum = std::strtol(opt.c_str(), nullptr, kDecimal); | ||||
| maxDumpOpNum_ = maxDumpOpNum; | maxDumpOpNum_ = maxDumpOpNum; | ||||
| work_stream_id_ = GetContext().WorkStreamId(); | |||||
| CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this); | CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this); | ||||
| GELOGI("model tread create success, model id:%u.", model_id_); | GELOGI("model tread create success, model id:%u.", model_id_); | ||||
| return SUCCESS; | return SUCCESS; | ||||
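The two hunks above capture the work stream id in the thread that starts the model and re-apply it inside the model's Run thread. A hedged, generic illustration of why both sides are needed, using a plain `thread_local` variable as a stand-in for `GetContext()`:

```cpp
// Thread-local context does not cross CREATE_STD_THREAD, so the id is captured
// before the worker starts and re-set inside it. g_work_stream_id is only a
// stand-in for GetContext().WorkStreamId() / SetWorkStreamId().
#include <cstdint>
#include <thread>

thread_local uint64_t g_work_stream_id = 0;

void RunModelThread(uint64_t captured_work_stream_id) {
  std::thread worker([captured_work_stream_id]() {
    g_work_stream_id = captured_work_stream_id;  // mirrors SetWorkStreamId() in DavinciModel::Run
    // ... the model run loop would execute here with the propagated id ...
  });
  worker.join();
}

void StartExample() {
  g_work_stream_id = 42;             // whatever the starting thread's context holds
  RunModelThread(g_work_stream_id);  // mirrors work_stream_id_ = GetContext().WorkStreamId()
}
```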
| @@ -3069,13 +3019,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||||
| task_desc_info.model_name = name_; | task_desc_info.model_name = name_; | ||||
| } | } | ||||
| task_desc_info.op_name = op->GetName(); | task_desc_info.op_name = op->GetName(); | ||||
| task_desc_info.op_type = op->GetType(); | |||||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | task_desc_info.block_dim = task_def.kernel().block_dim(); | ||||
| task_desc_info.task_id = task->GetTaskID(); | task_desc_info.task_id = task->GetTaskID(); | ||||
| task_desc_info.stream_id = task->GetStreamId(); | task_desc_info.stream_id = task->GetStreamId(); | ||||
| task_desc_info.shape_type = "static"; | task_desc_info.shape_type = "static"; | ||||
| task_desc_info.cur_iter_num = 0; | task_desc_info.cur_iter_num = 0; | ||||
| // task type | |||||
| task_desc_info.task_type = kTaskTypeInvalid; | task_desc_info.task_type = kTaskTypeInvalid; | ||||
| auto &prof_mgr = ProfilingManager::Instance(); | |||||
| prof_mgr.GetOpInputOutputInfo(op, task_desc_info); | |||||
| auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
| if (model_task_type == RT_MODEL_TASK_KERNEL) { | if (model_task_type == RT_MODEL_TASK_KERNEL) { | ||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | const domi::KernelDef &kernel_def = task_def.kernel(); | ||||
| @@ -3107,7 +3059,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||||
| task_desc_info_.emplace_back(task_desc_info); | task_desc_info_.emplace_back(task_desc_info); | ||||
| } | } | ||||
| } | } | ||||
| return; | |||||
| } | } | ||||
| Status DavinciModel::DistributeTask() { | Status DavinciModel::DistributeTask() { | ||||
| @@ -3332,7 +3283,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp | |||||
| /// | /// | ||||
| Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | ||||
| const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) { | const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) { | ||||
| string input_or_output = "input"; | |||||
| string input_or_output; | |||||
| is_input ? input_or_output = "input" : input_or_output = "output"; | is_input ? input_or_output = "input" : input_or_output = "output"; | ||||
| if (blobs.size() != data_info.size()) { | if (blobs.size() != data_info.size()) { | ||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", | ||||
| @@ -3342,7 +3293,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> & | |||||
| for (const auto &data : data_info) { | for (const auto &data : data_info) { | ||||
| if (data.first >= blobs.size()) { // check data index. | if (data.first >= blobs.size()) { // check data index. | ||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||||
| "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", | |||||
| input_or_output.c_str(), data.first, blobs.size()); | input_or_output.c_str(), data.first, blobs.size()); | ||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| @@ -4007,41 +3959,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea | |||||
| main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | ||||
| } | } | ||||
| Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) { | |||||
| auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); | |||||
| for (auto &op_desc : all_op_desc) { | |||||
| ComputeGraphDescInfo compute_graph_info; | |||||
| if (!om_name_.empty()) { | |||||
| compute_graph_info.model_name = om_name_; | |||||
| } else { | |||||
| compute_graph_info.model_name = name_; | |||||
| } | |||||
| std::vector<Format> format = { FORMAT_NULL }; | |||||
| std::vector<std::vector<int64_t>> shape = { {0} }; | |||||
| std::vector<DataType> data_type = { DT_UNDEFINED }; | |||||
| compute_graph_info.op_name = op_desc.op_name; | |||||
| compute_graph_info.op_type = op_desc.op_type; | |||||
| compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||||
| compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||||
| compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||||
| compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||||
| compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||||
| compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| auto iter = profiler_report_op_info_.find(op_desc.op_name); | |||||
| if (iter != profiler_report_op_info_.end()) { | |||||
| task_id = iter->second.first; | |||||
| stream_id = iter->second.second; | |||||
| } | |||||
| compute_graph_info.task_id = task_id; | |||||
| compute_graph_info.stream_id = stream_id; | |||||
| graph_desc_info.emplace_back(compute_graph_info); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | ||||
| if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | ||||
| tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | ||||
| @@ -4133,10 +4050,10 @@ Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op | |||||
| int64_t data_input_size; | int64_t data_input_size; | ||||
| (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); | (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); | ||||
| GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", | GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", | ||||
| index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||||
| formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); | |||||
| index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||||
| formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); | |||||
| } | } | ||||
| } | } | ||||
| @@ -412,6 +412,8 @@ class DavinciModel { | |||||
| /// | /// | ||||
| uint64_t GetSessionId() const { return session_id_; } | uint64_t GetSessionId() const { return session_id_; } | ||||
| uint64_t GetWorkStreamId() const { return work_stream_id_; } | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief SetDeviceId | /// @brief SetDeviceId | ||||
| @@ -840,9 +842,6 @@ class DavinciModel { | |||||
| Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | ||||
| // get desc info of graph for profiling | |||||
| Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | |||||
| void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | ||||
| Status InitL1DataDumperArgs(); | Status InitL1DataDumperArgs(); | ||||
| @@ -960,6 +959,7 @@ class DavinciModel { | |||||
| vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task. | vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task. | ||||
| uint64_t session_id_; | uint64_t session_id_; | ||||
| uint64_t work_stream_id_; | |||||
| uint32_t device_id_; | uint32_t device_id_; | ||||
| @@ -1,23 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||||
| namespace ge { | |||||
| DavinciModelParser::DavinciModelParser() {} | |||||
| DavinciModelParser::~DavinciModelParser() {} | |||||
| } // namespace ge | |||||
| @@ -1,46 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_ | |||||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_ | |||||
| #include <securec.h> | |||||
| #include <memory> | |||||
| #include "common/debug/log.h" | |||||
| #include "common/ge_types.h" | |||||
| #include "common/model_parser/base.h" | |||||
| #include "common/types.h" | |||||
| #include "common/util.h" | |||||
| namespace ge { | |||||
| class DavinciModelParser : public ModelParserBase { | |||||
| public: | |||||
| /// | |||||
| /// @ingroup hiai | |||||
| /// @brief constructor | |||||
| /// | |||||
| DavinciModelParser(); | |||||
| /// | |||||
| /// @ingroup hiai | |||||
| /// @brief destructor | |||||
| /// | |||||
| ~DavinciModelParser(); | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_ | |||||
| @@ -18,23 +18,15 @@ | |||||
| #include <string> | #include <string> | ||||
| #include "mmpa/mmpa_api.h" | |||||
| #include "aicpu/aicpu_schedule/aicpu_op_type_list.h" | #include "aicpu/aicpu_schedule/aicpu_op_type_list.h" | ||||
| #include "common/model_parser/model_parser.h" | |||||
| #include "common/dump/dump_manager.h" | #include "common/dump/dump_manager.h" | ||||
| #include "common/l2_cache_optimize.h" | #include "common/l2_cache_optimize.h" | ||||
| #include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
| #include "common/properties_manager.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "graph/common/ge_call_wrapper.h" | #include "graph/common/ge_call_wrapper.h" | ||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||||
| #include "model/ge_root_model.h" | #include "model/ge_root_model.h" | ||||
| #include "graph/common/local_context.h" | |||||
| #include "graph/utils/attr_utils.h" | |||||
| #include "common/formats/utils/formats_trans_utils.h" | #include "common/formats/utils/formats_trans_utils.h" | ||||
| #include "hybrid/hybrid_davinci_model.h" | |||||
| namespace ge { | namespace ge { | ||||
| thread_local uint32_t device_count = 0; | thread_local uint32_t device_count = 0; | ||||
| @@ -1403,7 +1395,7 @@ Status ModelManager::LaunchCustAicpuSo() { | |||||
| Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &mem_size, size_t &weight_size) { | Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &mem_size, size_t &weight_size) { | ||||
| uint8_t *model_data = nullptr; | uint8_t *model_data = nullptr; | ||||
| uint32_t model_len = 0; | uint32_t model_len = 0; | ||||
| Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); | |||||
| Status ret = ModelParserBase::ParseModelContent(model, model_data, model_len); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!"); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!"); | ||||
| OmFileLoadHelper om_file_helper; | OmFileLoadHelper om_file_helper; | ||||
| @@ -28,10 +28,9 @@ const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, | |||||
| kBinSizeUnit8 * kMByteSize, | kBinSizeUnit8 * kMByteSize, | ||||
| kBinSizeUnit32 * kMByteSize, | kBinSizeUnit32 * kMByteSize, | ||||
| kBinSizeUnit128 * kMByteSize, | kBinSizeUnit128 * kMByteSize, | ||||
| kGByteSize, | |||||
| kBinSizeUnit4 * kGByteSize, | |||||
| kBinSizeUnit16 * kGByteSize, | |||||
| kBinSizeUnit26 * kGByteSize}; | |||||
| kBinSizeUnit256 * kMByteSize, | |||||
| kBinSizeUnit512 * kMByteSize, | |||||
| kGByteSize}; | |||||
| static bool BlockComparator(const Block *left, const Block *right) { | static bool BlockComparator(const Block *left, const Block *right) { | ||||
| if (left->size != right->size) { | if (left->size != right->size) { | ||||
| @@ -63,7 +62,10 @@ size_t GetBinIndex(size_t size) { | |||||
| size_t GetAllocationSize(size_t size) { | size_t GetAllocationSize(size_t size) { | ||||
| size_t index = GetBinIndex(size); | size_t index = GetBinIndex(size); | ||||
| return bin_ranges[index]; | |||||
| if (bin_ranges[index] >= size) { | |||||
| return bin_ranges[index]; | |||||
| } | |||||
| return kGByteSize * ((size + kGByteSize - 1) / kGByteSize); | |||||
| } | } | ||||
| /// | /// | ||||
| @@ -119,6 +121,7 @@ void CachingAllocator::Finalize(uint32_t device_id) { | |||||
| } | } | ||||
| uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { | uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { | ||||
| GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); | |||||
| uint8_t *ptr = nullptr; | uint8_t *ptr = nullptr; | ||||
| size = GetBlockSize(size); | size = GetBlockSize(size); | ||||
| Block *block = FindFreeBlock(size, org_ptr, device_id); | Block *block = FindFreeBlock(size, org_ptr, device_id); | ||||
| @@ -253,6 +256,7 @@ Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, ui | |||||
| } | } | ||||
| Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { | Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { | ||||
| GELOGI("Try to extend cache. size = %zu, device id = %u", size, device_id); | |||||
| auto memory_size = GetAllocationSize(size); | auto memory_size = GetAllocationSize(size); | ||||
| const std::string purpose = "Memory for caching."; | const std::string purpose = "Memory for caching."; | ||||
| auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id); | auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id); | ||||
| @@ -36,17 +36,17 @@ namespace ge { | |||||
| constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes | constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes | ||||
| constexpr size_t kBinSizeUnit4 = 4; | constexpr size_t kBinSizeUnit4 = 4; | ||||
| constexpr size_t kBinSizeUnit8 = 8; | constexpr size_t kBinSizeUnit8 = 8; | ||||
| constexpr size_t kBinSizeUnit16 = 16; | |||||
| constexpr size_t kBinSizeUnit26 = 26; | |||||
| constexpr size_t kBinSizeUnit32 = 32; | constexpr size_t kBinSizeUnit32 = 32; | ||||
| constexpr size_t kBinSizeUnit128 = 128; | constexpr size_t kBinSizeUnit128 = 128; | ||||
| constexpr size_t kBinSizeUnit256 = 256; | |||||
| constexpr size_t kBinSizeUnit512 = 512; | |||||
| constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold | |||||
| constexpr double kSplitThreshold = 0.5;  // split when malloc size <= small block size * kSplitThreshold | |||||
| constexpr size_t kKByteSize = 1024; | constexpr size_t kKByteSize = 1024; | ||||
| constexpr size_t kMByteSize = 1048576; // 1024 * 1024 | constexpr size_t kMByteSize = 1048576; // 1024 * 1024 | ||||
| constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024 | constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024 | ||||
| static const uint32_t kNumBins = 8; | |||||
| static const uint32_t kNumBins = 7; | |||||
| class MemoryAllocator; | class MemoryAllocator; | ||||
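The allocator hunks above shrink the bin table to seven entries topping out at 1 GB and make GetAllocationSize fall back to rounding oversized requests up to a whole number of gigabytes instead of serving them from a huge bin. A minimal standalone sketch of that sizing policy, assuming the seven bin values implied by this diff (512 KB, 8 MB, 32 MB, 128 MB, 256 MB, 512 MB, 1 GB); it is illustrative only, not the GE implementation:

```cpp
#include <cstddef>
#include <cstdio>

// Sizing-policy sketch: pick the smallest bin that fits; otherwise round the
// request up to a whole number of gigabytes (mirrors the new GetAllocationSize).
constexpr size_t kKByte = 1024;
constexpr size_t kMByte = 1024 * kKByte;
constexpr size_t kGByte = 1024 * kMByte;
// Assumed 7-entry bin table matching kNumBins = 7 after this change.
constexpr size_t kBins[] = {512 * kKByte, 8 * kMByte,   32 * kMByte, 128 * kMByte,
                            256 * kMByte, 512 * kMByte, kGByte};

size_t AllocationSizeSketch(size_t size) {
  for (size_t bin : kBins) {
    if (bin >= size) {
      return bin;  // smallest bin that can hold the request
    }
  }
  // larger than the biggest bin: round up to a multiple of 1 GB
  return kGByte * ((size + kGByte - 1) / kGByte);
}

int main() {
  std::printf("%zu\n", AllocationSizeSketch(300 * kMByte));  // served from the 512 MB bin
  std::printf("%zu\n", AllocationSizeSketch(5 * kGByte));    // 5 GB, a whole-GB multiple
  return 0;
}
```

Under the removed 4/16/26 GB bins a 5 GB request would presumably have reserved the 16 GB bin; with the rounding fallback it reserves exactly 5 GB.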
| @@ -293,7 +293,7 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) { | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if ((op_desc->GetType() == DATA) || (op_type == kGetNextName)) { | if ((op_desc->GetType() == DATA) || (op_type == kGetNextName)) { | ||||
| GELOGI("Need to process multi batch for compute graph."); | |||||
| GELOGI("Need to process multi batch for compute graph. op_type:%s", op_desc->GetType().c_str()); | |||||
| GetLocalOmgContext().need_multi_batch = true; | GetLocalOmgContext().need_multi_batch = true; | ||||
| break; | break; | ||||
| } | } | ||||
| @@ -348,7 +348,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||||
| for (auto &subgraph : compute_graph->GetAllSubgraphs()) { | for (auto &subgraph : compute_graph->GetAllSubgraphs()) { | ||||
| (void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | (void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | ||||
| } | } | ||||
| GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||||
| GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]."); | |||||
| } | } | ||||
| GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | ||||
| @@ -541,7 +541,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
| } | } | ||||
| std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | ||||
| compute_graph->GetGraphID(), subgraph, | compute_graph->GetGraphID(), subgraph, | ||||
| compute_graph->GetName(), session_id, | |||||
| compute_graph->GetName(), session_id, GetContext().WorkStreamId(), | |||||
| GetThreadLocalContext()); | GetThreadLocalContext()); | ||||
| if (!f.valid()) { | if (!f.valid()) { | ||||
| GELOGE(FAILED, "Future is invalid"); | GELOGE(FAILED, "Future is invalid"); | ||||
| @@ -557,7 +557,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
| } | } | ||||
| std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | ||||
| compute_graph->GetGraphID(), subgraph, | compute_graph->GetGraphID(), subgraph, | ||||
| compute_graph->GetName(), session_id, | |||||
| compute_graph->GetName(), session_id, GetContext().WorkStreamId(), | |||||
| GetThreadLocalContext()); | GetThreadLocalContext()); | ||||
| if (!f.valid()) { | if (!f.valid()) { | ||||
| GELOGE(FAILED, "Future is invalid"); | GELOGE(FAILED, "Future is invalid"); | ||||
| @@ -734,8 +734,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, | |||||
| } | } | ||||
| Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) { | Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) { | ||||
| GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, | |||||
| static_cast<int>(mode), ge::GetContext().DeviceId()); | |||||
| GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", | |||||
| session_id, graph_id, static_cast<int>(mode), ge::GetContext().DeviceId()); | |||||
| rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId()); | rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId()); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| @@ -758,7 +758,7 @@ Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { | |||||
| GE_TIMESTAMP_START(RunCustomPass); | GE_TIMESTAMP_START(RunCustomPass); | ||||
| GraphPtr graph = std::const_pointer_cast<Graph>(const_graph); | GraphPtr graph = std::const_pointer_cast<Graph>(const_graph); | ||||
| GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.", | |||||
| GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail", | |||||
| comp_graph->GetName().c_str()); | comp_graph->GetName().c_str()); | ||||
| GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass"); | GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -776,7 +776,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| GE_CHK_STATUS_RET(analyzer_instance->BuildJsonObject(session_id, compute_graph->GetGraphID()), | GE_CHK_STATUS_RET(analyzer_instance->BuildJsonObject(session_id, compute_graph->GetGraphID()), | ||||
| "BuildJsonObject Failed") | "BuildJsonObject Failed") | ||||
| GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s", | |||||
| GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s.", | |||||
| compute_graph->GetDirectNodesSize(), session_id, compute_graph->GetGraphID(), | compute_graph->GetDirectNodesSize(), session_id, compute_graph->GetGraphID(), | ||||
| compute_graph->GetName().c_str()); | compute_graph->GetName().c_str()); | ||||
| GE_DUMP(compute_graph, "PreRunBegin"); | GE_DUMP(compute_graph, "PreRunBegin"); | ||||
| @@ -797,7 +797,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| if (run_optimize_original_graph) { | if (run_optimize_original_graph) { | ||||
| Status ret = PreRunOptimizeOriginalGraph(graph_node, inputs, compute_graph, session_id); | Status ret = PreRunOptimizeOriginalGraph(graph_node, inputs, compute_graph, session_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s.", compute_graph->GetName().c_str()); | |||||
| GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s", compute_graph->GetName().c_str()); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| @@ -869,7 +869,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||||
| // release rts generate context | // release rts generate context | ||||
| RtContextUtil::GetInstance().DestroyRtContexts(session_id, graph_node->GetGraphId()); | RtContextUtil::GetInstance().DestroyRtContexts(session_id, graph_node->GetGraphId()); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "PreRun Failed."); | |||||
| GELOGE(ret, "PreRun Failed. graph_id:%u", graph_node->GetGraphId()); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| @@ -1209,7 +1209,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const | |||||
| Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, | Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, | ||||
| GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { | GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { | ||||
| GELOGD("[BuildGraph] start to build graph, graph_id=%u.", graph_id); | |||||
| GELOGD("[BuildGraph] start to build graph, graph_id:%u.", graph_id); | |||||
| if (inputs.empty()) { | if (inputs.empty()) { | ||||
| GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); | GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); | ||||
| } | } | ||||
| @@ -1241,7 +1241,7 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen | |||||
| ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); | ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); | ||||
| graph_node->SetRunFlag(false); | graph_node->SetRunFlag(false); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed!"); | |||||
| GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed! graph_id:%u", graph_id); | |||||
| return GE_GRAPH_PRERUN_FAILED; | return GE_GRAPH_PRERUN_FAILED; | ||||
| } | } | ||||
| @@ -2254,9 +2254,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", | GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", | ||||
| new (std::nothrow) | new (std::nothrow) | ||||
| LinkGenMaskNodesPass(options_.stream_max_parallel_num))); | LinkGenMaskNodesPass(options_.stream_max_parallel_num))); | ||||
| GE_CHK_STATUS_RET( | |||||
| after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass", | |||||
| new (std::nothrow) HcclContinuousMemcpyPass)); | |||||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass", | |||||
| new (std::nothrow) HcclContinuousMemcpyPass)); | |||||
| GE_TIMESTAMP_START(after_merge_passes); | GE_TIMESTAMP_START(after_merge_passes); | ||||
| auto ret = after_merge_passes.Run(compute_graph); | auto ret = after_merge_passes.Run(compute_graph); | ||||
| @@ -2509,8 +2508,10 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager | |||||
| const SubGraphInfoPtr &sub_graph_info_ptr, | const SubGraphInfoPtr &sub_graph_info_ptr, | ||||
| const std::string &root_graph_name, | const std::string &root_graph_name, | ||||
| uint64_t session_id, | uint64_t session_id, | ||||
| uint64_t work_stream_id, | |||||
| const GEThreadLocalContext &ge_context) { | const GEThreadLocalContext &ge_context) { | ||||
| if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { | if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { | ||||
| GetContext().SetWorkStreamId(work_stream_id); | |||||
| GetContext().SetSessionId(session_id); | GetContext().SetSessionId(session_id); | ||||
| GetThreadLocalContext() = ge_context; | GetThreadLocalContext() = ge_context; | ||||
| graph_manager->UpdateLocalOmgContext(root_graph_id); | graph_manager->UpdateLocalOmgContext(root_graph_id); | ||||
| @@ -2557,7 +2558,8 @@ Status GraphManager::RunGraphAsync(const GraphId &graph_id, const std::vector<ge | |||||
| uint64_t session_id, RunAsyncCallback callback) { | uint64_t session_id, RunAsyncCallback callback) { | ||||
| GELOGI("[GraphManager] Start to run graph async, graph_id=%u, inputsSize=%zu.", graph_id, inputs.size()); | GELOGI("[GraphManager] Start to run graph async, graph_id=%u, inputsSize=%zu.", graph_id, inputs.size()); | ||||
| bool ret = prerun_args_q_.Push(PreRunArgs({graph_id, inputs, session_id, GetThreadLocalContext(), callback})); | |||||
| bool ret = prerun_args_q_.Push(PreRunArgs({graph_id, inputs, session_id, | |||||
| GetContext().WorkStreamId(), GetThreadLocalContext(), callback})); | |||||
| if (!ret) { | if (!ret) { | ||||
| GELOGE(FAILED, "[GraphManager] Run graph async failed, graph_id=%u.", graph_id); | GELOGE(FAILED, "[GraphManager] Run graph async failed, graph_id=%u.", graph_id); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -2644,6 +2646,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||||
| GELOGI("A new loop start."); | GELOGI("A new loop start."); | ||||
| GetContext().SetWorkStreamId(args.work_stream_id); | |||||
| GetContext().SetSessionId(args.session_id); | GetContext().SetSessionId(args.session_id); | ||||
| GetThreadLocalContext() = args.context; | GetThreadLocalContext() = args.context; | ||||
| graph_manager->UpdateLocalOmgContext(args.graph_id); | graph_manager->UpdateLocalOmgContext(args.graph_id); | ||||
| @@ -2725,8 +2728,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||||
| ge_root_model = graph_node->GetGeRootModel(); | ge_root_model = graph_node->GetGeRootModel(); | ||||
| } | } | ||||
| graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.input_tensor, | |||||
| ge_root_model, GetThreadLocalContext(), args.callback })); | |||||
| graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.work_stream_id, | |||||
| args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback })); | |||||
| GELOGI("Loop end."); | GELOGI("Loop end."); | ||||
| } | } | ||||
| } | } | ||||
| @@ -2825,6 +2828,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||||
| GELOGI("A new loop start."); | GELOGI("A new loop start."); | ||||
| GetContext().SetWorkStreamId(args.work_stream_id); | |||||
| GetContext().SetSessionId(args.session_id); | GetContext().SetSessionId(args.session_id); | ||||
| GetThreadLocalContext() = args.context; | GetThreadLocalContext() = args.context; | ||||
| graph_manager->UpdateLocalOmgContext(args.graph_id); | graph_manager->UpdateLocalOmgContext(args.graph_id); | ||||
| @@ -196,6 +196,7 @@ class GraphManager { | |||||
| GraphId graph_id; | GraphId graph_id; | ||||
| std::vector<ge::InputTensorInfo> input_tensor; | std::vector<ge::InputTensorInfo> input_tensor; | ||||
| uint64_t session_id; | uint64_t session_id; | ||||
| uint64_t work_stream_id; | |||||
| GEThreadLocalContext context; | GEThreadLocalContext context; | ||||
| RunAsyncCallback callback; | RunAsyncCallback callback; | ||||
| }; | }; | ||||
| @@ -204,6 +205,7 @@ class GraphManager { | |||||
| GraphNodePtr graph_node; | GraphNodePtr graph_node; | ||||
| GraphId graph_id; | GraphId graph_id; | ||||
| uint64_t session_id; | uint64_t session_id; | ||||
| uint64_t work_stream_id; | |||||
| std::vector<ge::InputTensorInfo> input_tensor; | std::vector<ge::InputTensorInfo> input_tensor; | ||||
| GeRootModelPtr ge_root_model; | GeRootModelPtr ge_root_model; | ||||
| GEThreadLocalContext context; | GEThreadLocalContext context; | ||||
| @@ -221,6 +223,7 @@ class GraphManager { | |||||
| const SubGraphInfoPtr &sub_graph_info_ptr, | const SubGraphInfoPtr &sub_graph_info_ptr, | ||||
| const std::string &root_graph_name, | const std::string &root_graph_name, | ||||
| uint64_t session_id, | uint64_t session_id, | ||||
| uint64_t work_stream_id, | |||||
| const GEThreadLocalContext &ge_context); | const GEThreadLocalContext &ge_context); | ||||
| Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor); | Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor); | ||||
| void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor); | void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor); | ||||
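The GraphManager hunks above add a work_stream_id field to PreRunArgs and RunArgs, capture it with GetContext().WorkStreamId() when work is queued, and restore it with GetContext().SetWorkStreamId() at the top of PreRunThread, RunThread and ProcessSubGraphWithMultiThreads, so worker threads inherit the id of the submitting thread. A minimal sketch of that capture-and-restore pattern; ThreadCtx, RunArgsSketch and WorkerLoop are hypothetical names standing in for the GE context API:

```cpp
#include <cstdint>
#include <cstdio>
#include <thread>

// Per-thread context: each thread sees its own copy.
struct ThreadCtx {
  uint64_t session_id = 0;
  uint64_t work_stream_id = 0;
};
thread_local ThreadCtx g_ctx;

// Work queued for another thread carries the ids captured on the caller's thread.
struct RunArgsSketch {
  uint64_t session_id;
  uint64_t work_stream_id;
};

void WorkerLoop(RunArgsSketch args) {
  // Restore the caller's ids before doing any work, mirroring the new
  // SetWorkStreamId/SetSessionId calls at the top of PreRunThread and RunThread.
  g_ctx.session_id = args.session_id;
  g_ctx.work_stream_id = args.work_stream_id;
  std::printf("worker sees session %llu, work stream %llu\n",
              static_cast<unsigned long long>(g_ctx.session_id),
              static_cast<unsigned long long>(g_ctx.work_stream_id));
}

int main() {
  g_ctx.session_id = 42;  // ids set on the submitting thread
  g_ctx.work_stream_id = 7;
  RunArgsSketch args{g_ctx.session_id, g_ctx.work_stream_id};  // captured at enqueue time
  std::thread worker(WorkerLoop, args);
  worker.join();
  return 0;
}
```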
| @@ -26,6 +26,7 @@ | |||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
| #include "graph/manager/host_mem_allocator.h" | |||||
| #include "graph/node.h" | #include "graph/node.h" | ||||
| #include "runtime/mem.h" | #include "runtime/mem.h" | ||||
| @@ -139,7 +140,6 @@ class MemoryAllocator { | |||||
| using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | ||||
| class CachingAllocator; | class CachingAllocator; | ||||
| class RdmaPoolAllocator; | class RdmaPoolAllocator; | ||||
| class HostMemAllocator; | |||||
| class MemManager { | class MemManager { | ||||
| public: | public: | ||||
| MemManager(); | MemManager(); | ||||
| @@ -24,9 +24,9 @@ namespace { | |||||
| constexpr uint32_t kValidInputNodeOutputNum = 1; | constexpr uint32_t kValidInputNodeOutputNum = 1; | ||||
| constexpr int32_t kAssignRefInputIndex = 0; | constexpr int32_t kAssignRefInputIndex = 0; | ||||
| constexpr int32_t kAssignValueInputIndex = 1; | constexpr int32_t kAssignValueInputIndex = 1; | ||||
| static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
| ge::CONSTANT, ge::CONSTANTOP, | |||||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||||
| const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
| ge::CONSTANT, ge::CONSTANTOP, | |||||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||||
| } | } | ||||
| Status AssignRemovePass::Run(NodePtr &node) { | Status AssignRemovePass::Run(NodePtr &node) { | ||||
| @@ -50,13 +50,11 @@ Status RunOpKernelWithCheck(NodePtr &node, | |||||
| return FoldingPass::RunOpKernel(node, inputs, outputs); | return FoldingPass::RunOpKernel(node, inputs, outputs); | ||||
| } | } | ||||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
| &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||||
| return statistic_of_ge_constant_folding_; | return statistic_of_ge_constant_folding_; | ||||
| } | } | ||||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
| &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||||
| return statistic_of_op_constant_folding_; | return statistic_of_op_constant_folding_; | ||||
| } | } | ||||
| @@ -37,7 +37,7 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { | |||||
| return NOT_CHANGED; | return NOT_CHANGED; | ||||
| } | } | ||||
| GELOGI("FlowCtrl pass begin"); | |||||
| GELOGI("FlowCtrl pass begin.graph is [%s]", compute_graph->GetName().c_str()); | |||||
| bool graph_change = false; | bool graph_change = false; | ||||
| // 1. Add FP/BP flow ctrl (big cycle) | // 1. Add FP/BP flow ctrl (big cycle) | ||||
| for (auto &node : compute_graph->GetDirectNode()) { | for (auto &node : compute_graph->GetDirectNode()) { | ||||
| @@ -80,6 +80,16 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { | |||||
| graph_change = true; | graph_change = true; | ||||
| } | } | ||||
| } | } | ||||
| // the control edges added below rely on the memcpy node in the iterator loop being set to a single stream, otherwise the graph may block | |||||
| for (auto &active_node : active_nodes_in_iter_loop_) { | |||||
| auto ret = GraphUtils::AddEdge(active_node->GetOutControlAnchor(), | |||||
| assign_add_node_in_fpbp_loop_->GetInControlAnchor()); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGW("add control edge between iter_loop_node:%s and fpbp_loop_node:%s fail, may cause block", | |||||
| active_node->GetName().c_str(), assign_add_node_in_fpbp_loop_->GetName().c_str()); | |||||
| } | |||||
| } | |||||
| GELOGI("FlowCtrl pass end, graph is %s.", graph_change ? "changed" : "not changed"); | GELOGI("FlowCtrl pass end, graph is %s.", graph_change ? "changed" : "not changed"); | ||||
| return graph_change ? SUCCESS : NOT_CHANGED; | return graph_change ? SUCCESS : NOT_CHANGED; | ||||
| } | } | ||||
| @@ -279,16 +289,16 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co | |||||
| * loopIncrement | * loopIncrement | ||||
| */ | */ | ||||
| // Insert AssignAdd node | // Insert AssignAdd node | ||||
| NodePtr assign_add_node = | |||||
| assign_add_node_in_fpbp_loop_ = | |||||
| InsertAssignOp(compute_graph, ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node); | InsertAssignOp(compute_graph, ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node); | ||||
| if (assign_add_node == nullptr || switch_node == nullptr) { | |||||
| if (assign_add_node_in_fpbp_loop_ == nullptr || switch_node == nullptr) { | |||||
| GELOGE(PARAM_INVALID, "assign add node or switch node is null"); | GELOGE(PARAM_INVALID, "assign add node or switch node is null"); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| string active_name = switch_node->GetName() + "_StreamActive"; | string active_name = switch_node->GetName() + "_StreamActive"; | ||||
| // add attr for stream assign model to break branch. | // add attr for stream assign model to break branch. | ||||
| GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node, active_name), "set stream label failed"); | |||||
| GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node_in_fpbp_loop_, active_name), "set stream label failed"); | |||||
| // used for stream assign to find true branch | // used for stream assign to find true branch | ||||
| GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); | GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); | ||||
| @@ -304,13 +314,15 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co | |||||
| DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); | DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); | ||||
| // add ctrl edges | // add ctrl edges | ||||
| graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), assign_add_node->GetInControlAnchor()); | |||||
| graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), | |||||
| assign_add_node_in_fpbp_loop_->GetInControlAnchor()); | |||||
| if (add_ret != GRAPH_SUCCESS) { | if (add_ret != GRAPH_SUCCESS) { | ||||
| GELOGE(FAILED, "Add switch_node to assign_add_node ctrl edge failed, add_ret=%u.", add_ret); | GELOGE(FAILED, "Add switch_node to assign_add_node ctrl edge failed, add_ret=%u.", add_ret); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| add_ret = GraphUtils::AddEdge(assign_add_node->GetOutControlAnchor(), active_node->GetInControlAnchor()); | |||||
| add_ret = GraphUtils::AddEdge(assign_add_node_in_fpbp_loop_->GetOutControlAnchor(), | |||||
| active_node->GetInControlAnchor()); | |||||
| if (add_ret != GRAPH_SUCCESS) { | if (add_ret != GRAPH_SUCCESS) { | ||||
| GELOGE(FAILED, "Add assign_add_node to active_node ctrl edge failed, add_ret=%u.", add_ret); | GELOGE(FAILED, "Add assign_add_node to active_node ctrl edge failed, add_ret=%u.", add_ret); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -533,6 +545,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, | |||||
| GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); | GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); | ||||
| // used for stream assign to find active stream | // used for stream assign to find active stream | ||||
| GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { loop_pre_node->GetName() }), "set active label list failed"); | GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { loop_pre_node->GetName() }), "set active label list failed"); | ||||
| active_nodes_in_iter_loop_.push_back(active_node); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -142,6 +142,9 @@ class FlowCtrlPass : public GraphPass { | |||||
| /// false: only one dataSet exist | /// false: only one dataSet exist | ||||
| /// | /// | ||||
| bool CheckMultiDataSet(ComputeGraphPtr &compute_graph); | bool CheckMultiDataSet(ComputeGraphPtr &compute_graph); | ||||
| NodePtr assign_add_node_in_fpbp_loop_ = nullptr; | |||||
| std::vector<NodePtr> active_nodes_in_iter_loop_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
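The FlowCtrlPass change stores the FP/BP AssignAdd node (assign_add_node_in_fpbp_loop_) and every per-iterator-loop StreamActive node (active_nodes_in_iter_loop_) as pass members while the branches are built, then adds the control edges in one place at the end of Run(), once both endpoints are guaranteed to exist. A toy sketch of that collect-then-wire pattern; ToyNode and FlowCtrlSketch are hypothetical stand-ins for GE's NodePtr and GraphUtils::AddEdge:

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Toy graph node that only tracks control-edge successors.
struct ToyNode {
  std::string name;
  std::vector<ToyNode *> ctrl_succ;
};

struct FlowCtrlSketch {
  ToyNode *assign_add_in_fpbp_loop = nullptr;         // recorded while building the true branch
  std::vector<ToyNode *> active_nodes_in_iter_loop;   // recorded per iterator loop

  // Phase 2: wire control edges iterator-loop active node -> FP/BP AssignAdd.
  void Finalize() {
    for (ToyNode *active : active_nodes_in_iter_loop) {
      active->ctrl_succ.push_back(assign_add_in_fpbp_loop);
      std::printf("ctrl edge %s -> %s\n", active->name.c_str(),
                  assign_add_in_fpbp_loop->name.c_str());
    }
  }
};

int main() {
  ToyNode assign_add{"flowctrl_loop_assign_add", {}};
  ToyNode active{"iterator_loop_stream_active", {}};
  FlowCtrlSketch pass;
  pass.assign_add_in_fpbp_loop = &assign_add;          // phase 1: collect
  pass.active_nodes_in_iter_loop.push_back(&active);
  pass.Finalize();                                     // phase 2: wire
  return 0;
}
```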
| @@ -140,7 +140,8 @@ bool HcclContinuousMemcpyPass::IsDataNode(const std::string& node_type) { | |||||
| /// @param [in] ge::OutDataAnchorPtr in_node | /// @param [in] ge::OutDataAnchorPtr in_node | ||||
| /// @return ge::NodePtr | /// @return ge::NodePtr | ||||
| /// | /// | ||||
| NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { | |||||
| NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, | |||||
| const OutDataAnchorPtr &out_data_anchor) { | |||||
| GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | ||||
| NodePtr pre_node = out_data_anchor->GetOwnerNode(); | NodePtr pre_node = out_data_anchor->GetOwnerNode(); | ||||
| OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | ||||
| @@ -205,8 +206,9 @@ std::string HcclContinuousMemcpyPass::CheckDuplicateName(const std::string &node | |||||
| /// @param [in] InDataAnchorPtr hccl_in_anchor | /// @param [in] InDataAnchorPtr hccl_in_anchor | ||||
| /// @return status | /// @return status | ||||
| /// | /// | ||||
| Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor) { | |||||
| Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, | |||||
| const OutDataAnchorPtr &src_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor) { | |||||
| GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); | GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); | ||||
| GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); | GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); | ||||
| @@ -235,8 +237,9 @@ Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &gra | |||||
| /// @param [in] InDataAnchorPtr hccl_in_anchor | /// @param [in] InDataAnchorPtr hccl_in_anchor | ||||
| /// @return status | /// @return status | ||||
| /// | /// | ||||
| Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor) { | |||||
| Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, | |||||
| const OutDataAnchorPtr &src_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor) { | |||||
| GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), | GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), | ||||
| hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | ||||
| NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); | NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); | ||||
| @@ -274,8 +277,8 @@ Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr | |||||
| /// @return status | /// @return status | ||||
| /// | /// | ||||
| Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, | Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, | ||||
| const OutDataAnchorPtr &var_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor) { | |||||
| const OutDataAnchorPtr &var_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor) { | |||||
| if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { | if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { | ||||
| GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -354,8 +357,9 @@ Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeG | |||||
| /// @param [in] ge::OutDataAnchorPtr variable node out anchor | /// @param [in] ge::OutDataAnchorPtr variable node out anchor | ||||
| /// @return ge::NodePtr | /// @return ge::NodePtr | ||||
| /// | /// | ||||
| NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { | |||||
| GE_CHECK_NOTNULL_EXEC(graph , return nullptr); | |||||
| NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, | |||||
| const OutDataAnchorPtr &out_data_anchor) { | |||||
| GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | |||||
| NodePtr pre_node = out_data_anchor->GetOwnerNode(); | NodePtr pre_node = out_data_anchor->GetOwnerNode(); | ||||
| OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | ||||
| if (pre_op_desc == nullptr) { | if (pre_op_desc == nullptr) { | ||||
| @@ -23,9 +23,9 @@ namespace ge { | |||||
| namespace { | namespace { | ||||
| constexpr uint32_t kInplaceSupportOutputIndex = 0; | constexpr uint32_t kInplaceSupportOutputIndex = 0; | ||||
| constexpr uint32_t kInplaceSupportOutputNum = 1; | constexpr uint32_t kInplaceSupportOutputNum = 1; | ||||
| static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
| ge::CONSTANT, ge::CONSTANTOP, | |||||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||||
| const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
| ge::CONSTANT, ge::CONSTANTOP, | |||||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||||
| } | } | ||||
| Status InplaceSupportCheckPass::Run(NodePtr &node) { | Status InplaceSupportCheckPass::Run(NodePtr &node) { | ||||
| GELOGD("InplaceSupportCheckPass running"); | GELOGD("InplaceSupportCheckPass running"); | ||||
| @@ -458,7 +458,7 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) { | |||||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); | GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); | ||||
| return GE_GRAPH_PARAM_NULLPTR; | return GE_GRAPH_PARAM_NULLPTR; | ||||
| } | } | ||||
| GELOGI("NetOutputPass Run."); | |||||
| GELOGI("NetOutputPass Run.graph is [%s]", graph->GetName().c_str()); | |||||
| NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT); | NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT); | ||||
| // save user targets node | // save user targets node | ||||
| SaveAndRemoveTargets(graph); | SaveAndRemoveTargets(graph); | ||||
| @@ -82,14 +82,41 @@ Status NoUseReshapeRemovePass::Run(ge::NodePtr &node) { | |||||
| } | } | ||||
| } | } | ||||
| if (to_be_deleted) { | if (to_be_deleted) { | ||||
| GELOGI("NoUseReshapeRemovePass remove useless node:%s", node->GetName().c_str()); | |||||
| auto ret = PassUtils::UnlinkNodeWithControlCopy(node, kReshapeShapeIndex); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "DimensionAdjustPass unlink node with control copy fail."); | |||||
| return ret; | |||||
| } | |||||
| auto ret = TryRemoveConstShapeInput(node); | |||||
| GE_CHK_STATUS_RET_NOLOG(ret); | |||||
| GELOGI("NoUseReshapeRemovePass remove useless reshape node:%s", node->GetName().c_str()); | |||||
| return IsolateAndDeleteNode(node, {kReshapeDataIndex}); | return IsolateAndDeleteNode(node, {kReshapeDataIndex}); | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status NoUseReshapeRemovePass::TryRemoveConstShapeInput(ge::NodePtr &reshape_node) { | |||||
| auto shape_input_anchor = reshape_node->GetInDataAnchor(kReshapeShapeIndex); | |||||
| if (shape_input_anchor == nullptr) { | |||||
| return SUCCESS; | |||||
| } | |||||
| GE_CHECK_NOTNULL(shape_input_anchor->GetPeerOutAnchor()); | |||||
| auto shape_input = shape_input_anchor->GetPeerOutAnchor()->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL(shape_input); | |||||
| if (shape_input->GetType() != CONSTANT && shape_input->GetType() != CONSTANTOP) { | |||||
| return SUCCESS; | |||||
| } | |||||
| // op(x) const(shape) | |||||
| // \ / | |||||
| // reshape | |||||
| // the const input can be unlinked, but its control dependencies must be copied | |||||
| auto ret = PassUtils::UnlinkNodeWithControlCopy(reshape_node, kReshapeShapeIndex); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Unlink node %s with control copy failed.", shape_input->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| // remove const without any data_output | |||||
| if (shape_input->GetOutDataNodesSize() == 0) { | |||||
| auto ret = IsolateAndDeleteNode(shape_input, {}); | |||||
| GE_CHK_GRAPH_STATUS_RET(ret, "Fail to remove node %s", shape_input->GetName().c_str()); | |||||
| GELOGI("Remove useless shape input const %s.", shape_input->GetName().c_str()); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -32,6 +32,9 @@ class NoUseReshapeRemovePass : public BaseNodePass { | |||||
| /// @author | /// @author | ||||
| /// | /// | ||||
| Status Run(ge::NodePtr &node) override; | Status Run(ge::NodePtr &node) override; | ||||
| private: | |||||
| Status TryRemoveConstShapeInput(NodePtr &reshape_node); | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -27,12 +27,11 @@ | |||||
| namespace ge { | namespace ge { | ||||
| Status PrunePass::Run(ge::ComputeGraphPtr graph) { | Status PrunePass::Run(ge::ComputeGraphPtr graph) { | ||||
| GELOGD("PrunePass Start"); | |||||
| GELOGD("PrunePass Start, graph is [%s]", graph->GetName().c_str()); | |||||
| if (graph == nullptr) { | if (graph == nullptr) { | ||||
| GELOGE(GE_GRAPH_ISNULL, "input compute graph is NULL."); | GELOGE(GE_GRAPH_ISNULL, "input compute graph is NULL."); | ||||
| return GE_GRAPH_ISNULL; | return GE_GRAPH_ISNULL; | ||||
| } | } | ||||
| std::vector<NodePtr> out_nodes; | std::vector<NodePtr> out_nodes; | ||||
| std::unordered_set<NodePtr> nodes; | std::unordered_set<NodePtr> nodes; | ||||
| for (NodePtr &node_ptr : graph->GetDirectNode()) { | for (NodePtr &node_ptr : graph->GetDirectNode()) { | ||||
| @@ -42,7 +41,6 @@ Status PrunePass::Run(ge::ComputeGraphPtr graph) { | |||||
| out_nodes.push_back(node_ptr); | out_nodes.push_back(node_ptr); | ||||
| } | } | ||||
| } | } | ||||
| if (out_nodes.empty()) { | if (out_nodes.empty()) { | ||||
| GELOGW("graph [%s] does not contain NETOUTPUT type node,no return value. Do nothing!", graph->GetName().c_str()); | GELOGW("graph [%s] does not contain NETOUTPUT type node,no return value. Do nothing!", graph->GetName().c_str()); | ||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| @@ -43,7 +43,7 @@ Status ReshapeRemovePass::Run(NodePtr &node) { | |||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| int key = kToBeDeleteOp.find(node->GetType()) == kToBeDeleteOp.end() ? kOpNoDelete : kToBeDeleteOp[node->GetType()]; | int key = kToBeDeleteOp.find(node->GetType()) == kToBeDeleteOp.end() ? kOpNoDelete : kToBeDeleteOp[node->GetType()]; | ||||
| switch(key) { | |||||
| switch (key) { | |||||
| case kReshapeType: { | case kReshapeType: { | ||||
| bool is_shape_unknown = false; | bool is_shape_unknown = false; | ||||
| if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { | if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { | ||||
| @@ -385,7 +385,7 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra | |||||
| // Break Move and follow, Link Data and follow. | // Break Move and follow, Link Data and follow. | ||||
| const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex); | const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex); | ||||
| const auto in_anchors =out_anchor->GetPeerInDataAnchors(); | |||||
| const auto in_anchors = out_anchor->GetPeerInDataAnchors(); | |||||
| for (const auto in_anchor : in_anchors) { | for (const auto in_anchor : in_anchors) { | ||||
| GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); | GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); | ||||
| GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); | GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); | ||||
| @@ -991,7 +991,6 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, | |||||
| Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option, | Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option, | ||||
| vector<vector<std::pair<int64_t, int64_t>>> &range_vec) { | vector<vector<std::pair<int64_t, int64_t>>> &range_vec) { | ||||
| // check both mode and shape_range option are all enabled | // check both mode and shape_range option are all enabled | ||||
| auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE); | auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE); | ||||
| bool enable_dynamic_execute_mode = (mode_iter != graph_option.end()) && (mode_iter->second == "dynamic_execute"); | bool enable_dynamic_execute_mode = (mode_iter != graph_option.end()) && (mode_iter->second == "dynamic_execute"); | ||||
| if (!enable_dynamic_execute_mode) { | if (!enable_dynamic_execute_mode) { | ||||
| @@ -1272,9 +1271,10 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option) { | |||||
| Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, | |||||
| const std::map<string, string> &graph_option) { | |||||
| // Get shape range of input in dynamic_execute mode | // Get shape range of input in dynamic_execute mode | ||||
| vector<vector<std::pair<int64_t,int64_t>>> dynamic_shape_range_vec; | |||||
| vector<vector<std::pair<int64_t, int64_t>>> dynamic_shape_range_vec; | |||||
| auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); | auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); | ||||
| GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); | GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); | ||||
| compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); | compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); | ||||
| @@ -2012,7 +2012,8 @@ Status GraphPrepare::ProcessNetOutput() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,const std::map<string,string> &graph_option) { | |||||
| Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input, | |||||
| const std::map<string, string> &graph_option) { | |||||
| compute_graph_->SetInputSize(user_input.size()); | compute_graph_->SetInputSize(user_input.size()); | ||||
| if (user_input.empty()) { | if (user_input.empty()) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -23,7 +23,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "common/debug/log.h" | #include "common/debug/log.h" | ||||
| #include "common/debug/memory_dumper.h" | #include "common/debug/memory_dumper.h" | ||||
| #include "common/model_parser/base.h" | |||||
| #include "common/model_parser/model_parser.h" | |||||
| #include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
| #include "common/string_util.h" | #include "common/string_util.h" | ||||
| #include "common/types.h" | #include "common/types.h" | ||||
| @@ -63,8 +63,8 @@ class GraphPrepare { | |||||
| Status CheckRefOp(); | Status CheckRefOp(); | ||||
| Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); | Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); | ||||
| Status AdjustDataOpOutput(const NodePtr &node); | Status AdjustDataOpOutput(const NodePtr &node); | ||||
| Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option); | |||||
| Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option); | |||||
| Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option); | |||||
| Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option); | |||||
| Status CheckConstOp(); | Status CheckConstOp(); | ||||
| Status VerifyConstOp(const NodePtr &node); | Status VerifyConstOp(const NodePtr &node); | ||||
| Status CheckUserInput(const std::vector<GeTensor> &user_input); | Status CheckUserInput(const std::vector<GeTensor> &user_input); | ||||
| @@ -105,7 +105,7 @@ GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, con | |||||
| /// @return 0: true/false | /// @return 0: true/false | ||||
| /// | /// | ||||
| GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | ||||
| const std::string &input_format); | |||||
| const std::string &input_format); | |||||
| } // namespace multibatch | } // namespace multibatch | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -16,6 +16,8 @@ | |||||
| #include "host_kernels/slice_kernel.h" | #include "host_kernels/slice_kernel.h" | ||||
| #include <set> | |||||
| #include "common/ge_inner_error_codes.h" | #include "common/ge_inner_error_codes.h" | ||||
| #include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
| #include "common/types.h" | #include "common/types.h" | ||||
| @@ -31,6 +33,30 @@ const size_t kSliceInputSize = 3; | |||||
| const size_t kSliceInputIndexX = 0; | const size_t kSliceInputIndexX = 0; | ||||
| const size_t kSliceInputIndexBegin = 1; | const size_t kSliceInputIndexBegin = 1; | ||||
| const size_t kSliceInputIndexSize = 2; | const size_t kSliceInputIndexSize = 2; | ||||
| const std::set<ge::DataType> kSupportedDataTypeToLength = { | |||||
| DT_BOOL, | |||||
| DT_INT64, | |||||
| DT_UINT64, | |||||
| DT_FLOAT, | |||||
| DT_INT32, | |||||
| DT_UINT32, | |||||
| DT_INT8, | |||||
| DT_UINT8, | |||||
| DT_INT16, | |||||
| DT_UINT16, | |||||
| DT_FLOAT16, | |||||
| DT_DOUBLE, | |||||
| DT_DUAL, | |||||
| DT_DUAL_SUB_INT8, | |||||
| DT_DUAL_SUB_UINT8, | |||||
| DT_COMPLEX64, | |||||
| DT_COMPLEX128, | |||||
| DT_QINT8, | |||||
| DT_QINT16, | |||||
| DT_QINT32, | |||||
| DT_QUINT8, | |||||
| DT_QUINT16, | |||||
| }; | |||||
| } // namespace | } // namespace | ||||
| Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTensorPtr> &input, | Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTensorPtr> &input, | ||||
| @@ -56,6 +82,16 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTenso | |||||
| // data type in input_x | // data type in input_x | ||||
| auto data_type = x_->GetTensorDesc().GetDataType(); | auto data_type = x_->GetTensorDesc().GetDataType(); | ||||
| // check supported | |||||
| if (kSupportedDataTypeToLength.count(data_type) == 0) { | |||||
| GELOGW("input_x data_type is [%s], does not supported!", TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||||
| return NOT_CHANGED; | |||||
| } | |||||
| uint32_t type_size = 0; | |||||
| bool is_success = TypeUtils::GetDataTypeLength(data_type, type_size); | |||||
| if (!is_success) { | |||||
| return NOT_CHANGED; | |||||
| } | |||||
| // check data type of begin and size | // check data type of begin and size | ||||
| if (begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { | if (begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { | ||||
| GELOGW("Data type of begin and size for slice are not DT_INT32."); | GELOGW("Data type of begin and size for slice are not DT_INT32."); | ||||
| @@ -69,7 +105,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTenso | |||||
| GE_CHECK_NOTNULL(begin_data); | GE_CHECK_NOTNULL(begin_data); | ||||
| GE_CHECK_NOTNULL(size_data); | GE_CHECK_NOTNULL(size_data); | ||||
| size_t data_size = x_->GetData().size() / sizeof(int32_t); | |||||
| size_t data_size = x_->GetData().size() / type_size; | |||||
| size_t begin_size = begin->GetData().size() / sizeof(int32_t); | size_t begin_size = begin->GetData().size() / sizeof(int32_t); | ||||
| size_t size_size = size->GetData().size() / sizeof(int32_t); | size_t size_size = size->GetData().size() / sizeof(int32_t); | ||||
| const ge::GeShape &x_shape = x_->GetTensorDesc().GetShape(); | const ge::GeShape &x_shape = x_->GetTensorDesc().GetShape(); | ||||
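With the kSupportedDataTypeToLength guard and the switch from sizeof(int32_t) to type_size above, SliceKernel derives the element count of input x from the actual width of its data type and bails out (NOT_CHANGED) on types it cannot measure. A small sketch of that byte-to-element computation; the DType enum and size table are illustrative stand-ins for TypeUtils::GetDataTypeLength, not GE's real tables:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <map>

// Illustrative byte widths for a few element types.
enum class DType { kFloat16, kFloat32, kInt32, kInt64 };
const std::map<DType, uint32_t> kTypeSize = {
    {DType::kFloat16, 2}, {DType::kFloat32, 4}, {DType::kInt32, 4}, {DType::kInt64, 8}};

// Element count of a tensor buffer, or 0 when the type is unsupported
// (the kernel itself returns NOT_CHANGED and skips folding in that case).
size_t ElementCount(size_t byte_size, DType dtype) {
  auto it = kTypeSize.find(dtype);
  if (it == kTypeSize.end() || it->second == 0) {
    return 0;
  }
  return byte_size / it->second;
}

int main() {
  std::printf("%zu\n", ElementCount(64, DType::kFloat16));  // 32 elements
  std::printf("%zu\n", ElementCount(64, DType::kInt64));    // 8 elements
  return 0;
}
```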
| @@ -62,9 +62,9 @@ struct GraphExecutionContext { | |||||
| rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
| rtContext_t rt_context = nullptr; | rtContext_t rt_context = nullptr; | ||||
| rtContext_t rt_gen_context = nullptr; | rtContext_t rt_gen_context = nullptr; | ||||
| std::unique_ptr<CallbackManager> callback_manager; | |||||
| std::unique_ptr<CallbackManager> callback_manager = nullptr; | |||||
| NpuMemoryAllocator *allocator = nullptr; | NpuMemoryAllocator *allocator = nullptr; | ||||
| mutable std::unique_ptr<HybridProfiler> profiler; | |||||
| mutable std::unique_ptr<HybridProfiler> profiler = nullptr; | |||||
| DumpProperties dump_properties; | DumpProperties dump_properties; | ||||
| bool trace_enabled = false; | bool trace_enabled = false; | ||||
| bool dump_enabled = false; | bool dump_enabled = false; | ||||
| @@ -26,6 +26,7 @@ namespace hybrid { | |||||
| namespace { | namespace { | ||||
| const int kDataOutputIndex = 0; | const int kDataOutputIndex = 0; | ||||
| const size_t kMinimumPiplineStages = 2; | const size_t kMinimumPiplineStages = 2; | ||||
| const int kDecimalBase = 10;  // numeric base for strtol when parsing ITER_NUM | |||||
| } | } | ||||
| HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) | HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) | ||||
| : model_(model), run_flag_(false) { | : model_(model), run_flag_(false) { | ||||
| @@ -150,7 +151,7 @@ Status HybridModelAsyncExecutor::RunInternal() { | |||||
| GELOGI("HybridModel will execute in pipeline mode"); | GELOGI("HybridModel will execute in pipeline mode"); | ||||
| auto iter_per_run = std::getenv("ITER_NUM"); | auto iter_per_run = std::getenv("ITER_NUM"); | ||||
| if (iter_per_run) { | if (iter_per_run) { | ||||
| args.num_loops = static_cast<int>(strtol(iter_per_run, nullptr, 10)); | |||||
| args.num_loops = static_cast<int>(strtol(iter_per_run, nullptr, kDecimalBase)); | |||||
| } | } | ||||
| ret = pipe_executor_->Execute(args); | ret = pipe_executor_->Execute(args); | ||||
| } else { | } else { | ||||
| @@ -250,7 +251,8 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy | |||||
| if (k >= shape.GetDimNum()) { | if (k >= shape.GetDimNum()) { | ||||
| break; | break; | ||||
| } | } | ||||
| if (shape.GetDim(k) < range[k].first || shape.GetDim(k) > range[k].second) { | |||||
| // range[k].second can be -1 | |||||
| if (shape.GetDim(k) < range[k].first || (range[k].second >= 0 && shape.GetDim(k) > range[k].second)) { | |||||
| GELOGE(PARAM_INVALID, "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld]", | GELOGE(PARAM_INVALID, "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld]", | ||||
| input_index, k, shape.GetDim(k), range[k].first, range[k].second); | input_index, k, shape.GetDim(k), range[k].first, range[k].second); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
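The relaxed check above treats a negative upper bound (-1) as "no upper bound", so only the lower bound is enforced for that dimension. The same predicate in isolation, as a sketch with plain integer types; DimWithinRange is illustrative only.

    #include <cstdint>
    #include <utility>

    // True when dim lies in [range.first, range.second]; a negative second means unbounded above.
    static bool DimWithinRange(int64_t dim, const std::pair<int64_t, int64_t> &range) {
      if (dim < range.first) {
        return false;
      }
      return range.second < 0 || dim <= range.second;
    }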
| @@ -8,6 +8,7 @@ namespace ge { | |||||
| namespace hybrid { | namespace hybrid { | ||||
| namespace { | namespace { | ||||
| constexpr int kNumExecutors = 2; | constexpr int kNumExecutors = 2; | ||||
| const int kMinLoopCount = 2; | |||||
| const int kIntBase = 10; | const int kIntBase = 10; | ||||
| const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; | const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; | ||||
| } | } | ||||
| @@ -208,7 +209,7 @@ Status HybridModelPipelineExecutor::InitStageExecutors() { | |||||
| Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { | Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { | ||||
| int loop_count = args.num_loops; | int loop_count = args.num_loops; | ||||
| GE_CHECK_GE(loop_count, 2); | |||||
| GE_CHECK_GE(loop_count, kMinLoopCount); | |||||
| auto &inputs = args.inputs; | auto &inputs = args.inputs; | ||||
| auto &input_desc = args.input_desc; | auto &input_desc = args.input_desc; | ||||
| @@ -30,7 +30,7 @@ class NodeTask; | |||||
| struct GraphExecutionContext; | struct GraphExecutionContext; | ||||
| class SubgraphContext; | class SubgraphContext; | ||||
| class TaskContext; | class TaskContext; | ||||
| class NodeState; | |||||
| struct NodeState; | |||||
| class ShapeFuture { | class ShapeFuture { | ||||
| public: | public: | ||||
| @@ -275,10 +275,10 @@ Status SubgraphExecutor::PrepareNodes(int group) { | |||||
| Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const { | Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const { | ||||
| GetContext().SetSessionId(context_->context_id); | GetContext().SetSessionId(context_->context_id); | ||||
| HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), | HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), | ||||
| "[%s] Failed to InferShape.", node_state.GetName().c_str()); | |||||
| "[%s] Failed to InferShape.", node_state.GetName().c_str()); | |||||
| GetContext().SetSessionId(context_->session_id); | GetContext().SetSessionId(context_->session_id); | ||||
| HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state), | HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state), | ||||
| "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); | |||||
| "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -70,8 +70,6 @@ class NodeDoneCallback { | |||||
| Status PrepareConstInputs(const NodeItem &node_item); | Status PrepareConstInputs(const NodeItem &node_item); | ||||
| Status DumpDynamicNode(); | Status DumpDynamicNode(); | ||||
| Status ProfilingReport(); | Status ProfilingReport(); | ||||
| Status GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||||
| std::vector<ComputeGraphDescInfo> &compute_graph_info); | |||||
| Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, | Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, | ||||
| std::vector<TaskDescInfo> &task_desc_info); | std::vector<TaskDescInfo> &task_desc_info); | ||||
| GraphExecutionContext *graph_context_; | GraphExecutionContext *graph_context_; | ||||
| @@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||||
| } | } | ||||
| GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | ||||
| auto &prof_mgr = ProfilingManager::Instance(); | |||||
| task_desc_info = context_->GetProfilingTaskDescInfo(); | task_desc_info = context_->GetProfilingTaskDescInfo(); | ||||
| context_->ClearProfilingTaskDescInfo(); | context_->ClearProfilingTaskDescInfo(); | ||||
| return SUCCESS; | |||||
| } | |||||
| Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||||
| std::vector<ComputeGraphDescInfo> &compute_graph_info) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| GE_CHECK_NOTNULL(model); | |||||
| GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | |||||
| compute_graph_info = context_->GetProfilingGraphDescInfo(); | |||||
| context_->ClearProfilingGraphDescInfo(); | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| for (auto &tmp_compute_graph_info : compute_graph_info) { | |||||
| // default | |||||
| if (op_desc->GetAllInputsSize() == 0) { | |||||
| tmp_compute_graph_info.input_format = { FORMAT_NULL }; | |||||
| tmp_compute_graph_info.input_shape = { {0} }; | |||||
| tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; | |||||
| } | |||||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
| GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||||
| if (input_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||||
| tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||||
| tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||||
| } | |||||
| if (op_desc->GetOutputsSize() == 0) { | |||||
| tmp_compute_graph_info.output_format = { FORMAT_NULL }; | |||||
| tmp_compute_graph_info.output_shape = { {0} }; | |||||
| tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; | |||||
| } | |||||
| for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||||
| GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||||
| tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||||
| tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||||
| tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||||
| } | |||||
| for (auto &tmp_task_desc : task_desc_info) { | |||||
| // save op input and output info | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() { | |||||
| return profiling_ret; | return profiling_ret; | ||||
| } | } | ||||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
| profiling_ret = GetGraphDescInfo(node, model, compute_graph_info); | |||||
| if (profiling_ret != RT_ERROR_NONE) { | |||||
| GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str()); | |||||
| return profiling_ret; | |||||
| } | |||||
| auto &profiling_manager = ProfilingManager::Instance(); | auto &profiling_manager = ProfilingManager::Instance(); | ||||
| profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info); | |||||
| profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -323,6 +277,8 @@ Status NodeDoneCallback::OnNodeDone() { | |||||
| node_item.NodeName().c_str()); | node_item.NodeName().c_str()); | ||||
| } | } | ||||
| // release workspace | |||||
| context_->ReleaseWorkspace(); | |||||
| // release inputs | // release inputs | ||||
| for (int i = 0; i < context_->NumInputs(); ++i) { | for (int i = 0; i < context_->NumInputs(); ++i) { | ||||
| context_->ReleaseInput(i); | context_->ReleaseInput(i); | ||||
| @@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||||
| op_index = task_def.kernel_ex().op_index(); | op_index = task_def.kernel_ex().op_index(); | ||||
| } else if (task_type == RT_MODEL_TASK_HCCL) { | } else if (task_type == RT_MODEL_TASK_HCCL) { | ||||
| op_index = task_def.kernel_hccl().op_index(); | op_index = task_def.kernel_hccl().op_index(); | ||||
| } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| op_index = task_def.kernel_with_handle().context().op_index(); | |||||
| } else { | } else { | ||||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | GELOGD("Skip task type: %d", static_cast<int>(task_type)); | ||||
| continue; | continue; | ||||
| @@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||||
| } | } | ||||
| auto &node = iter->second; | auto &node = iter->second; | ||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); | ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); | ||||
| } | } | ||||
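With the new branch, op_index is pulled from a different payload per task type, and the TBE kernel binary is attached for both RT_MODEL_TASK_KERNEL and RT_MODEL_TASK_ALL_KERNEL. A sketch of folding the lookup into one helper, assuming the proto and runtime headers already included in this file (the kernel_ex branch is elided because its enum name is not shown in this hunk); GetOpIndex is a suggestion, not part of the patch.

    // Single lookup for the op_index referenced by a task; returns false when the
    // task type does not carry one of the handled payloads.
    static bool GetOpIndex(const domi::TaskDef &task_def, uint32_t &op_index) {
      const auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
      if (task_type == RT_MODEL_TASK_KERNEL) {
        op_index = task_def.kernel().context().op_index();
      } else if (task_type == RT_MODEL_TASK_HCCL) {
        op_index = task_def.kernel_hccl().op_index();
      } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
        op_index = task_def.kernel_with_handle().context().op_index();
      } else {
        return false;
      }
      return true;
    }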
| @@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
| uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return FAILED; | |||||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | ||||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| } | } | ||||
| @@ -33,6 +33,20 @@ constexpr char const *kAttrOpParamSize = "op_para_size"; | |||||
| constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | ||||
| } // namespace | } // namespace | ||||
| TbeHandleHolder::TbeHandleHolder(void *bin_handle) | |||||
| : bin_handle_(bin_handle) {} | |||||
| TbeHandleHolder::~TbeHandleHolder() { | |||||
| if (bin_handle_ != nullptr) { | |||||
| GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); | |||||
| } | |||||
| } | |||||
| bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { | |||||
| auto ret = registered_handles_.emplace(std::move(holder)); | |||||
| return ret.second; | |||||
| } | |||||
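Each binary registered through rtRegisterAllKernel is wrapped in a TbeHandleHolder whose destructor calls rtDevBinaryUnRegister, and the singleton registry keeps the holders alive, so every handle is unregistered exactly once when the registry is destroyed at process teardown. A condensed usage sketch mirroring RegisterKernelHandle below, with error handling elided.

    void *bin_handle = nullptr;
    GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle));
    auto holder = std::unique_ptr<TbeHandleHolder>(new (std::nothrow) TbeHandleHolder(bin_handle));
    if (holder == nullptr || !TbeHandleRegistry::GetInstance().AddHandle(std::move(holder))) {
      // report and return an error; otherwise bin_handle stays registered until teardown
    }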
| Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
| GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | ||||
| GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | ||||
| @@ -69,7 +83,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| if (rt_ret != RT_ERROR_NONE || is_single_op_) { | if (rt_ret != RT_ERROR_NONE || is_single_op_) { | ||||
| void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
| if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | ||||
| GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||||
| GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | |||||
| rtDevBinary_t binary; | rtDevBinary_t binary; | ||||
| std::string json_string; | std::string json_string; | ||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), | GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), | ||||
| @@ -96,7 +110,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | ||||
| kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); | kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); | ||||
| } else { | } else { | ||||
| GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||||
| GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | |||||
| kernel_store.ReferTBEHandle(stub_name_.c_str()); | kernel_store.ReferTBEHandle(stub_name_.c_str()); | ||||
| } | } | ||||
| std::string kernel_name; | std::string kernel_name; | ||||
| @@ -108,25 +122,63 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||||
| GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | |||||
| "[%s] Failed to validate task def: [%s]", | |||||
| op_desc.GetName().c_str(), | |||||
| task_def.DebugString().c_str()); | |||||
| Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) { | |||||
| TbeHandleRegistry ®istry = TbeHandleRegistry::GetInstance(); | |||||
| auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||||
| if (tbe_kernel == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| void *bin_handle = nullptr; | |||||
| GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str()); | |||||
| rtDevBinary_t binary; | |||||
| std::string json_string; | |||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string), | |||||
| GELOGI("Get original type of session_graph_id.")); | |||||
| if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | |||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF; | |||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||||
| } else { | |||||
| GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| binary.version = 0; | |||||
| binary.data = tbe_kernel->GetBinData(); | |||||
| binary.length = tbe_kernel->GetBinDataSize(); | |||||
| GELOGI("TBE: binary.length: %lu", binary.length); | |||||
| GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle)); | |||||
| handle_ = bin_handle; | |||||
| auto holder = std::unique_ptr<TbeHandleHolder>(new (std::nothrow) TbeHandleHolder(handle_)); | |||||
| if (holder == nullptr) { | |||||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); | |||||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
| } | |||||
| if (!registry.AddHandle(std::move(holder))) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc.GetName().c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
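The same string-to-magic mapping is needed wherever TVM_ATTR_NAME_MAGIC is read (RegisterTbeHandle above reads it too), so a shared translation helper would keep the lists identical. Sketch below, assuming the runtime header that defines rtDevBinary_t and the magic constants; ParseBinaryMagic is a suggestion, not part of the patch.

    #include <string>

    // Map the TVM_ATTR_NAME_MAGIC string onto binary.magic; false for unknown strings.
    static bool ParseBinaryMagic(const std::string &magic_str, rtDevBinary_t &binary) {
      if (magic_str == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
        binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
      } else if (magic_str == "RT_DEV_BINARY_MAGIC_ELF") {
        binary.magic = RT_DEV_BINARY_MAGIC_ELF;
      } else if (magic_str == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
        binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
      } else {
        return false;
      }
      return true;
    }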
| Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | const domi::KernelDef &kernel_def = task_def.kernel(); | ||||
| const domi::KernelContext &context = kernel_def.context(); | const domi::KernelContext &context = kernel_def.context(); | ||||
| stub_name_ = kernel_def.stub_func(); | stub_name_ = kernel_def.stub_func(); | ||||
| GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); | GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); | ||||
| GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); | GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); | ||||
| args_size_ = kernel_def.args_size(); | args_size_ = kernel_def.args_size(); | ||||
| block_dim_ = kernel_def.block_dim(); | block_dim_ = kernel_def.block_dim(); | ||||
| // malloc args memory | // malloc args memory | ||||
| args_.reset(new(std::nothrow) uint8_t[args_size_]); | args_.reset(new(std::nothrow) uint8_t[args_size_]); | ||||
| GE_CHECK_NOTNULL(args_); | GE_CHECK_NOTNULL(args_); | ||||
| if (kernel_def.args().size() < args_size_) { | |||||
| GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); | errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); | ||||
| if (err != EOK) { | if (err != EOK) { | ||||
| GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); | GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); | ||||
| @@ -157,19 +209,75 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef | |||||
| block_dim_, | block_dim_, | ||||
| arg_base_, | arg_base_, | ||||
| args_size_); | args_size_); | ||||
| return SUCCESS; | |||||
| } | |||||
| Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||||
| const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle(); | |||||
| const domi::KernelContext &context = kernel_with_handle.context(); | |||||
| GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc)); | |||||
| original_kernel_key_ = kernel_with_handle.original_kernel_key() + "_"; | |||||
| node_info_ = kernel_with_handle.node_info() + "/"; | |||||
| args_size_ = kernel_with_handle.args_size(); | |||||
| block_dim_ = kernel_with_handle.block_dim(); | |||||
| // malloc args memory | |||||
| args_.reset(new(std::nothrow) uint8_t[args_size_]); | |||||
| GE_CHECK_NOTNULL(args_); | |||||
| if (kernel_with_handle.args().size() < args_size_) { | |||||
| GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_); | |||||
| if (err != EOK) { | |||||
| GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (context.args_offset().size() < sizeof(uint16_t)) { | |||||
| GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||||
| uint32_t offset = *args_offset_buffer; | |||||
| if (offset > args_size_) { | |||||
| GELOGE(INTERNAL_ERROR, | |||||
| "[%s] Arg offset out of range. offset = %u, arg size = %u", | |||||
| GetName().c_str(), | |||||
| offset, | |||||
| args_size_); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset); | |||||
| max_arg_count_ = (args_size_ - offset) / sizeof(void *); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||||
| GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | |||||
| "[%s] Failed to validate task def: [%s]", | |||||
| op_desc.GetName().c_str(), | |||||
| task_def.DebugString().c_str()); | |||||
| if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) { | |||||
| GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def)); | |||||
| } else { | |||||
| GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def)); | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { | Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { | ||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
| if (task_type != RT_MODEL_TASK_KERNEL) { | |||||
| if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) { | |||||
| GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type)); | GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type)); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
| const domi::KernelContext &context = kernel_def.context(); | |||||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||||
| task_def.kernel_with_handle().context(); | |||||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | ||||
| if (kernel_type != ccKernelType::TE) { | if (kernel_type != ccKernelType::TE) { | ||||
| GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type)); | GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type)); | ||||
| @@ -180,10 +288,9 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { | |||||
| } | } | ||||
| Status AiCoreOpTask::PrepareWithShape(TaskContext &context) { | Status AiCoreOpTask::PrepareWithShape(TaskContext &context) { | ||||
| if (tiling_buffer_ != nullptr) { | |||||
| if (is_dynamic_) { | |||||
| return UpdateTilingInfo(context); | return UpdateTilingInfo(context); | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -212,8 +319,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { | |||||
| clear_atomic_ = tiling_info.clear_atomic; | clear_atomic_ = tiling_info.clear_atomic; | ||||
| tiling_data_ = tiling_info.tiling_data.str(); | tiling_data_ = tiling_info.tiling_data.str(); | ||||
| tiling_key_ = tiling_info.tiling_key; | |||||
| GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); | |||||
| if (tiling_data_.empty()) { | if (tiling_data_.empty()) { | ||||
| GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str()); | |||||
| GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| if (tiling_buffer_ == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "tiling_buffer is nullptr while tiling_data is not empty!"); | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -238,6 +351,9 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) | |||||
| GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), | GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), | ||||
| "Failed calc tiling data of node %s.", | "Failed calc tiling data of node %s.", | ||||
| node->GetName().c_str()); | node->GetName().c_str()); | ||||
| if (is_single_op_) { | |||||
| tiling_info.clear_atomic = false; | |||||
| } | |||||
| GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); | GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -296,16 +412,26 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { | |||||
| } | } | ||||
| Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { | Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { | ||||
| GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); | |||||
| GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||||
| if (handle_ != nullptr) { | |||||
| std::string dev_func = original_kernel_key_ + std::to_string(tiling_key_); | |||||
| std::string kernel_info = node_info_ + std::to_string(tiling_key_); | |||||
| GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).", dev_func.c_str(), | |||||
| block_dim_); | |||||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), args_size_, nullptr, | |||||
| stream, kernel_info.c_str())); | |||||
| GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).", dev_func.c_str(), | |||||
| block_dim_); | |||||
| } else { | |||||
| GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); | |||||
| GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
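On the handle path the kernel is resolved by name at launch time: InitWithKernelDefWithHandle appends "_" to original_kernel_key_ and "/" to node_info_, and the tiling key selected in UpdateTilingInfo completes the name here. A tiny illustration with made-up values (the real keys come from KernelDefWithHandle).

    #include <cstdint>
    #include <string>

    // Purely illustrative values.
    const std::string original_kernel_key = std::string("some_kernel_key") + "_";   // set at init
    const std::string node_info = std::string("graph/some_node") + "/";             // set at init
    const uint32_t tiling_key = 3;                                                  // from UpdateTilingInfo
    const std::string dev_func = original_kernel_key + std::to_string(tiling_key);  // "some_kernel_key_3"
    const std::string kernel_info = node_info + std::to_string(tiling_key);         // "graph/some_node/3"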
| Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { | Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { | ||||
| bool dynamic_supported = false; | |||||
| (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported); | |||||
| if (!dynamic_supported) { | |||||
| (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, is_dynamic_); | |||||
| if (!is_dynamic_) { | |||||
| GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str()); | GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -314,22 +440,26 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { | |||||
| int64_t max_size = -1; | int64_t max_size = -1; | ||||
| (void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size); | (void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size); | ||||
| GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); | GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); | ||||
| if (max_size <= 0) { | |||||
| if (max_size < 0) { | |||||
| GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size); | GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| auto allocator = NpuMemoryAllocator::GetAllocator(); | auto allocator = NpuMemoryAllocator::GetAllocator(); | ||||
| GE_CHECK_NOTNULL(allocator); | GE_CHECK_NOTNULL(allocator); | ||||
| tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size)); | |||||
| GE_CHECK_NOTNULL(tiling_buffer_); | |||||
| if (max_size > 0) { | |||||
| tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size)); | |||||
| GE_CHECK_NOTNULL(tiling_buffer_); | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); | |||||
| } else { | |||||
| GELOGD("op_param_size is 0, no need to create tiling buffer."); | |||||
| } | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| bool AiCoreOpTask::IsDynamicShapeSupported() { | bool AiCoreOpTask::IsDynamicShapeSupported() { | ||||
| return tiling_buffer_ != nullptr; | |||||
| return is_dynamic_; | |||||
| } | } | ||||
| const std::string &AiCoreOpTask::GetName() const { | const std::string &AiCoreOpTask::GetName() const { | ||||
| @@ -28,6 +28,32 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| class TbeHandleHolder { | |||||
| public: | |||||
| explicit TbeHandleHolder(void *bin_handle); | |||||
| ~TbeHandleHolder(); | |||||
| void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; } | |||||
| void *GetBinHandle() { return bin_handle_; } | |||||
| private: | |||||
| friend class TbeHandleRegistry; | |||||
| void *bin_handle_ = nullptr; | |||||
| }; | |||||
| class TbeHandleRegistry { | |||||
| public: | |||||
| static TbeHandleRegistry &GetInstance() { | |||||
| static TbeHandleRegistry instance; | |||||
| return instance; | |||||
| } | |||||
| bool AddHandle(std::unique_ptr<TbeHandleHolder> &&holder); | |||||
| private: | |||||
| std::set<std::unique_ptr<TbeHandleHolder>> registered_handles_; | |||||
| }; | |||||
| class AiCoreOpTask { | class AiCoreOpTask { | ||||
| public: | public: | ||||
| AiCoreOpTask() = default; | AiCoreOpTask() = default; | ||||
| @@ -67,6 +93,9 @@ class AiCoreOpTask { | |||||
| Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); | Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); | ||||
| Status InitTilingInfo(const OpDesc &op_desc); | Status InitTilingInfo(const OpDesc &op_desc); | ||||
| Status RegisterTbeHandle(const OpDesc &op_desc); | Status RegisterTbeHandle(const OpDesc &op_desc); | ||||
| Status RegisterKernelHandle(const OpDesc &op_desc); | |||||
| Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def); | |||||
| Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def); | |||||
| std::string stub_name_; | std::string stub_name_; | ||||
| void *stub_func_ = nullptr; | void *stub_func_ = nullptr; | ||||
| @@ -76,6 +105,11 @@ class AiCoreOpTask { | |||||
| bool clear_atomic_ = true; | bool clear_atomic_ = true; | ||||
| bool is_single_op_ = false; | bool is_single_op_ = false; | ||||
| std::vector<int> output_indices_to_skip_; | std::vector<int> output_indices_to_skip_; | ||||
| string original_kernel_key_; | |||||
| string node_info_; | |||||
| uint32_t tiling_key_ = 0; | |||||
| void *handle_ = nullptr; | |||||
| bool is_dynamic_ = false; | |||||
| }; | }; | ||||
| class AtomicAddrCleanOpTask : public AiCoreOpTask { | class AtomicAddrCleanOpTask : public AiCoreOpTask { | ||||
| @@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
| uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return FAILED; | |||||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | ||||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||||
| auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | ||||
| @@ -36,10 +36,6 @@ TaskContext::TaskContext(GraphExecutionContext *execution_context, | |||||
| TaskContext::~TaskContext() { | TaskContext::~TaskContext() { | ||||
| GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str()); | GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str()); | ||||
| for (auto ws_addr : workspaces_) { | |||||
| execution_context_->allocator->Deallocate(ws_addr); | |||||
| } | |||||
| // release output | // release output | ||||
| for (int i = 0; i < NumOutputs(); ++i) { | for (int i = 0; i < NumOutputs(); ++i) { | ||||
| auto output_tensor = MutableOutput(i); | auto output_tensor = MutableOutput(i); | ||||
| @@ -49,6 +45,13 @@ TaskContext::~TaskContext() { | |||||
| } | } | ||||
| } | } | ||||
| void TaskContext::ReleaseWorkspace() { | |||||
| GELOGD("[%s] Start ReleaseWorkspace.", node_item_->NodeName().c_str()); | |||||
| for (auto ws_addr : workspaces_) { | |||||
| execution_context_->allocator->Deallocate(ws_addr); | |||||
| } | |||||
| } | |||||
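Workspace deallocation has moved out of the destructor into this explicit hook, which OnNodeDone now calls before releasing inputs. The vector is not cleared, so a second call would hand the same addresses back to the allocator; an idempotent variant of the method above is sketched here, assuming workspaces_ is the existing vector of raw workspace pointers.

    void TaskContext::ReleaseWorkspace() {
      GELOGD("[%s] Start ReleaseWorkspace.", node_item_->NodeName().c_str());
      for (auto ws_addr : workspaces_) {
        execution_context_->allocator->Deallocate(ws_addr);
      }
      workspaces_.clear();  // make repeated release calls a no-op
    }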
| std::unique_ptr<TaskContext> TaskContext::Create(NodeState *node_state, | std::unique_ptr<TaskContext> TaskContext::Create(NodeState *node_state, | ||||
| GraphExecutionContext *execution_context, | GraphExecutionContext *execution_context, | ||||
| SubgraphContext *subgraph_context) { | SubgraphContext *subgraph_context) { | ||||
| @@ -512,21 +515,21 @@ Status TaskContext::Synchronize() { | |||||
| } | } | ||||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | ||||
| uint32_t task_type, uint32_t block_dim) { | |||||
| const std::string &task_type, uint32_t block_dim) { | |||||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | ||||
| const NodeItem &node_item = GetNodeItem(); | const NodeItem &node_item = GetNodeItem(); | ||||
| auto op_desc = node_item.GetOpDesc(); | auto op_desc = node_item.GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| const GraphExecutionContext * graph_context = GetExecutionContext(); | |||||
| const GraphExecutionContext *graph_context = GetExecutionContext(); | |||||
| GE_CHECK_NOTNULL(graph_context); | GE_CHECK_NOTNULL(graph_context); | ||||
| const HybridModel *model = graph_context->model; | const HybridModel *model = graph_context->model; | ||||
| GE_CHECK_NOTNULL(model); | GE_CHECK_NOTNULL(model); | ||||
| std::string op_name = op_desc->GetName(); | |||||
| std::string dynamic_model_name = model->GetModelName(); | std::string dynamic_model_name = model->GetModelName(); | ||||
| TaskDescInfo tmp_task_desc_info; | TaskDescInfo tmp_task_desc_info; | ||||
| tmp_task_desc_info.model_name = dynamic_model_name; | tmp_task_desc_info.model_name = dynamic_model_name; | ||||
| tmp_task_desc_info.op_name = op_name; | |||||
| tmp_task_desc_info.op_name = op_desc->GetName(); | |||||
| tmp_task_desc_info.op_type = op_desc->GetType(); | |||||
| tmp_task_desc_info.block_dim = block_dim; | tmp_task_desc_info.block_dim = block_dim; | ||||
| tmp_task_desc_info.task_type = task_type; | tmp_task_desc_info.task_type = task_type; | ||||
| tmp_task_desc_info.task_id = task_id; | tmp_task_desc_info.task_id = task_id; | ||||
| @@ -543,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const { | |||||
| return node_state_; | return node_state_; | ||||
| } | } | ||||
| Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { | |||||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||||
| const NodeItem &node_item = GetNodeItem(); | |||||
| auto op_desc = node_item.GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| const GraphExecutionContext * graph_context = GetExecutionContext(); | |||||
| GE_CHECK_NOTNULL(graph_context); | |||||
| const HybridModel *model = graph_context->model; | |||||
| GE_CHECK_NOTNULL(model); | |||||
| std::string dynamic_model_name = model->GetModelName(); | |||||
| auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||||
| op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||||
| ComputeGraphDescInfo tmp_compute_graph_info; | |||||
| tmp_compute_graph_info.model_name = dynamic_model_name; | |||||
| tmp_compute_graph_info.op_name = op_desc->GetName(); | |||||
| tmp_compute_graph_info.op_type = op_desc->GetType(); | |||||
| tmp_compute_graph_info.task_id = task_id; | |||||
| tmp_compute_graph_info.stream_id = stream_id; | |||||
| compute_graph_info.emplace_back(tmp_compute_graph_info); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -56,6 +56,7 @@ class TaskContext { | |||||
| void ReleaseInputsAndOutputs(); | void ReleaseInputsAndOutputs(); | ||||
| bool NeedCallback(); | bool NeedCallback(); | ||||
| void ReleaseInput(int index); | void ReleaseInput(int index); | ||||
| void ReleaseWorkspace(); | |||||
| const TensorValue *GetInput(int index) const; | const TensorValue *GetInput(int index) const; | ||||
| const TensorValue *GetOutput(int index) const; | const TensorValue *GetOutput(int index) const; | ||||
| TensorValue *MutableOutput(int index); | TensorValue *MutableOutput(int index); | ||||
| @@ -112,13 +113,10 @@ class TaskContext { | |||||
| void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
| const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | ||||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); | |||||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
| const std::string &task_type, uint32_t block_dim); | |||||
| void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | ||||
| const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; } | |||||
| Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); | |||||
| void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } | |||||
| private: | private: | ||||
| TaskContext(GraphExecutionContext *execution_context, | TaskContext(GraphExecutionContext *execution_context, | ||||
| NodeState *node_state, | NodeState *node_state, | ||||
| @@ -140,7 +138,6 @@ class TaskContext { | |||||
| uint32_t task_id_ = 0; | uint32_t task_id_ = 0; | ||||
| uint32_t stream_id_ = 0; | uint32_t stream_id_ = 0; | ||||
| std::vector<TaskDescInfo> task_desc_info; | std::vector<TaskDescInfo> task_desc_info; | ||||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -62,19 +62,18 @@ using std::shared_ptr; | |||||
| using std::string; | using std::string; | ||||
| using std::vector; | using std::vector; | ||||
| namespace { | |||||
| static bool is_dynamic_input = false; | static bool is_dynamic_input = false; | ||||
| const char *const kModeSupport = "only support 0(model to framework model), " | const char *const kModeSupport = "only support 0(model to framework model), " | ||||
| "1(framework model to json), 3(only pre-check), " | "1(framework model to json), 3(only pre-check), " | ||||
| "5(pbtxt to json), 6(display model info)"; | "5(pbtxt to json), 6(display model info)"; | ||||
| const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; | const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; | ||||
| static const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; | |||||
| static const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; | |||||
| static const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; | |||||
| const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; | |||||
| const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; | |||||
| const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; | |||||
| // limit available mem size 2G | // limit available mem size 2G | ||||
| const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 | const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 | ||||
| } // namespace | |||||
| DEFINE_string(model, "", "The model file."); | DEFINE_string(model, "", "The model file."); | ||||
| DEFINE_string(output, "", "The output file path&name."); | DEFINE_string(output, "", "The output file path&name."); | ||||
| @@ -1326,6 +1325,7 @@ int init(int argc, char* argv[]) { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -23,7 +23,7 @@ | |||||
| #include "common/debug/memory_dumper.h" | #include "common/debug/memory_dumper.h" | ||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include "common/model_parser/base.h" | |||||
| #include "common/model_parser/model_parser.h" | |||||
| #include "common/model_saver.h" | #include "common/model_saver.h" | ||||
| #include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
| #include "common/string_util.h" | #include "common/string_util.h" | ||||
| @@ -965,7 +965,8 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js | |||||
| } else { | } else { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10003", | ErrorManager::GetInstance().ATCReportErrMessage("E10003", | ||||
| {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); | {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); | ||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||||
| "ParseModelContent failed because of invalid om file. Please check --om param."); | |||||
| } | } | ||||
| if (model.model_data != nullptr) { | if (model.model_data != nullptr) { | ||||
| @@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| string model_name; | |||||
| string op_name; | |||||
| TaskDescInfo tmp_task_desc_info; | |||||
| uint32_t model_id; | uint32_t model_id; | ||||
| uint32_t block_dim; | |||||
| if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { | |||||
| if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | ||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); | |||||
| std::vector<TaskDescInfo> task_desc_info; | |||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| GELOGD("ProfilingReport of op[%s] model[%s] start.", | |||||
| tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str()); | |||||
| TaskDescInfo tmp_task_desc_info; | |||||
| tmp_task_desc_info.model_name = model_name; | |||||
| tmp_task_desc_info.op_name = op_name; | |||||
| tmp_task_desc_info.block_dim = block_dim; | |||||
| tmp_task_desc_info.task_id = task_id; | |||||
| tmp_task_desc_info.stream_id = stream_id; | |||||
| tmp_task_desc_info.shape_type = shape_type; | tmp_task_desc_info.shape_type = shape_type; | ||||
| tmp_task_desc_info.cur_iter_num = 0; | tmp_task_desc_info.cur_iter_num = 0; | ||||
| tmp_task_desc_info.task_type = op_task->GetTaskType(); | tmp_task_desc_info.task_type = op_task->GetTaskType(); | ||||
| GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
| std::vector<TaskDescInfo> task_desc_info; | |||||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||||
| auto &profiling_manager = ProfilingManager::Instance(); | auto &profiling_manager = ProfilingManager::Instance(); | ||||
| profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info); | |||||
| profiling_manager.ReportProfilingData(model_id, task_desc_info); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| @@ -30,8 +30,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManag | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFromModel(const std::string &model_name, | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFromModel(const std::string &model_name, | ||||
| const ModelData &model_data, | const ModelData &model_data, | ||||
| void *stream, | void *stream, | ||||
| SingleOp **single_op) { | |||||
| GELOGI("GetOpFromModel in. model name = %s", model_name.c_str()); | |||||
| SingleOp **single_op, | |||||
| const uint64_t model_id) { | |||||
| GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id); | |||||
| if (single_op == nullptr) { | if (single_op == nullptr) { | ||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "single op is null"); | GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "single op is null"); | ||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | return ACL_ERROR_GE_INTERNAL_ERROR; | ||||
| @@ -99,7 +100,9 @@ StreamResource *SingleOpManager::TryGetResource(uintptr_t resource_id) { | |||||
| Status SingleOpManager::GetDynamicOpFromModel(const string &model_name, | Status SingleOpManager::GetDynamicOpFromModel(const string &model_name, | ||||
| const ModelData &model_data, | const ModelData &model_data, | ||||
| void *stream, | void *stream, | ||||
| DynamicSingleOp **single_op) { | |||||
| DynamicSingleOp **single_op, | |||||
| const uint64_t model_id) { | |||||
| GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id); | |||||
| if (!tiling_func_registered_) { | if (!tiling_func_registered_) { | ||||
| RegisterTilingFunc(); | RegisterTilingFunc(); | ||||
| } | } | ||||
| @@ -37,12 +37,14 @@ class SingleOpManager { | |||||
| Status GetOpFromModel(const std::string &model_name, | Status GetOpFromModel(const std::string &model_name, | ||||
| const ge::ModelData &model_data, | const ge::ModelData &model_data, | ||||
| void *stream, | void *stream, | ||||
| SingleOp **single_op); | |||||
| SingleOp **single_op, | |||||
| const uint64_t model_id); | |||||
| Status GetDynamicOpFromModel(const std::string &model_name, | Status GetDynamicOpFromModel(const std::string &model_name, | ||||
| const ge::ModelData &model_data, | const ge::ModelData &model_data, | ||||
| void *stream, | void *stream, | ||||
| DynamicSingleOp **dynamic_single_op); | |||||
| DynamicSingleOp **dynamic_single_op, | |||||
| const uint64_t model_id); | |||||
| StreamResource *GetResource(uintptr_t resource_id, rtStream_t stream); | StreamResource *GetResource(uintptr_t resource_id, rtStream_t stream); | ||||
| @@ -190,7 +190,7 @@ Status SingleOpModel::LoadAllNodes() { | |||||
| auto node = nodes.at(i); | auto node = nodes.at(i); | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| op_list_[i] = node; | |||||
| op_list_[op_desc->GetId()] = node; | |||||
| auto op_type = op_desc->GetType(); | auto op_type = op_desc->GetType(); | ||||
| GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, node->GetName().c_str(), op_type.c_str()); | GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, node->GetName().c_str(), op_type.c_str()); | ||||
| @@ -261,7 +261,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s | |||||
| if (kernel_type == ccKernelType::TE) { | if (kernel_type == ccKernelType::TE) { | ||||
| GELOGD("Building TBE task"); | GELOGD("Building TBE task"); | ||||
| TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
| auto ret = BuildKernelTask(task_def.kernel(), &tbe_task); | |||||
| auto ret = BuildKernelTask(task_def, &tbe_task); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -332,9 +332,11 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) { | |||||
| } | } | ||||
| } | } | ||||
| Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) { | |||||
| Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) { | |||||
| GE_CHECK_NOTNULL(task); | GE_CHECK_NOTNULL(task); | ||||
| const auto &context = kernel_def.context(); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||||
| task_def.kernel_with_handle().context(); | |||||
| auto iter = op_list_.find(context.op_index()); | auto iter = op_list_.find(context.op_index()); | ||||
| if (iter == op_list_.end()) { | if (iter == op_list_.end()) { | ||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index()); | GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index()); | ||||
| @@ -347,7 +349,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa | |||||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | return ACL_ERROR_GE_MEMORY_ALLOCATION; | ||||
| } | } | ||||
| auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def); | |||||
| auto builder = TbeTaskBuilder(model_name_, iter->second, task_def); | |||||
| auto ret = builder.BuildTask(*tbe_task, model_params_); | auto ret = builder.BuildTask(*tbe_task, model_params_); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| delete tbe_task; | delete tbe_task; | ||||
| @@ -418,13 +420,15 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | |||||
| } | } | ||||
| Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { | Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { | ||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
| const auto &context = kernel_def.context(); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||||
| task_def.kernel_with_handle().context(); | |||||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | ||||
| if (kernel_type == ccKernelType::TE) { | if (kernel_type == ccKernelType::TE) { | ||||
| GELOGD("Building TBE task"); | GELOGD("Building TBE task"); | ||||
| TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | |||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | |||||
| tbe_task->SetModelArgs(model_name_, model_id_); | tbe_task->SetModelArgs(model_name_, model_id_); | ||||
| single_op.op_task_.reset(tbe_task); | single_op.op_task_.reset(tbe_task); | ||||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | ||||
| @@ -453,7 +457,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
| GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), | GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), | ||||
| task_def.DebugString().c_str()); | task_def.DebugString().c_str()); | ||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| if (single_op.op_task_ != nullptr) { | if (single_op.op_task_ != nullptr) { | ||||
| GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | ||||
| return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | ||||
| @@ -24,7 +24,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||||
| #include "single_op/single_op.h" | #include "single_op/single_op.h" | ||||
| #include "single_op/stream_resource.h" | #include "single_op/stream_resource.h" | ||||
| @@ -67,7 +66,7 @@ class SingleOpModel { | |||||
| Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | ||||
| Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | ||||
| Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); | |||||
| Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | |||||
| Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | ||||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | ||||
| Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | ||||
| @@ -23,6 +23,7 @@ | |||||
| #include "aicpu/common/aicpu_task_struct.h" | #include "aicpu/common/aicpu_task_struct.h" | ||||
| #include "common/dump/dump_manager.h" | #include "common/dump/dump_manager.h" | ||||
| #include "common/dump/dump_op.h" | #include "common/dump/dump_op.h" | ||||
| #include "common/profiling/profiling_manager.h" | |||||
| #include "common/formats/formats.h" | #include "common/formats/formats.h" | ||||
| #include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| @@ -93,6 +94,14 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size | |||||
| op_desc_ = op_desc; | op_desc_ = op_desc; | ||||
| } | } | ||||
| void TbeOpTask::SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | |||||
| const OpDescPtr &op_desc, | |||||
| const domi::KernelDefWithHandle &kernel_def_with_handle) { | |||||
| SetKernelArgs(std::move(args), arg_size, block_dim, op_desc); | |||||
| original_kernel_key_ = kernel_def_with_handle.original_kernel_key(); | |||||
| node_info_ = kernel_def_with_handle.node_info(); | |||||
| } | |||||
| void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; } | void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; } | ||||
| void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | ||||
| @@ -100,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | |||||
| model_id_ = model_id; | model_id_ = model_id; | ||||
| } | } | ||||
| Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, | |||||
| uint32_t &block_dim) { | |||||
| model_name = model_name_; | |||||
| model_id = model_id_; | |||||
| block_dim = block_dim_; | |||||
| Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) { | |||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| GE_CHECK_NOTNULL(op_desc_); | GE_CHECK_NOTNULL(op_desc_); | ||||
| op_name = op_desc_->GetName(); | |||||
| string op_name = op_desc_->GetName(); | |||||
| GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||||
| model_id = model_id_; | |||||
| task_desc_info.model_name = model_name_; | |||||
| task_desc_info.block_dim = block_dim_; | |||||
| task_desc_info.task_id = task_id; | |||||
| task_desc_info.stream_id = stream_id; | |||||
| task_desc_info.op_name = op_name; | |||||
| task_desc_info.op_type = op_desc_->GetType(); | |||||
| auto &prof_mgr = ProfilingManager::Instance(); | |||||
| prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | ||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
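The hunk above reworks GetProfilingArgs so it no longer returns loose name/id fields: it asks the runtime for the current task and stream ids (rtGetTaskIdAndStreamID) and packs everything, together with the op's input/output info from ProfilingManager, into a single TaskDescInfo record. The standalone sketch below mirrors only that packing step; the runtime call and the struct are hypothetical stand-ins so the snippet compiles without the Ascend headers.

```cpp
// Standalone sketch of the reworked profiling-args flow; the runtime call below
// is a stand-in, not the real Ascend API.
#include <cstdint>
#include <iostream>
#include <string>

struct TaskDescInfo {            // mirrors the trimmed struct later in this diff
  std::string model_name;
  std::string op_name;
  std::string op_type;
  uint32_t block_dim = 0;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
};

// Stand-in for rtGetTaskIdAndStreamID: pretend the runtime reports the ids.
int FakeGetTaskIdAndStreamID(uint32_t *task_id, uint32_t *stream_id) {
  *task_id = 42;
  *stream_id = 7;
  return 0;  // RT_ERROR_NONE
}

bool GetProfilingArgsSketch(const std::string &model_name, const std::string &op_name,
                            const std::string &op_type, uint32_t block_dim,
                            TaskDescInfo &info) {
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  if (FakeGetTaskIdAndStreamID(&task_id, &stream_id) != 0) {
    return false;  // the real code converts the rt error to a GE status here
  }
  info.model_name = model_name;
  info.op_name = op_name;
  info.op_type = op_type;
  info.block_dim = block_dim;
  info.task_id = task_id;
  info.stream_id = stream_id;
  return true;     // the real code additionally fills I/O info via ProfilingManager
}

int main() {
  TaskDescInfo info;
  if (GetProfilingArgsSketch("single_op_model", "MatMul_1", "MatMul", 32, info)) {
    std::cout << info.op_name << " task_id=" << info.task_id
              << " stream_id=" << info.stream_id << '\n';
  }
}
```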
| @@ -145,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
| uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||||
| const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||||
| TbeOpTask::~TbeOpTask() { | TbeOpTask::~TbeOpTask() { | ||||
| if (sm_desc_ != nullptr) { | if (sm_desc_ != nullptr) { | ||||
| @@ -163,7 +186,11 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } | |||||
| const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | ||||
| uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||||
| const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||||
| void TbeOpTask::SetHandle(void *handle) { | |||||
| this->handle_ = handle; | |||||
| } | |||||
| Status TbeOpTask::LaunchKernel(rtStream_t stream) { | Status TbeOpTask::LaunchKernel(rtStream_t stream) { | ||||
| GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | ||||
| @@ -204,8 +231,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve | |||||
| } | } | ||||
| block_dim_ = run_info.block_dim; | block_dim_ = run_info.block_dim; | ||||
| tiling_data_ = run_info.tiling_data.str(); | tiling_data_ = run_info.tiling_data.str(); | ||||
| GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_, | |||||
| tiling_data_.size()); | |||||
| tiling_key_ = run_info.tiling_key; | |||||
| GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, | |||||
| tiling_data_.size(), tiling_key_); | |||||
| GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); | GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -329,8 +357,17 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| } | } | ||||
| GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | ||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||||
| GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||||
| if (handle_ == nullptr) { | |||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||||
| GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||||
| } else { | |||||
| std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); | |||||
| std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); | |||||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, | |||||
| stream, kernel_info.c_str())); | |||||
| GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
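The launch path above now forks on whether a binary handle was registered for the task. Without a handle it still calls rtKernelLaunch through the stub function; with a handle it derives the device function name as `<original_kernel_key>_<tiling_key>` and the kernel info as `<node_info>/<tiling_key>`, then calls rtKernelLaunchWithHandle. The sketch below reproduces just that naming and dispatch decision; the two Launch* helpers are stand-ins for the real runtime calls.

```cpp
// Dispatch sketch only; LaunchViaStub/LaunchViaHandle stand in for
// rtKernelLaunch and rtKernelLaunchWithHandle, which need the Ascend runtime.
#include <cstdint>
#include <iostream>
#include <string>

void LaunchViaStub(const std::string &stub_name, uint32_t block_dim) {
  std::cout << "rtKernelLaunch(" << stub_name << ", block_dim=" << block_dim << ")\n";
}

void LaunchViaHandle(const std::string &dev_func, const std::string &kernel_info,
                     uint32_t block_dim) {
  std::cout << "rtKernelLaunchWithHandle(" << dev_func << ", info=" << kernel_info
            << ", block_dim=" << block_dim << ")\n";
}

void LaunchKernelSketch(void *handle, const std::string &stub_name,
                        const std::string &original_kernel_key,
                        const std::string &node_info, uint32_t tiling_key,
                        uint32_t block_dim) {
  if (handle == nullptr) {
    LaunchViaStub(stub_name, block_dim);  // classic stub-function path
  } else {
    // handle path: device function and kernel info are derived from the tiling key
    const std::string dev_func = original_kernel_key + "_" + std::to_string(tiling_key);
    const std::string kernel_info = node_info + "/" + std::to_string(tiling_key);
    LaunchViaHandle(dev_func, kernel_info, block_dim);
  }
}

int main() {
  int fake_handle = 0;
  LaunchKernelSketch(nullptr, "model/Add_1_tvmbin", "", "", 0, 1);
  LaunchKernelSketch(&fake_handle, "", "te_add_kernel", "Add_1", 3, 8);
}
```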
| @@ -363,7 +400,8 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||||
| num_inputs_, | num_inputs_, | ||||
| num_outputs_, | num_outputs_, | ||||
| unknown_type_)); | unknown_type_)); | ||||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!"); | |||||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, | |||||
| "Malloc aicpu_ext_handle mem failed!"); | |||||
| Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); | Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -401,7 +439,7 @@ Status AiCpuBaseTask::SetInputConst() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
| Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
| std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
| rtStream_t stream) { | rtStream_t stream) { | ||||
| GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); | GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); | ||||
| @@ -811,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
| return DoUpdateArgTable(param, false); | return DoUpdateArgTable(param, false); | ||||
| } | } | ||||
| uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||||
| const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||||
| void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | ||||
| arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | ||||
| @@ -43,7 +43,7 @@ class OpTask { | |||||
| const vector<GeTensorDesc> &output_desc); | const vector<GeTensorDesc> &output_desc); | ||||
| virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | ||||
| void SetModelArgs(std::string model_name, uint32_t model_id); | void SetModelArgs(std::string model_name, uint32_t model_id); | ||||
| Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); | |||||
| Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||||
| const OpDescPtr &GetOpdesc() const {return op_desc_;} | const OpDescPtr &GetOpdesc() const {return op_desc_;} | ||||
| Status OpenDump(rtStream_t stream); | Status OpenDump(rtStream_t stream); | ||||
| virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | ||||
| @@ -52,7 +52,7 @@ class OpTask { | |||||
| std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
| std::vector<DataBuffer> &output_buffers, | std::vector<DataBuffer> &output_buffers, | ||||
| rtStream_t stream); | rtStream_t stream); | ||||
| virtual uint32_t GetTaskType() const; | |||||
| virtual const std::string &GetTaskType() const; | |||||
| protected: | protected: | ||||
| Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | ||||
| @@ -78,6 +78,8 @@ class TbeOpTask : public OpTask { | |||||
| void SetSmDesc(void *sm_desc); | void SetSmDesc(void *sm_desc); | ||||
| void SetStubFunc(const std::string &name, const void *stub_func); | void SetStubFunc(const std::string &name, const void *stub_func); | ||||
| void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); | void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); | ||||
| void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | |||||
| const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | |||||
| Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc, | Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc, | ||||
| const vector<GeTensorDesc> &output_desc) override; | const vector<GeTensorDesc> &output_desc) override; | ||||
| @@ -86,7 +88,8 @@ class TbeOpTask : public OpTask { | |||||
| size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
| const std::string &GetStubName() const; | const std::string &GetStubName() const; | ||||
| void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | ||||
| uint32_t GetTaskType() const override; | |||||
| const std::string &GetTaskType() const override; | |||||
| void SetHandle(void *handle); | |||||
| private: | private: | ||||
| friend class SingleOpModel; | friend class SingleOpModel; | ||||
| @@ -107,6 +110,11 @@ class TbeOpTask : public OpTask { | |||||
| std::string tiling_data_; | std::string tiling_data_; | ||||
| std::vector<void *> workspaces_; | std::vector<void *> workspaces_; | ||||
| NodePtr node_; | NodePtr node_; | ||||
| uint32_t tiling_key_ = 0; | |||||
| void* handle_ = nullptr; | |||||
| std::string original_kernel_key_; | |||||
| std::string node_info_; | |||||
| }; | }; | ||||
| class AiCpuBaseTask : public OpTask { | class AiCpuBaseTask : public OpTask { | ||||
| @@ -115,7 +123,7 @@ class AiCpuBaseTask : public OpTask { | |||||
| ~AiCpuBaseTask() override; | ~AiCpuBaseTask() override; | ||||
| UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | ||||
| Status UpdateArgTable(const SingleOpModelParam ¶m) override; | Status UpdateArgTable(const SingleOpModelParam ¶m) override; | ||||
| uint32_t GetTaskType() const override; | |||||
| const std::string &GetTaskType() const override; | |||||
| protected: | protected: | ||||
| Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
| @@ -49,6 +49,15 @@ KernelHolder::~KernelHolder() { | |||||
| } | } | ||||
| } | } | ||||
| HandleHolder::HandleHolder(void *bin_handle) | |||||
| : bin_handle_(bin_handle) {} | |||||
| HandleHolder::~HandleHolder() { | |||||
| if (bin_handle_ != nullptr) { | |||||
| GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); | |||||
| } | |||||
| } | |||||
| const char *KernelBinRegistry::GetUnique(const string &stub_func) { | const char *KernelBinRegistry::GetUnique(const string &stub_func) { | ||||
| std::lock_guard<std::mutex> lock(mutex_); | std::lock_guard<std::mutex> lock(mutex_); | ||||
| auto it = unique_stubs_.find(stub_func); | auto it = unique_stubs_.find(stub_func); | ||||
| @@ -76,10 +85,17 @@ bool KernelBinRegistry::AddKernel(const std::string &stub_name, std::unique_ptr< | |||||
| return ret.second; | return ret.second; | ||||
| } | } | ||||
| TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def) | |||||
| bool HandleRegistry::AddHandle(std::unique_ptr<HandleHolder> &&holder) { | |||||
| auto ret = registered_handles_.emplace(std::move(holder)); | |||||
| return ret.second; | |||||
| } | |||||
| TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def) | |||||
| : node_(node), | : node_(node), | ||||
| op_desc_(node->GetOpDesc()), | op_desc_(node->GetOpDesc()), | ||||
| kernel_def_(kernel_def), | |||||
| task_def_(task_def), | |||||
| kernel_def_(task_def.kernel()), | |||||
| kernel_def_with_handle_(task_def.kernel_with_handle()), | |||||
| stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} | stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} | ||||
| Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, | Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, | ||||
| @@ -89,9 +105,14 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi | |||||
| binary.data = kernel_bin.GetBinData(); | binary.data = kernel_bin.GetBinData(); | ||||
| binary.length = kernel_bin.GetBinDataSize(); | binary.length = kernel_bin.GetBinDataSize(); | ||||
| binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC; | binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC; | ||||
| auto ret = rtDevBinaryRegister(&binary, bin_handle); | |||||
| Status ret = 0; | |||||
| if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| ret = rtRegisterAllKernel(&binary, bin_handle); | |||||
| } else { | |||||
| ret = rtDevBinaryRegister(&binary, bin_handle); | |||||
| } | |||||
| if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
| GELOGE(ret, "rtDevBinaryRegister failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), | |||||
| GELOGE(ret, "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), | |||||
| param.core_type, static_cast<int>(ret)); | param.core_type, static_cast<int>(ret)); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -128,14 +149,15 @@ Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_nam | |||||
| Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle, | Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle, | ||||
| const SingleOpModelParam ¶m) { | const SingleOpModelParam ¶m) { | ||||
| std::string kernel_name; | |||||
| GetKernelName(op_desc_, kernel_name); | |||||
| void *handle = nullptr; | void *handle = nullptr; | ||||
| auto ret = DoRegisterBinary(tbe_kernel, &handle, param); | auto ret = DoRegisterBinary(tbe_kernel, &handle, param); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| *bin_handle = handle; | |||||
| return SUCCESS; | |||||
| } | |||||
| ret = DoRegisterMeta(handle); | ret = DoRegisterMeta(handle); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -143,6 +165,8 @@ Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| std::string kernel_name; | |||||
| GetKernelName(op_desc_, kernel_name); | |||||
| ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str()); | ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str()); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GE_CHK_RT(rtDevBinaryUnRegister(handle)); | GE_CHK_RT(rtDevBinaryUnRegister(handle)); | ||||
| @@ -186,13 +210,15 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam | |||||
| void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
| auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param); | auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param); | ||||
| if (ret == SUCCESS) { | |||||
| holder->SetBinHandle(bin_handle); | |||||
| if (!registry.AddKernel(stub_name_, std::move(holder))) { | |||||
| // should not happen. only one thread can reach here | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. stub name = %s", stub_name_.c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| holder->SetBinHandle(bin_handle); | |||||
| if (!registry.AddKernel(stub_name_, std::move(holder))) { | |||||
| // should not happen. only one thread can reach here | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | } | ||||
| } | } | ||||
| @@ -200,6 +226,35 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TbeTaskBuilder::RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m) { | |||||
| GELOGD("RegisterKernelWithHandle begin."); | |||||
| HandleRegistry ®istry = HandleRegistry::GetInstance(); | |||||
| auto tbe_kernel = GetTbeKernel(op_desc_); | |||||
| if (tbe_kernel == nullptr) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s", | |||||
| op_desc_->GetName().c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| void *bin_handle = nullptr; | |||||
| auto ret = DoRegisterKernel(*tbe_kernel, nullptr, &bin_handle, param); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. node name = %s", op_desc_->GetName().c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| handle_ = bin_handle; | |||||
| auto holder = std::unique_ptr<HandleHolder>(new (std::nothrow) HandleHolder(handle_)); | |||||
| if (holder == nullptr) { | |||||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); | |||||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
| } | |||||
| if (!registry.AddHandle(std::move(holder))) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc_->GetName().c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const { | Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const { | ||||
| const std::string &sm_desc_str = kernel_def_.sm_desc(); | const std::string &sm_desc_str = kernel_def_.sm_desc(); | ||||
| if (sm_desc_str.empty()) { | if (sm_desc_str.empty()) { | ||||
| @@ -217,17 +272,17 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m | |||||
| } | } | ||||
| } | } | ||||
| auto rtRet = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); | |||||
| if (rtRet != RT_ERROR_NONE) { | |||||
| GELOGE(rtRet, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rtRet)); | |||||
| return rtRet; | |||||
| auto rt_ret = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| } | } | ||||
| rtRet = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rtRet != RT_ERROR_NONE) { | |||||
| rt_ret = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| (void)rtMemFreeManaged(*sm_desc); | (void)rtMemFreeManaged(*sm_desc); | ||||
| GELOGE(rtRet, "rtMemcpy, ret: %d", static_cast<int>(rtRet)); | |||||
| return rtRet; | |||||
| GELOGE(rt_ret, "rtMemcpy, ret: %d", static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| } | } | ||||
| } | } | ||||
| @@ -239,10 +294,10 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & | |||||
| auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | ||||
| GE_CHECK_NOTNULL(args); | GE_CHECK_NOTNULL(args); | ||||
| auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rtRet != RT_ERROR_NONE) { | |||||
| GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rtRet)); | |||||
| return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
| auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| const domi::KernelContext &context = kernel_def_.context(); | const domi::KernelContext &context = kernel_def_.context(); | ||||
| @@ -258,39 +313,83 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & | |||||
| std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | ||||
| void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | ||||
| uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | ||||
| rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rtRet != RT_ERROR_NONE) { | |||||
| GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rtRet)); | |||||
| return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
| rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| } | } | ||||
| task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | ||||
| return SUCCESS; | |||||
| } | |||||
| Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, | |||||
| const OpDescPtr &op_desc) { | |||||
| size_t arg_size = kernel_def_with_handle_.args_size(); | |||||
| auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
| GE_CHECK_NOTNULL(args); | |||||
| auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| } | |||||
| const domi::KernelContext &context = kernel_def_with_handle_.context(); | |||||
| const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||||
| uint16_t offset = *args_offset_tmp; | |||||
| bool is_dynamic = false; | |||||
| (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | |||||
| if (is_dynamic) { | |||||
| GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | |||||
| } else { | |||||
| // copy args | |||||
| std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||||
| void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||||
| uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||||
| rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| } | |||||
| } | |||||
| task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | |||||
| kernel_def_with_handle_); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | ||||
| GELOGD("Build tbe task begin"); | GELOGD("Build tbe task begin"); | ||||
| auto ret = SetKernelArgs(task, param, op_desc_); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||||
| auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) : | |||||
| SetKernelArgs(task, param, op_desc_); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = RegisterKernel(task, param); | |||||
| ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : | |||||
| RegisterKernel(task, param); | |||||
| task.SetHandle(handle_); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_); | auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_); | ||||
| GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str()); | GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str()); | ||||
| void *stub_func = nullptr; | |||||
| auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); | |||||
| if (rtRet != SUCCESS) { | |||||
| GELOGE(rtRet, "rtGetFunctionByName failed."); | |||||
| return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
| if (task_type != RT_MODEL_TASK_ALL_KERNEL) { | |||||
| void *stub_func = nullptr; | |||||
| auto rt_ret = rtGetFunctionByName(stub_name_.c_str(), &stub_func); | |||||
| if (rt_ret != SUCCESS) { | |||||
| GELOGE(rt_ret, "rtGetFunctionByName failed."); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| task.SetStubFunc(stub_name_, stub_func); | |||||
| } | } | ||||
| task.SetStubFunc(stub_name_, stub_func); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
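BuildTask now keys the whole build flow off the task type: RT_MODEL_TASK_ALL_KERNEL tasks take the with-handle route (SetKernelWithHandleArgs, RegisterKernelWithHandle via rtRegisterAllKernel, and no rtGetFunctionByName lookup), while every other kernel task keeps the original stub-based path. A compact sketch of that selection, with the build steps reduced to placeholder functions:

```cpp
// Flow sketch; the two Build* helpers are placeholders for the real
// SetKernelArgs/RegisterKernel and SetKernelWithHandleArgs/RegisterKernelWithHandle pairs.
#include <iostream>
#include <string>

enum class TaskType { kKernel, kAllKernel };  // stand-ins for the RT_MODEL_TASK_* values

bool BuildStubPath(const std::string &stub_name) {
  std::cout << "SetKernelArgs + RegisterKernel + rtGetFunctionByName(" << stub_name << ")\n";
  return true;
}

bool BuildHandlePath(const std::string &node_name) {
  std::cout << "SetKernelWithHandleArgs + RegisterKernelWithHandle for " << node_name
            << " (no stub lookup needed)\n";
  return true;
}

bool BuildTaskSketch(TaskType type, const std::string &stub_name, const std::string &node_name) {
  return type == TaskType::kAllKernel ? BuildHandlePath(node_name) : BuildStubPath(stub_name);
}

int main() {
  BuildTaskSketch(TaskType::kKernel, "model/Conv2D_1_tvmbin", "Conv2D_1");
  BuildTaskSketch(TaskType::kAllKernel, "", "Conv2D_2");
}
```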
| @@ -299,15 +398,16 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | |||||
| int64_t max_size = -1; | int64_t max_size = -1; | ||||
| (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); | (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); | ||||
| GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); | GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); | ||||
| if (max_size <= 0) { | |||||
| if (max_size < 0) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size); | ||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| void *tiling_buffer = nullptr; | void *tiling_buffer = nullptr; | ||||
| GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM)); | |||||
| GE_CHECK_NOTNULL(tiling_buffer); | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | |||||
| if (max_size > 0) { | |||||
| GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM)); | |||||
| GE_CHECK_NOTNULL(tiling_buffer); | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | |||||
| } | |||||
| task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size)); | task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size)); | ||||
| return SUCCESS; | return SUCCESS; | ||||
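InitTilingInfo above relaxes the op_param_size check so that only negative values are rejected, and the device tiling buffer is allocated only when the reported size is strictly positive; zero-size ops still get dynamic support with a null buffer. A small sketch of the same decision, with plain malloc standing in for rtMalloc on HBM:

```cpp
// Sketch of the relaxed tiling-buffer allocation; malloc stands in for rtMalloc(RT_MEMORY_HBM).
#include <cstdint>
#include <cstdlib>
#include <iostream>

bool InitTilingInfoSketch(int64_t max_size, void **tiling_buffer) {
  *tiling_buffer = nullptr;
  if (max_size < 0) {               // only negative sizes are invalid now
    std::cerr << "invalid op_param_size: " << max_size << '\n';
    return false;
  }
  if (max_size > 0) {               // zero-size ops skip the allocation entirely
    *tiling_buffer = std::malloc(static_cast<size_t>(max_size));
    if (*tiling_buffer == nullptr) return false;
  }
  return true;                      // EnableDynamicSupport(node, buffer, max_size) would follow
}

int main() {
  void *buf = nullptr;
  std::cout << InitTilingInfoSketch(0, &buf) << ' ' << (buf == nullptr) << '\n';    // 1 1
  std::cout << InitTilingInfoSketch(256, &buf) << ' ' << (buf != nullptr) << '\n';  // 1 1
  std::free(buf);
}
```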
| @@ -42,6 +42,19 @@ class KernelHolder { | |||||
| std::shared_ptr<ge::OpKernelBin> kernel_bin_; | std::shared_ptr<ge::OpKernelBin> kernel_bin_; | ||||
| }; | }; | ||||
| class HandleHolder { | |||||
| public: | |||||
| HandleHolder(void *bin_handle); | |||||
| ~HandleHolder(); | |||||
| void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; } | |||||
| void *GetBinHandle() { return bin_handle_; } | |||||
| private: | |||||
| friend class HandleRegistry; | |||||
| void *bin_handle_ = nullptr; | |||||
| }; | |||||
| class KernelBinRegistry { | class KernelBinRegistry { | ||||
| public: | public: | ||||
| static KernelBinRegistry &GetInstance() { | static KernelBinRegistry &GetInstance() { | ||||
| @@ -61,9 +74,22 @@ class KernelBinRegistry { | |||||
| std::mutex mutex_; | std::mutex mutex_; | ||||
| }; | }; | ||||
| class HandleRegistry { | |||||
| public: | |||||
| static HandleRegistry &GetInstance() { | |||||
| static HandleRegistry instance; | |||||
| return instance; | |||||
| } | |||||
| bool AddHandle(std::unique_ptr<HandleHolder> &&holder); | |||||
| private: | |||||
| std::set<std::unique_ptr<HandleHolder>> registered_handles_; | |||||
| }; | |||||
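The new HandleHolder/HandleRegistry pair keeps each registered binary handle alive in a process-wide singleton and unregisters it (rtDevBinaryUnRegister) from the holder's destructor. The reduced sketch below shows the same ownership pattern; the unregister call is replaced by a print, and the mutex is an illustrative assumption, as the registry in this diff does not show one.

```cpp
// Ownership sketch for HandleHolder + HandleRegistry; UnregisterBinary is a
// stand-in for rtDevBinaryUnRegister.
#include <iostream>
#include <memory>
#include <mutex>
#include <set>

void UnregisterBinary(void *handle) { std::cout << "unregister " << handle << '\n'; }

class HandleHolderSketch {
 public:
  explicit HandleHolderSketch(void *bin_handle) : bin_handle_(bin_handle) {}
  ~HandleHolderSketch() {
    if (bin_handle_ != nullptr) UnregisterBinary(bin_handle_);  // RAII cleanup
  }
 private:
  void *bin_handle_ = nullptr;
};

class HandleRegistrySketch {
 public:
  static HandleRegistrySketch &GetInstance() {
    static HandleRegistrySketch instance;  // process-wide singleton, like the real registry
    return instance;
  }
  bool AddHandle(std::unique_ptr<HandleHolderSketch> &&holder) {
    std::lock_guard<std::mutex> lock(mutex_);  // assumption: guard concurrent registration
    return registered_handles_.emplace(std::move(holder)).second;
  }
 private:
  std::set<std::unique_ptr<HandleHolderSketch>> registered_handles_;
  std::mutex mutex_;
};

int main() {
  int fake_bin = 0;
  auto holder = std::make_unique<HandleHolderSketch>(&fake_bin);
  HandleRegistrySketch::GetInstance().AddHandle(std::move(holder));
  // holders are destroyed (and handles unregistered) when the registry is torn down at exit
}
```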
| class TbeTaskBuilder { | class TbeTaskBuilder { | ||||
| public: | public: | ||||
| TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def); | |||||
| TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def); | |||||
| ~TbeTaskBuilder() = default; | ~TbeTaskBuilder() = default; | ||||
| Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); | Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); | ||||
| @@ -71,9 +97,11 @@ class TbeTaskBuilder { | |||||
| private: | private: | ||||
| Status InitTilingInfo(TbeOpTask &task); | Status InitTilingInfo(TbeOpTask &task); | ||||
| Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | ||||
| Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | |||||
| Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | ||||
| Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | ||||
| Status RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m); | |||||
| Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle, | Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle, | ||||
| const SingleOpModelParam ¶m); | const SingleOpModelParam ¶m); | ||||
| Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; | Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; | ||||
| @@ -83,8 +111,11 @@ class TbeTaskBuilder { | |||||
| const NodePtr node_; | const NodePtr node_; | ||||
| const OpDescPtr op_desc_; | const OpDescPtr op_desc_; | ||||
| const domi::TaskDef &task_def_; | |||||
| const domi::KernelDef &kernel_def_; | const domi::KernelDef &kernel_def_; | ||||
| const domi::KernelDefWithHandle &kernel_def_with_handle_; | |||||
| const std::string stub_name_; | const std::string stub_name_; | ||||
| void *handle_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -42,6 +42,10 @@ GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString | |||||
| // Finalize GE, release all resources | // Finalize GE, release all resources | ||||
| GE_FUNC_VISIBILITY Status GEFinalize(); | GE_FUNC_VISIBILITY Status GEFinalize(); | ||||
| GE_FUNC_VISIBILITY std::string GEGetErrorMsg(); | |||||
| GE_FUNC_VISIBILITY std::string GEGetWarningMsg(); | |||||
| class GE_FUNC_VISIBILITY Session { | class GE_FUNC_VISIBILITY Session { | ||||
| public: | public: | ||||
| ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &)) | ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &)) | ||||
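The hunk above only declares GEGetErrorMsg and GEGetWarningMsg; no call site appears in this diff, so the snippet below is an assumed usage pattern sketched against the declared signatures, with local stubs standing in for the real GE entry points.

```cpp
// Assumed usage only: GEGetErrorMsg/GEGetWarningMsg are declared in the hunk above,
// but their call sites are not part of this diff. Local stubs keep the sketch compilable.
#include <iostream>
#include <string>

namespace ge {                       // stand-ins mirroring the declared signatures
std::string GEGetErrorMsg() { return "E19999: sample error text"; }
std::string GEGetWarningMsg() { return ""; }
int GEInitialize() { return 1; }     // pretend initialization failed
}  // namespace ge

int main() {
  if (ge::GEInitialize() != 0) {
    // after a failing GE call, the aggregated messages can be fetched and logged
    std::cerr << "GE error: " << ge::GEGetErrorMsg() << '\n';
    const std::string warn = ge::GEGetWarningMsg();
    if (!warn.empty()) std::cerr << "GE warning: " << warn << '\n';
  }
}
```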
| @@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | |||||
| const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | ||||
| // profiling data | // profiling data | ||||
| const uint32_t kTaskTypeAicore = 0; | |||||
| const uint32_t kTaskTypeAicpu = 1; | |||||
| const uint32_t kTaskTypeInvalid = 0xFFFF; | |||||
| const std::string kTaskTypeAicore = "AI_CORE"; | |||||
| const std::string kTaskTypeAicpu = "AI_CPU"; | |||||
| const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; | |||||
| // Data cache, including data address and length | // Data cache, including data address and length | ||||
| struct DataBuffer { | struct DataBuffer { | ||||
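With the task-type constants above switched from numeric ids to strings, the GetTaskType overrides earlier in this diff return a const reference to one of these globals. A minimal model of that pattern (class names abbreviated, unrelated members omitted):

```cpp
// Minimal model of string task-type reporting via a const-reference getter.
#include <iostream>
#include <string>

const std::string kTaskTypeAicore = "AI_CORE";
const std::string kTaskTypeAicpu = "AI_CPU";
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";

struct OpTaskSketch {
  virtual ~OpTaskSketch() = default;
  // returning a reference to a long-lived constant avoids a copy per query
  virtual const std::string &GetTaskType() const { return kTaskTypeInvalid; }
};

struct TbeOpTaskSketch : OpTaskSketch {
  const std::string &GetTaskType() const override { return kTaskTypeAicore; }
};

struct AiCpuTaskSketch : OpTaskSketch {
  const std::string &GetTaskType() const override { return kTaskTypeAicpu; }
};

int main() {
  TbeOpTaskSketch tbe;
  AiCpuTaskSketch aicpu;
  std::cout << tbe.GetTaskType() << ' ' << aicpu.GetTaskType() << '\n';  // AI_CORE AI_CPU
}
```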
| @@ -251,27 +251,19 @@ struct Options { | |||||
| struct TaskDescInfo { | struct TaskDescInfo { | ||||
| std::string model_name; | std::string model_name; | ||||
| std::string op_name; | std::string op_name; | ||||
| std::string op_type; | |||||
| uint32_t block_dim; | uint32_t block_dim; | ||||
| uint32_t task_id; | uint32_t task_id; | ||||
| uint32_t stream_id; | uint32_t stream_id; | ||||
| std::string shape_type; | std::string shape_type; | ||||
| int64_t cur_iter_num; | int64_t cur_iter_num; | ||||
| uint32_t task_type; | |||||
| }; | |||||
| // Profiling info of graph | |||||
| struct ComputeGraphDescInfo { | |||||
| std::string model_name; | |||||
| std::string op_name; | |||||
| std::string op_type; | |||||
| std::string task_type; | |||||
| std::vector<Format> input_format; | std::vector<Format> input_format; | ||||
| std::vector<std::vector<int64_t>> input_shape; | std::vector<std::vector<int64_t>> input_shape; | ||||
| std::vector<DataType> input_data_type; | std::vector<DataType> input_data_type; | ||||
| std::vector<Format> output_format; | std::vector<Format> output_format; | ||||
| std::vector<std::vector<int64_t>> output_shape; | std::vector<std::vector<int64_t>> output_shape; | ||||
| std::vector<DataType> output_data_type; | std::vector<DataType> output_data_type; | ||||
| uint32_t task_id; | |||||
| uint32_t stream_id; | |||||
| }; | }; | ||||
| struct OpDescInfo { | struct OpDescInfo { | ||||
| @@ -260,12 +260,18 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||||
| static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, | static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, | ||||
| SingleOp **single_op); | SingleOp **single_op); | ||||
| static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream, | |||||
| SingleOp **single_op, const uint64_t model_id); | |||||
| static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | ||||
| std::vector<DataBuffer> &outputs); | std::vector<DataBuffer> &outputs); | ||||
| static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | ||||
| DynamicSingleOp **single_op); | DynamicSingleOp **single_op); | ||||
| static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||||
| DynamicSingleOp **single_op, const uint64_t model_id); | |||||
| static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | ||||
| const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | ||||
| std::vector<DataBuffer> &outputs); | std::vector<DataBuffer> &outputs); | ||||
| @@ -55,7 +55,8 @@ typedef void *OpTensor_t; | |||||
| /// @return 0 for success / others for fail | /// @return 0 for success / others for fail | ||||
| /// | /// | ||||
| GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, | GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, | ||||
| const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, const char *om_file); | |||||
| const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, | |||||
| const char *om_file); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -52,7 +52,8 @@ GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_ME | |||||
| /// \param var_info [in] host variable addr infos. | /// \param var_info [in] host variable addr infos. | ||||
| /// \param mem_type [in] memory type for rdma pool. | /// \param mem_type [in] memory type for rdma pool. | ||||
| /// \return Status result of function | /// \return Status result of function | ||||
| GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); | |||||
| GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, | |||||
| rtMemType_t mem_type = RT_MEMORY_HBM); | |||||
| /// | /// | ||||
| /// \param tensor_info [in] description for tensor stored shared memory. | /// \param tensor_info [in] description for tensor stored shared memory. | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit a2b80cb22a62a6757c7dd31e684ca632e0b79268 | |||||
| Subproject commit 4a9bfd772cad72ff281a2e21d59b8d225a26789c | |||||
| @@ -1 +1 @@ | |||||
| Subproject commit cfabf622b803d5957563a73652a0ce5086aab99d | |||||
| Subproject commit 86162f60807c063f7344f902e443fc99657be637 | |||||
| @@ -19,7 +19,6 @@ add_subdirectory(depends/cce) | |||||
| add_subdirectory(depends/slog) | add_subdirectory(depends/slog) | ||||
| add_subdirectory(depends/mmpa) | add_subdirectory(depends/mmpa) | ||||
| add_subdirectory(depends/runtime) | add_subdirectory(depends/runtime) | ||||
| add_subdirectory(depends/omg) | |||||
| add_subdirectory(depends/hccl) | add_subdirectory(depends/hccl) | ||||
| add_subdirectory(depends/profiler) | add_subdirectory(depends/profiler) | ||||
| add_subdirectory(depends/error_manager) | add_subdirectory(depends/error_manager) | ||||
| @@ -29,6 +29,11 @@ include_directories(${GE_CODE_DIR}/inc/framework) | |||||
| include_directories(${GE_CODE_DIR}/metadef/inc/external) | include_directories(${GE_CODE_DIR}/metadef/inc/external) | ||||
| add_library(mmpa_stub SHARED ${SRCS}) | add_library(mmpa_stub SHARED ${SRCS}) | ||||
| target_compile_options(mmpa_stub PRIVATE | |||||
| -g | |||||
| ) | |||||
| target_link_libraries(mmpa_stub PRIVATE | target_link_libraries(mmpa_stub PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -231,8 +231,12 @@ INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone) | |||||
| INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen) | INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen) | ||||
| { | { | ||||
| INT32 ret = EN_OK; | INT32 ret = EN_OK; | ||||
| char *pRet = realpath(path, realPath); | |||||
| if (pRet == NULL) { | |||||
| if (path == nullptr || realPath == nullptr || realPathLen < MMPA_MAX_PATH) { | |||||
| return EN_INVALID_PARAM; | |||||
| } | |||||
| char *ptr = realpath(path, realPath); | |||||
| if (ptr == nullptr) { | |||||
| ret = EN_ERROR; | ret = EN_ERROR; | ||||
| } | } | ||||
| return ret; | return ret; | ||||
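The mmpa stub's mmRealPath now validates its arguments before calling realpath and returns EN_INVALID_PARAM for a null pointer or an undersized output buffer. A standalone POSIX sketch of the same guard, with PATH_MAX standing in for MMPA_MAX_PATH and local constants for the EN_* codes:

```cpp
// Guarded realpath sketch (POSIX); PATH_MAX stands in for MMPA_MAX_PATH and the
// EN_* values are local stand-ins for the mmpa return codes.
#include <limits.h>
#include <stdlib.h>
#include <iostream>

constexpr int EN_OK = 0;
constexpr int EN_ERROR = -1;
constexpr int EN_INVALID_PARAM = -2;

int RealPathSketch(const char *path, char *real_path, int real_path_len) {
  if (path == nullptr || real_path == nullptr || real_path_len < PATH_MAX) {
    return EN_INVALID_PARAM;                 // reject bad arguments up front
  }
  return realpath(path, real_path) == nullptr ? EN_ERROR : EN_OK;
}

int main() {
  char buf[PATH_MAX] = {0};
  std::cout << RealPathSketch(nullptr, buf, PATH_MAX) << '\n';  // -2
  std::cout << RealPathSketch(".", buf, PATH_MAX) << '\n';      // 0, buf holds the cwd
}
```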
| @@ -260,7 +264,7 @@ INT32 mmDlclose(VOID *handle) | |||||
| CHAR *mmDlerror() | CHAR *mmDlerror() | ||||
| { | { | ||||
| return ""; | |||||
| return dlerror(); | |||||
| } | } | ||||
| INT32 mmDladdr(VOID *addr, mmDlInfo *info) | INT32 mmDladdr(VOID *addr, mmDlInfo *info) | ||||
| @@ -1,59 +0,0 @@ | |||||
| # Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| #cmake_minimum_required(VERSION 2.8) | |||||
| project(OMG_CCE) | |||||
| set(CMAKE_CXX_STANDARD 11) | |||||
| include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc) | |||||
| include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/cce) | |||||
| include_directories(${GE_CODE_DIR}/inc) | |||||
| include_directories(${GE_CODE_DIR}/metadef/inc) | |||||
| include_directories(${GE_CODE_DIR}/inc/framework) | |||||
| include_directories(${GE_CODE_DIR}/metadef/inc/graph) | |||||
| include_directories(${GE_CODE_DIR}/inc/external) | |||||
| include_directories(${GE_CODE_DIR}/metadef/inc/external) | |||||
| include_directories(${GE_CODE_DIR}/metadef/inc/external/graph) | |||||
| include_directories(${GE_CODE_DIR}/ge) | |||||
| include_directories(${CMAKE_BINARY_DIR}) | |||||
| include_directories(${CMAKE_BINARY_DIR}/proto/ge) | |||||
| set(PROTO_LIST | |||||
| "${GE_CODE_DIR}/metadef/proto/om.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/task.proto" | |||||
| ) | |||||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||||
| set(SRCS | |||||
| # "${GE_CODE_DIR}/src/ge/common/util.cc" | |||||
| "src/omg_stub.cc" | |||||
| ) | |||||
| add_library(omg_stub SHARED ${SRCS} ${PROTO_SRCS} ${PROTO_HDRS}) | |||||
| target_compile_definitions(omg_stub PRIVATE | |||||
| google=ascend_private | |||||
| ) | |||||
| target_link_libraries(omg_stub PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| -Wl,--no-as-needed | |||||
| ascend_protobuf | |||||
| -Wl,--as-needed | |||||
| c_sec | |||||
| json | |||||
| ) | |||||
| @@ -1,878 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <map> | |||||
| #include <fstream> | |||||
| #include <unordered_map> | |||||
| #include <google/protobuf/io/coded_stream.h> | |||||
| #include <google/protobuf/io/zero_copy_stream_impl.h> | |||||
| #include "mmpa/mmpa_api.h" | |||||
| #include "common/debug/log.h" | |||||
| #include "common/debug/memory_dumper.h" | |||||
| #include "common/types.h" | |||||
| #include "common/util.h" | |||||
| #include "common/string_util.h" | |||||
| #include "common/properties_manager.h" | |||||
| #include "common/model_parser/base.h" | |||||
| #include "graph/model.h" | |||||
| #include "cce/dnn.h" | |||||
| #include "ge/ge_api_types.h" | |||||
| #include "framework/common/ge_types.h" | |||||
| #include "graph/utils/op_desc_utils.h" | |||||
| #include "common/profiling/profiling_manager.h" | |||||
| using domi::domiTensorFormat_t; | |||||
| using namespace cce; | |||||
| using namespace ge; | |||||
| struct PROC_PARAM { | |||||
| uint8_t *model_name; | |||||
| // ISV Ek buffer | |||||
| uint8_t *model_key; | |||||
| uint32_t model_key_len; | |||||
| // ISV root certificate buffer | |||||
| uint8_t *root_cert; | |||||
| uint32_t root_cert_len; | |||||
| // ISV private key buffer | |||||
| uint8_t *pri_key; | |||||
| uint32_t pri_key_len; | |||||
| // Raw AI Module Image buffer | |||||
| uint8_t *ai_image; | |||||
| uint32_t ai_image_len; | |||||
| // ISV HW key buffer | |||||
| uint8_t *hw_key; | |||||
| uint32_t hw_key_len; | |||||
| }; | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| using namespace ge; | |||||
| namespace { | |||||
| const char FMK_STATUS_FILE_DIR_ENV[] = "FMK_STATUS_FILE_DIR"; | |||||
| const char JOBSTATE_FILE_NAME[] = "jobstateupdate_framework"; | |||||
| const char HCOM_DETECT_FILE_NAME[] = "hcom_detection_result"; | |||||
| const char FILE_SEPARATE[] = "/"; | |||||
| } // namespace | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| namespace ge { | |||||
| struct GeModelPartition { | |||||
| ModelPartitionType type_ = MODEL_DEF; | |||||
| uint8_t *data_ = nullptr; | |||||
| size_t size_ = 0; | |||||
| GeModelPartition() = default; | |||||
| GeModelPartition(const GeModelPartition &partition){}; | |||||
| GeModelPartition &operator=(const GeModelPartition &partition) = delete; | |||||
| ~GeModelPartition() { | |||||
| if (data_ != nullptr) { | |||||
| delete[] data_; | |||||
| data_ = nullptr; | |||||
| } | |||||
| } | |||||
| Status SetData(uint8_t *data, size_t size) { | |||||
| size_ = size; | |||||
| data_ = new (std::nothrow) uint8_t[size](); | |||||
| errno_t err; | |||||
| err = memcpy_s(data_, size_, data, size); | |||||
| if (err) { | |||||
| GELOGE(ge::FAILED, "[GeModel Partition] Error occur when copy GeModel Partition data."); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SetType(ModelPartitionType type) { | |||||
| type_ = type; | |||||
| return SUCCESS; | |||||
| } | |||||
| }; | |||||
| struct OmFileContext { | |||||
| vector<GeModelPartition> partition_datas_; | |||||
| vector<char> partition_table_; | |||||
| uint32_t model_data_len_; | |||||
| }; | |||||
| class SubGraphInfo; | |||||
| using SubGraphInfoPtr = std::shared_ptr<ge::SubGraphInfo>; | |||||
| using GeModelPartitionPtr = std::shared_ptr<GeModelPartition>; | |||||
| using ModelPtr = std::shared_ptr<ge::Model>; | |||||
| class GeModel { | |||||
| public: | |||||
| explicit GeModel(const ModelPtr &model_ptr); | |||||
| ~GeModel() = default; | |||||
| GeModel(const GeModel &other) = delete; | |||||
| GeModel &operator=(const GeModel &other) = delete; | |||||
| ModelPtr GetModelPtr() const; | |||||
| Status AddPartition(uint8_t *data, size_t size, ModelPartitionType type); | |||||
| Status GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition); | |||||
| uint8_t GetPlatformType() const; | |||||
| void SetPlatformType(const uint8_t platform_type) { platform_type_ = platform_type; } | |||||
| private: | |||||
| std::map<ModelPartitionType, GeModelPartitionPtr> partitions_; | |||||
| ModelPtr model_ = nullptr; | |||||
| uint8_t platform_type_ = {0}; | |||||
| }; | |||||
| using GeModelPtr = std::shared_ptr<ge::GeModel>; | |||||
| GeModel::GeModel(const ModelPtr &model_ptr) { this->model_ = model_ptr; } | |||||
| ModelPtr GeModel::GetModelPtr() const { return this->model_; } | |||||
| uint8_t GeModel::GetPlatformType() const { return platform_type_; } | |||||
| Status GeModel::AddPartition(uint8_t *data, size_t size, ModelPartitionType type) { | |||||
| if (size == 0) { | |||||
| return FAILED; | |||||
| } | |||||
| if (data == nullptr) { | |||||
| return FAILED; | |||||
| } | |||||
| auto iter = partitions_.find(type); | |||||
| if (iter != partitions_.end()) { | |||||
| return FAILED; | |||||
| } | |||||
| GeModelPartitionPtr partition = nullptr; | |||||
| GE_MAKE_SHARED(partition = std::make_shared<ge::GeModelPartition>(), return FAILED); | |||||
| Status ret = partition->SetType(type); | |||||
| if (ret != SUCCESS) { | |||||
| return FAILED; | |||||
| } | |||||
| ret = partition->SetData(data, size); | |||||
| if (ret != SUCCESS) { | |||||
| return FAILED; | |||||
| } | |||||
| partitions_.insert(std::pair<ModelPartitionType, GeModelPartitionPtr>(type, partition)); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GeModel::GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition) { | |||||
| auto iter = partitions_.find(type); | |||||
| if (iter == partitions_.end()) { | |||||
| return FAILED; | |||||
| } | |||||
| partition = iter->second; | |||||
| return SUCCESS; | |||||
| } | |||||
| class OmFileSaveHelper { | |||||
| public: | |||||
| OmFileSaveHelper(); | |||||
| ~OmFileSaveHelper(); | |||||
| vector<GeModelPartition> &GetModelPartitions(); | |||||
| ModelPartitionTable *GetPartitionTable(); | |||||
| ModelFileHeader model_header_; | |||||
| ModelFileHeader &GetModelFileHeader() { return model_header_; } | |||||
| void AddPartition(GeModelPartition &partition); | |||||
| private: | |||||
| OmFileContext context_; | |||||
| }; | |||||
| OmFileSaveHelper::OmFileSaveHelper() {} | |||||
| OmFileSaveHelper::~OmFileSaveHelper() {} | |||||
| vector<GeModelPartition> &OmFileSaveHelper::GetModelPartitions() { | |||||
| static std::vector<GeModelPartition> tmp; | |||||
| return tmp; | |||||
| } | |||||
| ModelPartitionTable *OmFileSaveHelper::GetPartitionTable() { return nullptr; } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OmFileSaveHelper::AddPartition(GeModelPartition &partition) { | |||||
| context_.partition_datas_.push_back(partition); | |||||
| context_.model_data_len_ += partition.size_; | |||||
| } | |||||
| class ModelBuilder { | |||||
| public: | |||||
| ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector<SubGraphInfoPtr> &subgraphs, | |||||
| const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, int mode); | |||||
| virtual ~ModelBuilder(); | |||||
| Status BuildModel(ge::Model &model_def); | |||||
| Status SaveWeightsToModel(ge::Model &model); | |||||
| Status SaveDataToModel(ge::Model &model, ge::GeModel &ge_model); | |||||
| Status PreBuildModel(); | |||||
| Status BuildModelForGetTask(ge::Model &model_def); | |||||
| ge::Buffer GetWeightBuffer() const; | |||||
| void SetModelVersion(ge::Model &model_def); | |||||
| public: | |||||
| ge::Buffer weight_buffer_; | |||||
| }; | |||||
| ModelBuilder::ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector<SubGraphInfoPtr> &subgraphs, | |||||
| const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, int mode) { | |||||
| weight_buffer_ = ge::Buffer(4100000); | |||||
| } | |||||
| ModelBuilder::~ModelBuilder() {} | |||||
| Status ModelBuilder::SaveWeightsToModel(ge::Model &model) { return SUCCESS; } | |||||
| Status ModelBuilder::BuildModel(ge::Model &model_def) { return SUCCESS; } | |||||
| Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { return SUCCESS; } | |||||
| Status ModelBuilder::PreBuildModel() { return SUCCESS; } | |||||
| Status ModelBuilder::BuildModelForGetTask(ge::Model &model_def) { return SUCCESS; } | |||||
| void ModelBuilder::SetModelVersion(ge::Model &model_def) { return; } | |||||
| ge::Buffer ModelBuilder::GetWeightBuffer() const { return ge::Buffer(4100000); } | |||||
| } // namespace ge | |||||
| using ProcParam = struct PROC_PARAM; | |||||
| namespace ge { | |||||
| #include <iostream> | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_N = 0; | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_C = 1; | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_H = 2; | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_W = 3; | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_N = 0; | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_H = 1; | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_W = 2; | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_C = 3; | |||||
| const uint32_t MODEL_FILE_MAGIC_NUM = 0x444F4D49; | |||||
| const uint32_t MODEL_FILE_HEAD_LEN = 256; | |||||
| const uint32_t MODEL_VERSION = 0x10000000; | |||||
| const int MAX_FILE_SIZE_LIMIT = INT_MAX; | |||||
| bool FC_WEIGHT_COMPRESS_FLAG = false; | |||||
| bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length) { | |||||
| length = 10; | |||||
| *buffer = new (std::nothrow) char[10](); | |||||
| GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(*buffer == nullptr, false, "new an object failed."); | |||||
| return true; | |||||
| } | |||||
| bool ReadProtoFromText(const char *file, google::protobuf::Message *message) { | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((nullptr == file || nullptr == message), return false, | |||||
| "incorrect parameter. nullptr == file || nullptr == message"); | |||||
| string real_path = RealPath(file); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "proto file path '%s' not valid", file); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path.c_str()) == -1, return false, "file size not valid."); | |||||
| std::ifstream fs(real_path.c_str(), std::ifstream::in); | |||||
| if (!fs.is_open()) { | |||||
| GELOGE(ge::FAILED, "proto file '%s' open fail.", file); | |||||
| return false; | |||||
| } | |||||
| google::protobuf::io::IstreamInputStream input(&fs); | |||||
| bool ret = google::protobuf::TextFormat::Parse(&input, message); | |||||
| GE_IF_BOOL_EXEC(ret != true, | |||||
| GELOGI("call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); | |||||
| fs.close(); | |||||
| return ret; | |||||
| } | |||||
| uint64_t GetCurrentTimestap() { return 0; } | |||||
| // get length of file | |||||
| long GetFileLength(const std::string &input_file) { | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(input_file.empty(), return -1, "input_file path is null."); | |||||
| string real_path = RealPath(input_file.c_str()); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str()); | |||||
| unsigned long long file_length = 0; | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, return -1, | |||||
| "open file failed."); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length <= 0), return -1, "file length <= 0, not valid."); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %llu is out of limit: %d.", | |||||
| file_length, MAX_FILE_SIZE_LIMIT); | |||||
| return file_length; | |||||
| } | |||||
| string RealPath(const char *path) { | |||||
| string s = path; | |||||
| if (s.size() >= PATH_MAX) { | |||||
| return ""; | |||||
| } | |||||
| if (s == "." || s == "1") { | |||||
| return path; | |||||
| // for insert_aipp_op unittest | |||||
| } else if (s.substr(0, 3) == "llt") { | |||||
| return path; | |||||
| } else { | |||||
| return "22"; | |||||
| } | |||||
| } | |||||
| bool CheckInputPathValid(const string &file_path) { return true; } | |||||
| bool ReadProtoFromArray(const void *data, int size, Message *proto) { return true; } | |||||
| struct ModelPartition { | |||||
| ModelPartitionType type; | |||||
| uint8_t *data = nullptr; | |||||
| uint32_t size = 0; | |||||
| }; | |||||
| class InsertNewOpUtil { | |||||
| public: | |||||
| InsertNewOpUtil(); | |||||
| ~InsertNewOpUtil(); | |||||
| Status InsertNewOps(const ComputeGraphPtr &graph); | |||||
| Status InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path); | |||||
| Status Parse(const char *conf_path); | |||||
| }; | |||||
| InsertNewOpUtil::InsertNewOpUtil() {} | |||||
| Status InsertNewOpUtil::InsertNewOps(const ComputeGraphPtr &graph) { return SUCCESS; } | |||||
| Status InsertNewOpUtil::InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path) { return SUCCESS; } | |||||
| Status InsertNewOpUtil::Parse(const char *conf_path) { return SUCCESS; } | |||||
| Status InitOME() { return SUCCESS; } | |||||
| class GraphOptimizer { | |||||
| public: | |||||
| Status Optimize(); | |||||
| Status OptimizeAfterCal(); | |||||
| Status AdjustDataOpDesc(); | |||||
| Status InsertTransOp(); | |||||
| Status FusionFmkop(); | |||||
| Status Optimize4Cloud(); | |||||
| Status Optimize4FlowCtrl(); | |||||
| Status OptimizeBeforeBuild(); | |||||
| }; | |||||
| Status GraphOptimizer::Optimize() { return SUCCESS; } | |||||
| Status Init(Options options) { return SUCCESS; } | |||||
| Status Shutdown(Options options) { return SUCCESS; } | |||||
| class Session { | |||||
| public: | |||||
| // singleton | |||||
| static Session *Instance(); | |||||
| const uint32_t &DeviceId() const; | |||||
| }; | |||||
| // use a function-local static so the stub does not return a dangling reference to a temporary | |||||
| const uint32_t &Session::DeviceId() const { static const uint32_t device_id = 0; return device_id; } | |||||
| Session *Session::Instance() { | |||||
| static Session instance; | |||||
| return &instance; | |||||
| } | |||||
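| // Usage sketch (illustrative, not part of the original stub): the function-local static above is a | |||||
| // Meyers singleton, so construction happens exactly once (thread-safe since C++11). | |||||
| inline uint32_t ExampleReadStubDeviceId() { return Session::Instance()->DeviceId(); } | |||||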
| struct OmgContext { | |||||
| domiTensorFormat_t format; | |||||
| // get input format from cmd | |||||
| std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map; | |||||
| std::vector<domiTensorFormat_t> output_formats; | |||||
| // user-designated input dims | |||||
| std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; | |||||
| // global input dims | |||||
| std::map<std::string, std::vector<int64_t>> input_dims; | |||||
| // map renamed ops, e.g. DetectionOutput:SsdDetectionOutput | |||||
| std::map<std::string, std::string> op_conf_map; | |||||
| // save output node of network: key is op name, value = index, index is the output index of op | |||||
| std::map<std::string, std::vector<int32_t>> out_nodes_map; | |||||
| // user-designated out nodes (used for determining the output order) | |||||
| std::vector<std::pair<std::string, int32_t>> user_out_nodes; | |||||
| // save the paths of custom aicpu ops | |||||
| std::vector<std::string> aicpu_op_run_paths; | |||||
| // save the ddk version | |||||
| std::string ddk_version; | |||||
| // save format | |||||
| domiTensorFormat_t net_format; | |||||
| FrameworkType type; | |||||
| // RunMode run_mode; | |||||
| bool train_flag = false; | |||||
| std::string output_type; | |||||
| /// save the name of the network | |||||
| /// e.g. faster-rcnn: if the FirstStageProcessor scope remains after scope_fusion, the net is treated as faster-rcnn; | |||||
| /// the conv+reshape of FirstStageBoxPredictor/BoxEncodingPredictor is then reordered | |||||
| /// and the reshape op needs to be deleted | |||||
| std::string net_name; | |||||
| }; | |||||
| } // namespace ge | |||||
| namespace domi { | |||||
| ge::OmgContext &GetContext() { | |||||
| static ge::OmgContext tmp; | |||||
| return tmp; | |||||
| } | |||||
| } // namespace domi | |||||
| namespace ge { | |||||
| class OpUtils { | |||||
| public: | |||||
| static Status InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor); | |||||
| static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim, | |||||
| ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt); | |||||
| static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor); | |||||
| }; | |||||
| Status OpUtils::InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor) { | |||||
| ccCreatePoolingMaskDescriptor(&cc_tensor); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status OpUtils::InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim, | |||||
| ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt) { | |||||
| Status ret = SUCCESS; | |||||
| return ret; | |||||
| } | |||||
| class FileSaver { | |||||
| public: | |||||
| Status SaveToFile(const string &file_path, ModelFileHeader &model_file_header, | |||||
| ModelPartitionTable &model_partition_table, const std::vector<ModelPartition> &partition_datas); | |||||
| Status SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param, | |||||
| const ModelFileHeader *model_file_header, bool check_sum); | |||||
| }; | |||||
| Status FileSaver::SaveToFile(const string &file_path, ModelFileHeader &model_file_header, | |||||
| ModelPartitionTable &model_partition_table, | |||||
| const std::vector<ModelPartition> &partition_datas) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status FileSaver::SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param, | |||||
| const ModelFileHeader *model_file_header, bool check_sum) { | |||||
| return SUCCESS; | |||||
| } | |||||
| class ModelSaver : public FileSaver {}; | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::DestroyTensorDescriptor( | |||||
| ccTensorDescriptor_t &cc_tensor) { | |||||
| if (nullptr != cc_tensor) { | |||||
| ccStatus_t ret = ccDestroyTensorDescriptor(&cc_tensor); | |||||
| GE_LOGE_IF(CC_STATUS_SUCCESS != ret, "ccDestroyTensorDescriptor failed. ret = %d", ret); | |||||
| cc_tensor = nullptr; | |||||
| } | |||||
| } | |||||
| } // namespace ge | |||||
| namespace domi { | |||||
| class OpRegistrationData {}; | |||||
| class OpRegistry { | |||||
| public: | |||||
| static OpRegistry *Instance(); | |||||
| std::vector<OpRegistrationData> registration_datas; | |||||
| ImplyType GetImplyType(const std::string &op_type); | |||||
| void GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const ImplyType &imply_type); | |||||
| }; | |||||
| OpRegistry *OpRegistry::Instance() { | |||||
| static OpRegistry instance; | |||||
| return &instance; | |||||
| } | |||||
| void OpRegistry::GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const ImplyType &imply_type) { | |||||
| if (imply_type == ImplyType::AI_CPU) { | |||||
| vec_op_type.push_back("square"); | |||||
| } | |||||
| } | |||||
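| // Illustrative sketch (not part of the original stub): querying the stubbed registry; with the | |||||
| // implementation above the result contains exactly one entry, "square", for AI_CPU. | |||||
| inline std::vector<std::string> ExampleQueryAicpuOpTypes() { | |||||
| std::vector<std::string> aicpu_types; | |||||
| OpRegistry::Instance()->GetOpTypeByImplyType(aicpu_types, ImplyType::AI_CPU); | |||||
| return aicpu_types; | |||||
| } | |||||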
| class OpRegistrationTbe { | |||||
| public: | |||||
| static OpRegistrationTbe *Instance(); | |||||
| bool Finalize(OpRegistrationData ®_data, bool is_train); | |||||
| }; | |||||
| OpRegistrationTbe *OpRegistrationTbe::Instance() { | |||||
| static OpRegistrationTbe instance; | |||||
| return &instance; | |||||
| } | |||||
| bool OpRegistrationTbe::Finalize(OpRegistrationData ®_data, bool is_train) { return true; } | |||||
| } // namespace domi | |||||
| namespace ge { | |||||
| class GraphPrepare { | |||||
| private: | |||||
| Status OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph); | |||||
| }; | |||||
| Status GraphPrepare::OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph) { return SUCCESS; } | |||||
| } // namespace ge | |||||
| namespace ge { | |||||
| Status GetOriginalType(const ge::NodePtr &node, string &type) { | |||||
| type = node->GetType(); | |||||
| GE_IF_BOOL_EXEC(type != FRAMEWORKOP, return SUCCESS); | |||||
| ge::AttrUtils::GetStr(node->GetOpDesc(), "original_type", type); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SetCycleEvent(const ge::NodePtr &node) { return SUCCESS; } | |||||
| Status SetStreamLabel(const ge::NodePtr &node, const std::string &label) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| OpDescPtr tmp_desc = AttrUtils::CloneOpDesc(node->GetOpDesc()); | |||||
| GE_CHECK_NOTNULL(tmp_desc); | |||||
| if (!AttrUtils::SetStr(tmp_desc, "_stream_label", label)) { | |||||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_STREAM_LABEL failed", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SetActiveLabelList(const ge::NodePtr &node, const std::vector<std::string> &label) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(tmp_desc); | |||||
| // add list of active_label | |||||
| if (!AttrUtils::SetListStr(tmp_desc, "_active_label", label)) { | |||||
| GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_ACTIVE_LABEL_LIST failed", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SetSwitchBranchNodeLabel(const ge::NodePtr &node, const std::string &branch_label) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(tmp_desc); | |||||
| // add branch_label of switch | |||||
| if (!AttrUtils::SetStr(tmp_desc, "_switch_branch_node_label", branch_label)) { | |||||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_BRANCH_NODE_LABEL failed", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SetSwitchTrueBranchFlag(const ge::NodePtr &node, bool value) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(tmp_desc); | |||||
| // add switch_true_branch_flag | |||||
| if (!AttrUtils::SetBool(tmp_desc, "_switch_true_branch_flag", value)) { | |||||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG failed", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SetOriginalNodeName(const ge::NodePtr &node, const std::string &orig_name) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(tmp_desc); | |||||
| // record original_node_name | |||||
| if (!AttrUtils::SetStr(tmp_desc, "_original_node_name", orig_name)) { | |||||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_ORIG_NODE_NAME failed", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SetCyclicDependenceFlag(const ge::NodePtr &node) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(tmp_desc); | |||||
| // add cyclic_dependence_flag | |||||
| if (!AttrUtils::SetBool(tmp_desc, "_cyclic_dependence_flag", true)) { | |||||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_CYCLIC_DEPENDENCE_FLAG failed", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SetNextIteration(const ge::NodePtr &node, const std::string &next) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(tmp_desc); | |||||
| if (!AttrUtils::SetStr(tmp_desc, "_next_iteration_node", next)) { | |||||
| GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_NEXT_ITERATION failed", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| namespace cce { | |||||
| bool ccGetFuncState(ccFuncParamType_t type) { return true; } | |||||
| } // namespace cce | |||||
| namespace ge { | |||||
| Status UnloadModel(uint32_t model_id) { return SUCCESS; } | |||||
| Status GetInputOutputDescInfo(uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DataInput(const InputData *input_data, OutputData *output_data) { return SUCCESS; } | |||||
| /* | |||||
| class ModelManager { | |||||
| public: | |||||
| static std::shared_ptr<ModelManager> GetInstance(); | |||||
| static void FinalizeForPtr(ModelManager *) {} | |||||
| Status DataInputTensor(uint32_t model_id, const std::vector<ge::TensorInfo> &inputs, | |||||
| std::vector<ge::TensorInfo> &outputs); | |||||
| Status DataInput(const InputData &input_data, OutputData &output_data); | |||||
| Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||||
| std::vector<InputOutputDescInfo> &output_desc); | |||||
| Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||||
| std::vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats); | |||||
| Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||||
| std::vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, std::vector<uint32_t> &output_formats); | |||||
| Status Stop(uint32_t model_id); | |||||
| Status Unload(uint32_t model_id); | |||||
| Status LoadModelOnline(uint32_t &model_id, std::shared_ptr<ge::Model> &model, | |||||
| std::shared_ptr<ModelListener> listener); | |||||
| Status Start(uint32_t model_id); | |||||
| Status GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size); | |||||
| Status LoadModelOffline(uint32_t &model_id, const ModelData &model, std::shared_ptr<ModelListener> listener = nullptr, | |||||
| void *dev_ptr = nullptr, size_t mem_size = 0, void *weight_ptr = nullptr, | |||||
| size_t weight_size = 0); | |||||
| Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids, | |||||
| const std::vector<uint32_t> &output_queue_ids); | |||||
| Status HandleCommand(const Command &command); | |||||
| Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | |||||
| OutputData &output_data); | |||||
| void DestroyAicpuSession(uint64_t session_id); | |||||
| }; | |||||
| void ModelManager::DestroyAicpuSession(uint64_t session_id) {} | |||||
| std::shared_ptr<ModelManager> ModelManager::GetInstance() { | |||||
| static std::shared_ptr<ModelManager> instance_ptr = | |||||
| shared_ptr<ModelManager>(new ModelManager(), ModelManager::FinalizeForPtr); | |||||
| return instance_ptr; | |||||
| } | |||||
| Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<ge::TensorInfo> &inputs, | |||||
| std::vector<ge::TensorInfo> &outputs) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::DataInput(const InputData &input_data, OutputData &output_data) { return SUCCESS; } | |||||
| Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||||
| std::vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||||
| std::vector<InputOutputDescInfo> &output_desc) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||||
| std::vector<InputOutputDescInfo> &input_desc, | |||||
| std::vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::Stop(uint32_t model_id) { return SUCCESS; } | |||||
| Status ModelManager::Unload(uint32_t model_id) { return SUCCESS; } | |||||
| Status ModelManager::LoadModelOnline(uint32_t &model_id, std::shared_ptr<ge::Model> &model, | |||||
| std::shared_ptr<ModelListener> listener) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::Start(uint32_t model_id) { return SUCCESS; } | |||||
| Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { return SUCCESS; } | |||||
| Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | |||||
| void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, | |||||
| const std::vector<uint32_t> &input_queue_ids, | |||||
| const std::vector<uint32_t> &output_queue_ids) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::HandleCommand(const Command &command) { return SUCCESS; } | |||||
| Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | |||||
| OutputData &output_data) { | |||||
| return SUCCESS; | |||||
| } | |||||
| */ | |||||
| } // namespace ge | |||||
| namespace ge { | |||||
| enum JobState { | |||||
| JOBSTATE_WAITING = 1, | |||||
| JOBSTATE_RUNNING, | |||||
| JOBSTATE_KILLING, | |||||
| JOBSTATE_SUCCEED, | |||||
| JOBSTATE_FAILED, | |||||
| JOBSTATE_KILLED, | |||||
| JOBSTATE_UNKOWN | |||||
| }; | |||||
| enum JobSubState { | |||||
| JOBSUBSTATE_ENV_INIT = 201, | |||||
| JOBSUBSTATE_ENV_FIN, | |||||
| JOBSUBSTATE_RESOUCE_ALLOC, | |||||
| JOBSUBSTATE_MODEL_COMPILE, | |||||
| JOBSUBSTATE_GRAPH_PREPARE, | |||||
| JOBSUBSTATE_GRAPH_SPLIT, | |||||
| JOBSUBSTATE_GRAPH_OPTIMIZE, | |||||
| JOBSUBSTATE_GRAPH_BUILD, | |||||
| JOBSUBSTATE_GRAPH_LOAD, | |||||
| JOBSUBSTATE_GRAPH_EXEC, | |||||
| JOBSUBSTATE_GRAPH_UNLOAD, | |||||
| JOBSUBSTATE_OTHER | |||||
| }; | |||||
| enum ErrorModule { | |||||
| ERROR_MODULE_DRIVER = 0x01, | |||||
| ERROR_MODULE_RUNTIME = 0x04, | |||||
| ERROR_MODULE_CCE = 0x06, | |||||
| ERROR_MODULE_FMK = 0x08, | |||||
| ERROR_MODULE_HCCL = 0x12 | |||||
| }; | |||||
| class CsaInteract { | |||||
| public: | |||||
| CsaInteract &GetInstance(); | |||||
| void WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state); | |||||
| void Init(int32_t dev_index, int64_t job_id); | |||||
| Status WriteJobState(JobState job_state, JobSubState job_sub_state = JOBSUBSTATE_OTHER, | |||||
| uint32_t module_ret_errcode = SUCCESS, ErrorModule error_module = ERROR_MODULE_FMK); | |||||
| // device index | |||||
| int32_t dev_index_; | |||||
| // job id | |||||
| int64_t job_id_; | |||||
| // is initialization complete | |||||
| bool is_init_; | |||||
| // current job state | |||||
| JobState curr_state_; | |||||
| // job state file | |||||
| std::string job_state_file_; | |||||
| // network connectivity detect file | |||||
| std::string hcom_detect_file_; | |||||
| // identification of internal errors that occurred during training | |||||
| bool is_have_internal_error_; | |||||
| }; | |||||
| CsaInteract &CsaInteract::GetInstance() { | |||||
| static CsaInteract instance; | |||||
| return instance; | |||||
| } | |||||
| void CsaInteract::Init(int32_t dev_index, int64_t job_id) { | |||||
| if (!is_init_) { | |||||
| dev_index_ = dev_index; | |||||
| job_id_ = job_id; | |||||
| string csa_path_prefix; | |||||
| if (std::getenv(FMK_STATUS_FILE_DIR_ENV) != nullptr) { | |||||
| csa_path_prefix = std::getenv(FMK_STATUS_FILE_DIR_ENV); | |||||
| } | |||||
| if (!csa_path_prefix.empty()) { | |||||
| std::string job_state_file = csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + JOBSTATE_FILE_NAME; | |||||
| std::string hcom_detect_file = | |||||
| csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + HCOM_DETECT_FILE_NAME; | |||||
| job_state_file_ = RealPath(job_state_file.c_str()); | |||||
| hcom_detect_file_ = RealPath(hcom_detect_file.c_str()); | |||||
| } | |||||
| is_init_ = true; | |||||
| } | |||||
| } | |||||
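| // Note on Init(): the state-file paths are only derived when the directory named by | |||||
| // FMK_STATUS_FILE_DIR_ENV is set, resolving (via RealPath) to | |||||
| //   <env_dir><dev_index>/<JOBSTATE_FILE_NAME> and <env_dir><dev_index>/<HCOM_DETECT_FILE_NAME>, | |||||
| // assuming FILE_SEPARATE is the path separator; repeated Init() calls are no-ops once is_init_ is set. | |||||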
| void CsaInteract::WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state) {} | |||||
| } // namespace ge | |||||
| Status ModelParserBase::LoadFromFile(const char *model_path, const char *key, int32_t priority, | |||||
| ge::ModelData &model_data) { | |||||
| return SUCCESS; | |||||
| } | |||||
| Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode, | |||||
| ErrorModule error_module) { | |||||
| return SUCCESS; | |||||
| } | |||||
| namespace ge { | |||||
| static std::map<ge::DataType, uint32_t> data_type_to_length = { | |||||
| {DT_BOOL, sizeof(bool)}, {DT_INT64, sizeof(int64_t)}, {DT_UINT64, sizeof(int64_t)}, {DT_FLOAT, sizeof(float)}, | |||||
| {DT_INT32, sizeof(int32_t)}, {DT_UINT32, sizeof(int32_t)}, {DT_INT8, sizeof(char)}, {DT_UINT8, sizeof(char)}, | |||||
| {DT_INT16, sizeof(int16_t)}, {DT_UINT16, sizeof(int16_t)}, {DT_FLOAT16, sizeof(int16_t)}, {DT_DOUBLE, sizeof(double)}, | |||||
| }; | |||||
| class TypeUtils { | |||||
| public: | |||||
| static bool GetDataTypeLength(ge::DataType data_type, uint32_t &length); | |||||
| static bool CheckUint64MulOverflow(uint64_t a, uint32_t b); | |||||
| }; | |||||
| bool TypeUtils::GetDataTypeLength(ge::DataType data_type, uint32_t &length) { | |||||
| auto it = data_type_to_length.find(data_type); | |||||
| if (it != data_type_to_length.end()) { | |||||
| length = it->second; | |||||
| return true; | |||||
| } else { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| bool TypeUtils::CheckUint64MulOverflow(uint64_t a, uint32_t b) { | |||||
| // Not overflow | |||||
| if (a == 0) { | |||||
| return false; | |||||
| } | |||||
| if ((ULLONG_MAX / a) >= b) { | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
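| // Illustrative sketch (not part of the original stub): how the two helpers above are typically | |||||
| // combined to derive a tensor byte size without overflow; the helper name and element count are | |||||
| // assumptions made for this example only. | |||||
| inline uint64_t ExampleSafeTensorByteSize(ge::DataType data_type, uint64_t elem_cnt) { | |||||
| uint32_t type_len = 0; | |||||
| if (!TypeUtils::GetDataTypeLength(data_type, type_len)) { | |||||
| return 0;  // unknown data type | |||||
| } | |||||
| if (TypeUtils::CheckUint64MulOverflow(elem_cnt, type_len)) { | |||||
| return 0;  // elem_cnt * type_len would overflow uint64_t | |||||
| } | |||||
| return elem_cnt * static_cast<uint64_t>(type_len); | |||||
| } | |||||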
| } // namespace ge | |||||
| @@ -27,8 +27,8 @@ rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) { | |||||
| } | } | ||||
| rtError_t rtCtxGetCurrent(rtContext_t *ctx) { | rtError_t rtCtxGetCurrent(rtContext_t *ctx) { | ||||
| int x = 1; | |||||
| *ctx = (void *)x; | |||||
| uintptr_t x = 1; | |||||
| *ctx = (rtContext_t)x; | |||||
| return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
| } | } | ||||
| @@ -131,8 +131,15 @@ rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char | |||||
| rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } | rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } | ||||
| rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } | |||||
| rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; } | rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; } | ||||
| rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||||
| rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo) { | |||||
| return RT_ERROR_NONE; | |||||
| } | |||||
| rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc, | rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc, | ||||
| rtStream_t stream) { | rtStream_t stream) { | ||||
| return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
| @@ -156,7 +163,7 @@ rtError_t rtSetKernelReportCallback(rtKernelReportCallback callback) { | |||||
| rt_kernel_info.module_addr = (void *)100; | rt_kernel_info.module_addr = (void *)100; | ||||
| rt_kernel_info.module_size = 100; | rt_kernel_info.module_size = 100; | ||||
| rtStream_t stream; | |||||
| rtStream_t stream = nullptr; | |||||
| callback(stream, &rt_kernel_info); | callback(stream, &rt_kernel_info); | ||||
| return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
| } | } | ||||
| @@ -193,7 +200,8 @@ rtError_t rtModelCreate(rtModel_t *model, uint32_t flag) { | |||||
| } | } | ||||
| rtError_t rtModelDestroy(rtModel_t model) { | rtError_t rtModelDestroy(rtModel_t model) { | ||||
| delete model; | |||||
| uint32_t *stub = static_cast<uint32_t *>(model); | |||||
| delete stub; | |||||
| return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
| } | } | ||||
| @@ -18,23 +18,23 @@ project(ut_ge) | |||||
| set(CMAKE_CXX_STANDARD 11) | set(CMAKE_CXX_STANDARD 11) | ||||
| set(PROTO_LIST | set(PROTO_LIST | ||||
| "${GE_CODE_DIR}/metadef/proto/om.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/ge_ir.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/ge_api.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/insert_op.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/dump_task.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/fwk_adapter.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/op_mapping_info.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/optimizer_priority.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/ge_api.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/attr_value.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/tensor.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/resource_handle.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/tensor_shape.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/types.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/node_def.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/proto_inner/ge_onnx.proto" | |||||
| ) | |||||
| "${GE_CODE_DIR}/metadef/proto/om.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/ge_ir.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/ge_api.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/insert_op.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/dump_task.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/fwk_adapter.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/op_mapping_info.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/optimizer_priority.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/ge_api.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/attr_value.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/tensor.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/resource_handle.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/tensor_shape.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/types.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/node_def.proto" | |||||
| "${GE_CODE_DIR}/metadef/proto/proto_inner/ge_onnx.proto" | |||||
| ) | |||||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | ||||
| @@ -135,6 +135,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/common/types.cc" | "${GE_CODE_DIR}/ge/common/types.cc" | ||||
| "${GE_CODE_DIR}/ge/common/fmk_error_codes.cc" | "${GE_CODE_DIR}/ge/common/fmk_error_codes.cc" | ||||
| "${GE_CODE_DIR}/ge/common/op/ge_op_utils.cc" | "${GE_CODE_DIR}/ge/common/op/ge_op_utils.cc" | ||||
| "${GE_CODE_DIR}/ge/common/context/ctx.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/manager/util/variable_accelerate_ctrl.cc" | "${GE_CODE_DIR}/ge/graph/manager/util/variable_accelerate_ctrl.cc" | ||||
| "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" | "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/generator/ge_generator.cc" | "${GE_CODE_DIR}/ge/generator/ge_generator.cc" | ||||
| @@ -163,7 +164,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" | "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | ||||
| "${GE_CODE_DIR}/ge/model/ge_root_model.cc" | "${GE_CODE_DIR}/ge/model/ge_root_model.cc" | ||||
| "${GE_CODE_DIR}/ge/common/model_parser/base.cc" | |||||
| "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" | "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" | "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" | ||||
| @@ -266,8 +267,8 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | |||||
| "${GE_CODE_DIR}/ge/model/ge_model.cc" | "${GE_CODE_DIR}/ge/model/ge_model.cc" | ||||
| "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | ||||
| @@ -393,14 +394,13 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/manager/util/debug.cc" | "${GE_CODE_DIR}/ge/graph/manager/util/debug.cc" | ||||
| "${GE_CODE_DIR}/ge/common/properties_manager.cc" | "${GE_CODE_DIR}/ge/common/properties_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" | "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/common/model_parser/base.cc" | |||||
| "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | |||||
| "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" | "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" | ||||
| "${GE_CODE_DIR}/ge/common/util.cc" | "${GE_CODE_DIR}/ge/common/util.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" | ||||
| @@ -458,7 +458,7 @@ set(GRAPH_BUILD_COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | ||||
| "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" | "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" | ||||
| "${GE_CODE_DIR}/ge/common/thread_pool.cc" | "${GE_CODE_DIR}/ge/common/thread_pool.cc" | ||||
| "${GE_CODE_DIR}/ge/common/model_parser/base.cc" | |||||
| "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/build/run_context.cc" | "${GE_CODE_DIR}/ge/graph/build/run_context.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/common/local_context.cc" | "${GE_CODE_DIR}/ge/graph/common/local_context.cc" | ||||
| ) | ) | ||||
| @@ -627,7 +627,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES | |||||
| #"graph/load/new_model_manager_davinci_model_unittest.cc" | #"graph/load/new_model_manager_davinci_model_unittest.cc" | ||||
| "graph/load/model_manager_unittest.cc" | "graph/load/model_manager_unittest.cc" | ||||
| #"graph/load/new_model_manager_task_build_unittest.cc" | #"graph/load/new_model_manager_task_build_unittest.cc" | ||||
| "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" | |||||
| "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" | |||||
| "graph/load/end_graph_task_unittest.cc" | "graph/load/end_graph_task_unittest.cc" | ||||
| "graph/load/new_model_manager_event_manager_unittest.cc" | "graph/load/new_model_manager_event_manager_unittest.cc" | ||||
| #"graph/load/output_net_output_unittest.cc" | #"graph/load/output_net_output_unittest.cc" | ||||
| @@ -638,7 +638,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES | |||||
| "graph/load/kernel_task_info_unittest.cc" | "graph/load/kernel_task_info_unittest.cc" | ||||
| "graph/load/memcpy_addr_async_task_info_unittest.cc" | "graph/load/memcpy_addr_async_task_info_unittest.cc" | ||||
| "graph/load/memcpy_async_task_info_unittest.cc" | "graph/load/memcpy_async_task_info_unittest.cc" | ||||
| "graph/load/cpu_queue_schedule_unittest.cc" | |||||
| "graph/load/cpu_queue_schedule_unittest.cc" | |||||
| #"graph/graph_load_unittest.cc" | #"graph/graph_load_unittest.cc" | ||||
| "graph/ge_executor_unittest.cc" | "graph/ge_executor_unittest.cc" | ||||
| "graph/load/model_helper_unittest.cc" | "graph/load/model_helper_unittest.cc" | ||||
| @@ -671,7 +671,7 @@ set(PASS_TEST_FILES | |||||
| "graph/passes/trans_op_depth_fusion_pass_unittest.cc" | "graph/passes/trans_op_depth_fusion_pass_unittest.cc" | ||||
| "graph/passes/transop_nearby_allreduce_fusion_pass_unittest.cc" | "graph/passes/transop_nearby_allreduce_fusion_pass_unittest.cc" | ||||
| "graph/passes/constant_folding_pass_unittest.cc" | "graph/passes/constant_folding_pass_unittest.cc" | ||||
| "graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc" | |||||
| "graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc" | |||||
| "graph/passes/stop_gradient_pass_unittest.cc" | "graph/passes/stop_gradient_pass_unittest.cc" | ||||
| "graph/passes/prevent_gradient_pass_unittest.cc" | "graph/passes/prevent_gradient_pass_unittest.cc" | ||||
| "graph/passes/identity_pass_unittest.cc" | "graph/passes/identity_pass_unittest.cc" | ||||
| @@ -752,25 +752,38 @@ set(MULTI_PARTS_TEST_FILES | |||||
| "graph/build/mem_assigner_unittest.cc" | "graph/build/mem_assigner_unittest.cc" | ||||
| "graph/preprocess/graph_preprocess_unittest.cc" | "graph/preprocess/graph_preprocess_unittest.cc" | ||||
| "graph/manager/hcom_util_unittest.cc" | "graph/manager/hcom_util_unittest.cc" | ||||
| "graph/manager/graph_caching_allocator_unittest.cc" | |||||
| "session/omg_omg_unittest.cc" | "session/omg_omg_unittest.cc" | ||||
| ) | ) | ||||
| set(GENERATOR_TEST_FILES | |||||
| "generator/ge_generator_unittest.cc" | |||||
| ) | |||||
| set(EXECUTOR_TEST_FILES | |||||
| "executor/ge_executor_unittest.cc" | |||||
| ) | |||||
| set(SINGLE_OP_TEST_FILES | set(SINGLE_OP_TEST_FILES | ||||
| #"single_op/single_op_model_unittest.cc" | |||||
| "single_op/single_op_model_unittest.cc" | |||||
| "single_op/single_op_manager_unittest.cc" | "single_op/single_op_manager_unittest.cc" | ||||
| "single_op/stream_resource_unittest.cc" | "single_op/stream_resource_unittest.cc" | ||||
| "single_op/single_op_task_unittest.cc" | |||||
| ) | ) | ||||
| set(PROFILING_MNG_TEST_FILES | set(PROFILING_MNG_TEST_FILES | ||||
| "profiling/ge_profiling_manager_unittest.cc" | "profiling/ge_profiling_manager_unittest.cc" | ||||
| ) | ) | ||||
| set(HYBRID_TEST_FILES | |||||
| "hybrid/ge_hybrid_unittest.cc" | |||||
| ) | |||||
| set(OTHERS_TEST_FILES | set(OTHERS_TEST_FILES | ||||
| "plugin_manager/ge_util_unittest.cc" | "plugin_manager/ge_util_unittest.cc" | ||||
| ) | ) | ||||
| list(APPEND COMMON_SHARED_LIBRARIES | list(APPEND COMMON_SHARED_LIBRARIES | ||||
| omg_stub | |||||
| c_sec | c_sec | ||||
| slog_stub | slog_stub | ||||
| cce_ge_stub | cce_ge_stub | ||||
| @@ -1055,10 +1068,13 @@ target_link_libraries(ut_libge_kernel_utest | |||||
| # libge_distinct_load_utest | # libge_distinct_load_utest | ||||
| add_executable(ut_libge_distinct_load_utest | add_executable(ut_libge_distinct_load_utest | ||||
| ${COMMON_TEST_FILES} | ${COMMON_TEST_FILES} | ||||
| ${GENERATOR_TEST_FILES} | |||||
| ${EXECUTOR_TEST_FILES} | |||||
| ${DISTINCT_GRAPH_LOAD_TEST_FILES} | ${DISTINCT_GRAPH_LOAD_TEST_FILES} | ||||
| ${DISTINCT_GRAPH_LOAD_SRC_FILES} | ${DISTINCT_GRAPH_LOAD_SRC_FILES} | ||||
| ${SINGLE_OP_TEST_FILES} | ${SINGLE_OP_TEST_FILES} | ||||
| ${PROFILING_MNG_TEST_FILES} | ${PROFILING_MNG_TEST_FILES} | ||||
| ${HYBRID_TEST_FILES} | |||||
| ) | ) | ||||
| target_compile_options(ut_libge_distinct_load_utest PRIVATE | target_compile_options(ut_libge_distinct_load_utest PRIVATE | ||||
| @@ -0,0 +1,42 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #define private public | |||||
| #define protected public | |||||
| #include "executor/ge_executor.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| using namespace std; | |||||
| namespace ge { | |||||
| class UtestGeExecutor : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| TEST_F(UtestGeExecutor, test_single_op_exec) { | |||||
| GeExecutor executor; | |||||
| ModelData model_data; | |||||
| string model_name = "1234"; | |||||
| EXPECT_EQ(executor.LoadSingleOp(model_name, model_data, nullptr, nullptr), ACL_ERROR_GE_INTERNAL_ERROR); | |||||
| EXPECT_EQ(executor.LoadDynamicSingleOp(model_name, model_data, nullptr, nullptr), PARAM_INVALID); | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,78 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #define private public | |||||
| #define protected public | |||||
| #include "generator/ge_generator.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| using namespace std; | |||||
| namespace ge { | |||||
| class UtestGeGenerator : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| TEST_F(UtestGeGenerator, test_build_single_op_offline) { | |||||
| GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor_desc, 512); | |||||
| shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add"); | |||||
| EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); | |||||
| EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); | |||||
| EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS); | |||||
| GeTensor tensor(tensor_desc); | |||||
| const vector<GeTensor> inputs = { tensor, tensor }; | |||||
| const vector<GeTensor> outputs = { tensor }; | |||||
| // Initialize() has not been called yet, so the internal impl is null. | |||||
| GeGenerator generator; | |||||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), PARAM_INVALID); | |||||
| // const map<string, string> &options | |||||
| generator.Initialize({}); | |||||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); | |||||
| } | |||||
| /* | |||||
| TEST_F(UtestGeGenerator, test_build_single_op_online) { | |||||
| GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor_desc, 512); | |||||
| shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add"); | |||||
| EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); | |||||
| EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); | |||||
| EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS); | |||||
| GeTensor tensor(tensor_desc); | |||||
| const vector<GeTensor> inputs = { tensor, tensor }; | |||||
| const vector<GeTensor> outputs = { tensor }; | |||||
| // not Initialize, impl is null. | |||||
| GeGenerator generator; | |||||
| generator.Initialize({}); | |||||
| ModelBufferData model_buffer; | |||||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_SYS, model_buffer), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); | |||||
| } | |||||
| */ | |||||
| } // namespace ge | |||||
| @@ -25,10 +25,12 @@ | |||||
| #include "graph/utils/op_desc_utils.h" | #include "graph/utils/op_desc_utils.h" | ||||
| #include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
| #include "omg/omg_inner_types.h" | #include "omg/omg_inner_types.h" | ||||
| #include "../passes/graph_builder_utils.h" | |||||
| #define protected public | #define protected public | ||||
| #define private public | #define private public | ||||
| #include "graph/build/memory/binary_block_mem_assigner.h" | #include "graph/build/memory/binary_block_mem_assigner.h" | ||||
| #include "graph/build/memory/graph_mem_assigner.h" | |||||
| #include "graph/build/memory/hybrid_mem_assigner.h" | #include "graph/build/memory/hybrid_mem_assigner.h" | ||||
| #include "graph/build/memory/max_block_mem_assigner.h" | #include "graph/build/memory/max_block_mem_assigner.h" | ||||
| #undef protected | #undef protected | ||||
| @@ -41,7 +43,7 @@ using domi::GetContext; | |||||
| class UtestMemoryAssignerTest : public testing::Test { | class UtestMemoryAssignerTest : public testing::Test { | ||||
| public: | public: | ||||
| ge::OpDescPtr createOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { | |||||
| ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { | |||||
| ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type); | ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type); | ||||
| auto desc_temp_ptr = make_shared<ge::GeTensorDesc>(); | auto desc_temp_ptr = make_shared<ge::GeTensorDesc>(); | ||||
| auto desc_temp = *desc_temp_ptr; | auto desc_temp = *desc_temp_ptr; | ||||
| @@ -55,26 +57,46 @@ class UtestMemoryAssignerTest : public testing::Test { | |||||
| op_def->SetWorkspaceBytes(workspace_bytes); | op_def->SetWorkspaceBytes(workspace_bytes); | ||||
| return op_def; | return op_def; | ||||
| } | } | ||||
| void make_graph(ge::ComputeGraphPtr graph) { | |||||
| ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); | |||||
| ge::OpDescPtr CreateRefOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { | |||||
| ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type); | |||||
| auto desc_temp_ptr = make_shared<ge::GeTensorDesc>(); | |||||
| auto desc_temp = *desc_temp_ptr; | |||||
| TensorUtils::SetSize(desc_temp, 1024); | |||||
| op_def->AddInputDesc(desc_temp); | |||||
| auto desc_output_ptr = make_shared<ge::GeTensorDesc>(); | |||||
| auto desc_output = *desc_output_ptr; | |||||
| TensorUtils::SetSize(desc_output, 6500); | |||||
| ge::TensorUtils::SetReuseInput(desc_output, true); | |||||
| ge::TensorUtils::SetReuseInputIndex(desc_output, 0); | |||||
| op_def->AddOutputDesc(desc_output); | |||||
| std::vector<int64_t> workspace_bytes; | |||||
| workspace_bytes.push_back(wsByte); | |||||
| op_def->SetWorkspaceBytes(workspace_bytes); | |||||
| return op_def; | |||||
| } | |||||
| void MakeGraph(ge::ComputeGraphPtr &graph) { | |||||
| ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); | |||||
| op_def_a->SetStreamId(0); | op_def_a->SetStreamId(0); | ||||
| ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000); | |||||
| ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000); | |||||
| op_def_b->SetStreamId(0); | op_def_b->SetStreamId(0); | ||||
| ge::OpDescPtr op_def_c = createOpWithWsSize("C", 16000); | |||||
| ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 16000); | |||||
| op_def_c->SetStreamId(1); | op_def_c->SetStreamId(1); | ||||
| ge::OpDescPtr op_def_d = createOpWithWsSize("D", 24000); | |||||
| ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 24000); | |||||
| op_def_d->SetStreamId(2); | op_def_d->SetStreamId(2); | ||||
| ge::OpDescPtr op_def_e = createOpWithWsSize("E", 24000); | |||||
| ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 24000); | |||||
| op_def_e->SetStreamId(3); | op_def_e->SetStreamId(3); | ||||
| ge::OpDescPtr op_def_f = createOpWithWsSize("F", 30000); | |||||
| ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 30000); | |||||
| op_def_f->SetStreamId(2); | op_def_f->SetStreamId(2); | ||||
| ge::OpDescPtr op_def_g = createOpWithWsSize("G", 32000); | |||||
| ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 32000); | |||||
| op_def_g->SetStreamId(3); | op_def_g->SetStreamId(3); | ||||
| ge::OpDescPtr op_def_h = createOpWithWsSize("H", 48000); | |||||
| ge::OpDescPtr op_def_h = CreateOpWithWsSize("H", 48000); | |||||
| op_def_h->SetStreamId(2); | op_def_h->SetStreamId(2); | ||||
| ge::OpDescPtr op_def_i = createOpWithWsSize("I", 60000); | |||||
| ge::OpDescPtr op_def_i = CreateOpWithWsSize("I", 60000); | |||||
| op_def_i->SetStreamId(2); | op_def_i->SetStreamId(2); | ||||
| ge::OpDescPtr op_def_j = createOpWithWsSize("J", 256000, NETOUTPUT); | |||||
| ge::OpDescPtr op_def_j = CreateOpWithWsSize("J", 256000, NETOUTPUT); | |||||
| op_def_j->SetStreamId(3); | op_def_j->SetStreamId(3); | ||||
| // add node | // add node | ||||
| @@ -108,24 +130,10 @@ class UtestMemoryAssignerTest : public testing::Test { | |||||
| graph->TopologicalSorting(); | graph->TopologicalSorting(); | ||||
| } | } | ||||
| void make_reuse_graph(ge::ComputeGraphPtr graph) { | |||||
| ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); | |||||
| ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000); | |||||
| ge::OpDescPtr op_def_c = make_shared<ge::OpDesc>("C", "Some"); | |||||
| auto desc_input_ptr = make_shared<ge::GeTensorDesc>(); | |||||
| auto desc_input = *desc_input_ptr; | |||||
| TensorUtils::SetSize(desc_input, 1024); | |||||
| op_def_c->AddInputDesc(desc_input); | |||||
| auto desc_output_ptr = make_shared<ge::GeTensorDesc>(); | |||||
| auto desc_output = *desc_output_ptr; | |||||
| TensorUtils::SetSize(desc_output, 6500); | |||||
| ge::TensorUtils::SetReuseInput(desc_output, true); | |||||
| ge::TensorUtils::SetReuseInputIndex(desc_output, 0); | |||||
| op_def_c->AddOutputDesc(desc_output); | |||||
| void MakeReuseGraph(ge::ComputeGraphPtr graph) { | |||||
| ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); | |||||
| ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000); | |||||
| ge::OpDescPtr op_def_c = CreateRefOpWithWsSize("C", 120000); | |||||
| ge::OpDescPtr op_def_d = make_shared<ge::OpDesc>("D", "CONSTANT"); | ge::OpDescPtr op_def_d = make_shared<ge::OpDesc>("D", "CONSTANT"); | ||||
| ge::NodePtr node_a = graph->AddNode(op_def_a); | ge::NodePtr node_a = graph->AddNode(op_def_a); | ||||
| @@ -141,6 +149,47 @@ class UtestMemoryAssignerTest : public testing::Test { | |||||
| graph->TopologicalSorting(); | graph->TopologicalSorting(); | ||||
| } | } | ||||
| ComputeGraphPtr MakeCascadeContinuousMemoryGraph() { | |||||
| ge::ut::GraphBuilder builder("graph"); | |||||
| auto data = builder.AddNode("data", "Data", 1, 1); | |||||
| auto addn1 = builder.AddNode("addn1", "AddN", 1, 1); | |||||
| auto addn2 = builder.AddNode("addn2", "AddN", 1, 1); | |||||
| auto addn3 = builder.AddNode("addn3", "AddN", 1, 1); | |||||
| auto concat1 = builder.AddNode("concat1", "Concat", 2, 1); | |||||
| auto concat2 = builder.AddNode("concat2", "Concat", 2, 1); | |||||
| auto netoutput = builder.AddNode("netoutput", "NetOutput", 2, 0); | |||||
| ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); | |||||
| ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); | |||||
| ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true); | |||||
| ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); | |||||
| ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); | |||||
| ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true); | |||||
| addn1->GetOpDesc()->SetOutputOffset({100}); | |||||
| addn2->GetOpDesc()->SetOutputOffset({200}); | |||||
| concat1->GetOpDesc()->SetOutputOffset({100}); | |||||
| addn3->GetOpDesc()->SetOutputOffset({700}); | |||||
| concat2->GetOpDesc()->SetOutputOffset({500}); | |||||
| ge::AttrUtils::SetListInt(addn1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100}); | |||||
| ge::AttrUtils::SetListInt(addn2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100}); | |||||
| ge::AttrUtils::SetListInt(addn3->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100}); | |||||
| ge::AttrUtils::SetListInt(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {200}); | |||||
| ge::AttrUtils::SetListInt(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {300}); | |||||
| builder.AddDataEdge(data, 0, addn1, 0); | |||||
| builder.AddDataEdge(data, 0, addn2, 0); | |||||
| builder.AddDataEdge(addn1, 0, concat1, 0); | |||||
| builder.AddDataEdge(addn2, 0, concat1, 1); | |||||
| builder.AddDataEdge(concat1, 0, concat2, 0); | |||||
| builder.AddDataEdge(addn3, 0, concat2, 1); | |||||
| return builder.GetGraph(); | |||||
| } | |||||
| protected: | protected: | ||||
| void SetUp() {} | void SetUp() {} | ||||
| @@ -150,7 +199,7 @@ class UtestMemoryAssignerTest : public testing::Test { | |||||
| /* | /* | ||||
| TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { | TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { | ||||
| ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ||||
| ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); | |||||
| ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); | |||||
| ge::NodePtr node_a = graph->AddNode(op_def_a); | ge::NodePtr node_a = graph->AddNode(op_def_a); | ||||
| MemoryBlock* memory_block = new MemoryBlock(0); | MemoryBlock* memory_block = new MemoryBlock(0); | ||||
| memory_block->Init(1, kOutput, node_a, 0, 1); | memory_block->Init(1, kOutput, node_a, 0, 1); | ||||
| @@ -178,7 +227,7 @@ class MockBlockMemAssigner : public BlockMemAssigner { | |||||
| // when check GetMemoryRanges return fail, Assign return fail | // when check GetMemoryRanges return fail, Assign return fail | ||||
| TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) { | TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) { | ||||
| ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ||||
| make_graph(graph); | |||||
| MakeGraph(graph); | |||||
| std::map<std::string, std::string> anchor_to_symbol; | std::map<std::string, std::string> anchor_to_symbol; | ||||
| std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors; | std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors; | ||||
| EXPECT_EQ(GraphUtils::GetRefMapping(graph, symbol_to_anchors, anchor_to_symbol), GRAPH_SUCCESS); | EXPECT_EQ(GraphUtils::GetRefMapping(graph, symbol_to_anchors, anchor_to_symbol), GRAPH_SUCCESS); | ||||
| @@ -186,3 +235,17 @@ TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) { | |||||
| MockBlockMemAssigner mock_assigner(graph, anchor_to_symbol, symbol_to_anchors); | MockBlockMemAssigner mock_assigner(graph, anchor_to_symbol, symbol_to_anchors); | ||||
| EXPECT_EQ(mock_assigner.Assign(), FAILED); | EXPECT_EQ(mock_assigner.Assign(), FAILED); | ||||
| } | } | ||||
| TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { | |||||
| ge::ComputeGraphPtr graph = MakeCascadeContinuousMemoryGraph(); | |||||
| auto addn1 = graph->FindNode("addn1"); | |||||
| auto addn2 = graph->FindNode("addn2"); | |||||
| EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 100); | |||||
| EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 200); | |||||
| GraphMemoryAssigner memoryAssigner(graph); | |||||
| MemoryOffset memory_offset(RT_MEMORY_HBM, 0); | |||||
| memoryAssigner.memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); | |||||
| EXPECT_EQ(memoryAssigner.ReAssignContinuousMemory(false), GRAPH_SUCCESS); | |||||
| EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 500); | |||||
| EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); | |||||
| } | |||||
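| // Note on the test above: ReAssignContinuousMemory re-lays out the no-padding continuous inputs of | |||||
| // the concat nodes, which is why the stubbed output offsets of addn1/addn2 are expected to move from | |||||
| // their initial 100/200 to 500/600 after the call. | |||||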
| @@ -34,7 +34,6 @@ | |||||
| #include "common/types.h" | #include "common/types.h" | ||||
| #include "graph/load/graph_loader.h" | #include "graph/load/graph_loader.h" | ||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||||
| #include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
| #include "graph/load/model_manager/task_info/kernel_task_info.h" | #include "graph/load/model_manager/task_info/kernel_task_info.h" | ||||
| #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" | #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" | ||||
| @@ -109,6 +108,26 @@ static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { | |||||
| ge::AttrUtils::SetInt(op_desc, ge::ATTR_NAME_STREAM_SWITCH_COND, 0); | ge::AttrUtils::SetInt(op_desc, ge::ATTR_NAME_STREAM_SWITCH_COND, 0); | ||||
| return op_desc; | return op_desc; | ||||
| } | } | ||||
| TEST_F(UtestGeExecutor, load_data_from_file) { | |||||
| GeExecutor ge_executor; | |||||
| ge_executor.isInit_ = true; | |||||
| string test_smap = "/tmp/" + std::to_string(getpid()) + "_maps"; | |||||
| string self_smap = "/proc/" + std::to_string(getpid()) + "/maps"; | |||||
| string copy_smap = "cp " + self_smap + " " + test_smap; | |||||
| EXPECT_EQ(system(copy_smap.c_str()), 0); | |||||
| ModelData model_data; | |||||
| EXPECT_EQ(ge_executor.LoadDataFromFile(test_smap, model_data), SUCCESS); | |||||
| EXPECT_NE(model_data.model_data, nullptr); | |||||
| delete[] static_cast<char *>(model_data.model_data); | |||||
| model_data.model_data = nullptr; | |||||
| ge_executor.isInit_ = false; | |||||
| } | |||||
| /* | /* | ||||
| TEST_F(UtestGeExecutor, fail_UnloadModel_model_manager_stop_unload_error) { | TEST_F(UtestGeExecutor, fail_UnloadModel_model_manager_stop_unload_error) { | ||||
| uint32_t model_id = 1; | uint32_t model_id = 1; | ||||
| @@ -24,7 +24,6 @@ | |||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
| #include "common/types.h" | #include "common/types.h" | ||||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||||
| #include "graph/op_desc.h" | #include "graph/op_desc.h" | ||||
| #include "graph/types.h" | #include "graph/types.h" | ||||
| #include "graph/utils/attr_utils.h" | #include "graph/utils/attr_utils.h" | ||||
| @@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { | |||||
| model.SinkModelProfile(); | model.SinkModelProfile(); | ||||
| } | } | ||||
| TEST_F(UtestDavinciModel, Sink_time_profile) { | |||||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||||
| DavinciModel model(0, nullptr); | |||||
| InputData current_data; | |||||
| model.SinkTimeProfile(current_data); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -25,7 +25,6 @@ | |||||
| #include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
| #include "graph/load/graph_loader.h" | #include "graph/load/graph_loader.h" | ||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||||
| using namespace std; | using namespace std; | ||||
| using namespace testing; | using namespace testing; | ||||
| @@ -21,7 +21,7 @@ | |||||
| #include "common/debug/log.h" | #include "common/debug/log.h" | ||||
| #include "common/l2_cache_optimize.h" | #include "common/l2_cache_optimize.h" | ||||
| #include "common/model_parser/base.h" | |||||
| #include "common/model_parser/model_parser.h" | |||||
| #include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
| #include "common/types.h" | #include "common/types.h" | ||||
| @@ -31,7 +31,6 @@ | |||||
| #include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
| #include "graph/load/graph_loader.h" | #include "graph/load/graph_loader.h" | ||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||||
| #include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
| //#include "new_op_test_utils.h" | //#include "new_op_test_utils.h" | ||||
| #undef private | #undef private | ||||
| @@ -0,0 +1,87 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <memory> | |||||
| #include "graph/anchor.h" | |||||
| #include "graph/attr_value.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/utils/graph_utils.h" | |||||
| #include "graph/utils/node_utils.h" | |||||
| #include "graph/utils/op_desc_utils.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "omg/omg_inner_types.h" | |||||
| #define protected public | |||||
| #define private public | |||||
| #include "graph/manager/graph_caching_allocator.h" | |||||
| #include "graph/manager/graph_mem_allocator.h" | |||||
| #undef protected | |||||
| #undef private | |||||
| using namespace std; | |||||
| using namespace testing; | |||||
| using namespace ge; | |||||
| using domi::GetContext; | |||||
| class UtestGraphCachingAllocatorTest : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() { GetContext().out_nodes_map.clear(); } | |||||
| }; | |||||
| TEST_F(UtestGraphCachingAllocatorTest, initialize_success) { | |||||
| std::vector<rtMemType_t> mem_type; | |||||
| mem_type.push_back(RT_MEMORY_HBM); | |||||
| EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||||
| MemManager::Instance().Finalize(); | |||||
| } | |||||
| TEST_F(UtestGraphCachingAllocatorTest, malloc_success) { | |||||
| std::vector<rtMemType_t> mem_type; | |||||
| mem_type.push_back(RT_MEMORY_HBM); | |||||
| EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||||
| uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); | |||||
| EXPECT_NE(nullptr, ptr); | |||||
| MemManager::Instance().Finalize(); | |||||
| } | |||||
| TEST_F(UtestGraphCachingAllocatorTest, extend_malloc_success) { | |||||
| std::vector<rtMemType_t> mem_type; | |||||
| mem_type.push_back(RT_MEMORY_HBM); | |||||
| EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||||
| uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); | |||||
| EXPECT_NE(nullptr, ptr); | |||||
| ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kBinSizeUnit32*kMByteSize); | |||||
| EXPECT_NE(nullptr, ptr); | |||||
| MemManager::Instance().Finalize(); | |||||
| } | |||||
| TEST_F(UtestGraphCachingAllocatorTest, malloc_statics) { | |||||
| std::vector<rtMemType_t> mem_type; | |||||
| mem_type.push_back(RT_MEMORY_HBM); | |||||
| EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||||
| uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); | |||||
| EXPECT_NE(nullptr, ptr); | |||||
| uint8_t *ptr1 = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kKByteSize); | |||||
| EXPECT_NE(nullptr, ptr1); | |||||
| EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr), SUCCESS); | |||||
| EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr1), SUCCESS); | |||||
| MemManager::Instance().CachingInstance(RT_MEMORY_HBM).FreeCachedBlocks(); | |||||
| MemManager::Instance().Finalize(); | |||||
| } | |||||
| @@ -0,0 +1,113 @@ | |||||
| /** | |||||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <vector> | |||||
| #include "runtime/rt.h" | |||||
| #define protected public | |||||
| #define private public | |||||
| #include "hybrid/model/hybrid_model_builder.h" | |||||
| #include "hybrid/model/hybrid_model.h" | |||||
| #include "model/ge_model.h" | |||||
| #include "model/ge_root_model.h" | |||||
| #include "hybrid/node_executor/aicore/aicore_op_task.h" | |||||
| #include "framework/common/taskdown_common.h" | |||||
| #include "framework/common/debug/log.h" | |||||
| #include "graph/ge_context.h" | |||||
| #include "hybrid/executor/hybrid_execution_context.h" | |||||
| #include "hybrid/node_executor/aicore/aicore_task_builder.h" | |||||
| #include "graph/load/model_manager/tbe_handle_store.h" | |||||
| #include "graph/types.h" | |||||
| #undef private | |||||
| #undef protected | |||||
| using namespace std; | |||||
| using namespace testing; | |||||
| using namespace ge; | |||||
| class UtestGeHybrid : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { | |||||
| auto op_desc = std::make_shared<ge::OpDesc>(name, type); | |||||
| op_desc->SetStreamId(0); | |||||
| op_desc->SetId(0); | |||||
| op_desc->SetWorkspace({}); | |||||
| op_desc->SetWorkspaceBytes({}); | |||||
| op_desc->SetInputOffset({}); | |||||
| op_desc->SetOutputOffset({}); | |||||
| ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC"); | |||||
| bool support_dynamic = true; | |||||
| ge::AttrUtils::SetBool(op_desc, "support_dynamicshape", support_dynamic); | |||||
| return op_desc; | |||||
| } | |||||
| TEST_F(UtestGeHybrid, aicore_op_task_init_success) { | |||||
| // build aicore task | |||||
| auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask()); | |||||
| domi::TaskDef task_def; | |||||
| task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); | |||||
| domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); | |||||
| kernel_with_handle->set_original_kernel_key(""); | |||||
| kernel_with_handle->set_node_info(""); | |||||
| kernel_with_handle->set_block_dim(32); | |||||
| kernel_with_handle->set_args_size(64); | |||||
| string args(64, '1'); | |||||
| kernel_with_handle->set_args(args.data(), 64); | |||||
| domi::KernelContext *context = kernel_with_handle->mutable_context(); | |||||
| context->set_op_index(1); | |||||
| context->set_kernel_type(2); // ccKernelType::TE | |||||
| uint16_t args_offset[9] = {0}; | |||||
| context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); | |||||
| OpDescPtr op_desc = CreateOpDesc("Add", "Add"); | |||||
| std::vector<char> kernelBin; | |||||
| TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin)); | |||||
| op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | |||||
| std::string kernel_name("kernel/Add"); | |||||
| AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | |||||
| ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | |||||
| char handle[] = ""; | |||||
| aicore_task->handle_ = handle; | |||||
| aicore_task->tiling_key_ = 1; | |||||
| ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestGeHybrid, task_update_tiling_info) { | |||||
| auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask()); | |||||
| aicore_task->is_single_op_ = true; | |||||
| auto graph = make_shared<ComputeGraph>("graph"); | |||||
| OpDescPtr op_desc = CreateOpDesc("Add", "Add"); | |||||
| ge::AttrUtils::SetStr(op_desc, "compile_info_key", "key"); | |||||
| ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json"); | |||||
| auto node = graph->AddNode(op_desc); | |||||
| optiling::OpRunInfo tiling_info; | |||||
| ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS); | |||||
| } | |||||
| @@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test { | |||||
| void TearDown() {} | void TearDown() {} | ||||
| }; | }; | ||||
| //rt api stub | |||||
| rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) { | |||||
| return RT_ERROR_NONE; | |||||
| } | |||||
| /* | /* | ||||
| TEST_F(UtestSingleOpModel, test_init_model) { | TEST_F(UtestSingleOpModel, test_init_model) { | ||||
| string model_data_str = "123456789"; | string model_data_str = "123456789"; | ||||
| @@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) { | |||||
| std::mutex stream_mu_; | std::mutex stream_mu_; | ||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| SingleOp single_op(&stream_mu_, stream_); | |||||
| ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||||
| // SingleOp single_op(&stream_mu_, stream_); | |||||
| // | |||||
| // ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||||
| } | } | ||||
| /* | /* | ||||
| TEST_F(UtestSingleOpModel, test_build_kernel_task) { | TEST_F(UtestSingleOpModel, test_build_kernel_task) { | ||||
| @@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) { | |||||
| ASSERT_EQ(op_model.Init(), FAILED); | ASSERT_EQ(op_model.Init(), FAILED); | ||||
| } | } | ||||
| */ | */ | ||||
| /* | |||||
| TEST_F(UtestSingleOpModel, test_parse_arg_table) { | TEST_F(UtestSingleOpModel, test_parse_arg_table) { | ||||
| string model_data_str = "123456789"; | string model_data_str = "123456789"; | ||||
| SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); | SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); | ||||
| @@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) { | |||||
| ASSERT_EQ(op.arg_table_[1].size(), 1); | ASSERT_EQ(op.arg_table_[1].size(), 1); | ||||
| ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); | ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); | ||||
| } | } | ||||
| */ | |||||
| TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) { | |||||
| string name = "relu"; | |||||
| string type = "relu"; | |||||
| auto op_desc = std::make_shared<ge::OpDesc>(name, type); | |||||
| op_desc->SetStreamId(0); | |||||
| op_desc->SetId(0); | |||||
| TbeOpTask task; | |||||
| task.op_desc_ = op_desc; | |||||
| task.model_name_ = "resnet_50"; | |||||
| task.model_id_ = 1; | |||||
| TaskDescInfo task_desc_info; | |||||
| uint32_t model_id; | |||||
| task.GetProfilingArgs(task_desc_info, model_id); | |||||
| ASSERT_EQ(task_desc_info.model_name, "resnet_50"); | |||||
| ASSERT_EQ(model_id, 1); | |||||
| } | |||||
| @@ -0,0 +1,117 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <vector> | |||||
| #include "graph/load/model_manager/model_utils.h" | |||||
| #include "graph/utils/graph_utils.h" | |||||
| #include "runtime/rt.h" | |||||
| #define protected public | |||||
| #define private public | |||||
| #include "single_op/single_op_model.h" | |||||
| #include "single_op/task/tbe_task_builder.h" | |||||
| #include "single_op/task/op_task.h" | |||||
| #include "single_op/task/tbe_task_builder.h" | |||||
| #include "external/register/op_tiling_registry.h" | |||||
| #undef private | |||||
| #undef protected | |||||
| using namespace std; | |||||
| using namespace testing; | |||||
| using namespace ge; | |||||
| using namespace optiling; | |||||
| class UtestSingleOpTask : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| TEST_F(UtestSingleOpTask, test_build_kernel_task) { | |||||
| string model_data_str = "123456789"; | |||||
| SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); | |||||
| model.input_offset_list_.push_back(0); | |||||
| model.input_sizes_.push_back(16); | |||||
| model.output_offset_list_.push_back(0); | |||||
| model.output_sizes_.push_back(16); | |||||
| auto graph = make_shared<ComputeGraph>("graph"); | |||||
| auto op_desc = make_shared<OpDesc>("Add", "Add"); | |||||
| std::vector<char> kernelBin; | |||||
| TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin)); | |||||
| op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | |||||
| std::string kernel_name("kernel/Add"); | |||||
| AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | |||||
| vector<int64_t> shape{16, 16}; | |||||
| GeShape ge_shape(shape); | |||||
| GeTensorDesc desc(ge_shape); | |||||
| op_desc->AddInputDesc(desc); | |||||
| op_desc->AddOutputDesc(desc); | |||||
| auto node = graph->AddNode(op_desc); | |||||
| std::mutex stream_mu_; | |||||
| rtStream_t stream_ = nullptr; | |||||
| StreamResource stream_resource(0); | |||||
| SingleOp single_op(&stream_resource, &stream_mu_, stream_); | |||||
| domi::TaskDef task_def; | |||||
| task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); | |||||
| domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); | |||||
| kernel_with_handle->set_original_kernel_key(""); | |||||
| kernel_with_handle->set_node_info(""); | |||||
| kernel_with_handle->set_block_dim(32); | |||||
| kernel_with_handle->set_args_size(64); | |||||
| string args(64, '1'); | |||||
| kernel_with_handle->set_args(args.data(), 64); | |||||
| domi::KernelContext *context = kernel_with_handle->mutable_context(); | |||||
| context->set_op_index(1); | |||||
| context->set_kernel_type(2); // ccKernelType::TE | |||||
| uint16_t args_offset[9] = {0}; | |||||
| context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); | |||||
| model.op_list_[1] = node; | |||||
| TbeOpTask task_tmp; | |||||
| TbeOpTask *task = &task_tmp; | |||||
| ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS); | |||||
| vector<GeTensorDesc> input_desc; | |||||
| vector<DataBuffer> input_buffers; | |||||
| vector<GeTensorDesc> output_desc; | |||||
| vector<DataBuffer> output_buffers; | |||||
| task->node_ = node; | |||||
| OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool {return true;}; | |||||
| OpTilingRegistryInterf("Add", op_tiling_func); | |||||
| ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key"); | |||||
| ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json"); | |||||
| char buffer[64] = {0}; | |||||
| task->tiling_buffer_ = buffer; | |||||
| task->max_tiling_size_ = sizeof(buffer); | |||||
| task->tiling_data_ = "tiling_data"; | |||||
| task->arg_size_ = 64; | |||||
| task->args_.reset(new uint8_t[task->arg_size_]()); | |||||
| ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); | |||||
| char handle_tmp = '0'; | |||||
| char *handle = &handle_tmp; | |||||
| task->SetHandle(handle); | |||||
| ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); | |||||
| } | |||||
| @@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData); | |||||
| #define RT_FUSION_KERNEL_DUMPFLAG (0x04) | #define RT_FUSION_KERNEL_DUMPFLAG (0x04) | ||||
| #define RT_KERNEL_CUSTOM_AICPU (0x08) | #define RT_KERNEL_CUSTOM_AICPU (0x08) | ||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief kernel mode | |||||
| */ | |||||
| #define RT_DEFAULT_KERNEL_MODE (0x00) | |||||
| #define RT_NORMAL_KERNEL_MODE (0x01) | |||||
| #define RT_ALL_KERNEL_MODE (0x02) | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief kernel L1 Fusion Dump bit flags | * @brief kernel L1 Fusion Dump bit flags | ||||
| @@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData); | |||||
| */ | */ | ||||
| RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); | RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); | ||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief register device binary | |||||
| * @param [in] bin device binary description | |||||
| * @param [out] handle device binary handle | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief register fast memory device binary | * @brief register fast memory device binary | ||||
| @@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u | |||||
| RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, | RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, | ||||
| rtSmDesc_t *smDesc, rtStream_t stream); | rtSmDesc_t *smDesc, rtStream_t stream); | ||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief launch kernel with handle to device | |||||
| * @param [in] handle program | |||||
| * @param [in] devFunc device function description | |||||
| * @param [in] blockDim block dimensions | |||||
| * @param [in] args arguments address for kernel function | |||||
| * @param [in] argsSize arguments size | |||||
| * @param [in] smDesc shared memory description | |||||
| * @param [in] stream associated stream | |||||
| * @param [in] kernelInfo kernel info | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||||
| rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief launch kernel to device | * @brief launch kernel to device | ||||
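The two declarations added above introduce a handle-based path next to the existing per-stub rtDevBinaryRegister/rtKernelLaunch flow: one registration covers the whole binary, and individual kernels are then launched by handle plus device function. A minimal sketch of how they might be wired together follows; the rtDevBinary_t field names and the RT_DEV_BINARY_MAGIC_ELF magic are assumptions carried over from the pre-existing binary-registration flow, not something defined by this change, and the helper itself is hypothetical.

// Sketch only: combines the new rtRegisterAllKernel / rtKernelLaunchWithHandle
// entry points declared in this diff. rtDevBinary_t fields and the ELF magic
// are assumed, not taken from this change.
#include "runtime/rt.h"

rtError_t LaunchByHandle(const void *bin_data, uint64_t bin_len, const void *dev_func,
                         uint32_t block_dim, void *args, uint32_t args_size, rtStream_t stream) {
  rtDevBinary_t binary = {};
  binary.magic = RT_DEV_BINARY_MAGIC_ELF;  // assumed AI Core ELF magic
  binary.version = 0;
  binary.data = bin_data;
  binary.length = bin_len;

  // One registration covers every kernel in the binary; the returned handle
  // replaces the per-kernel stub functions used by rtKernelLaunch.
  void *handle = nullptr;
  rtError_t ret = rtRegisterAllKernel(&binary, &handle);
  if (ret != RT_ERROR_NONE) {
    return ret;
  }

  // Select the kernel by device function at launch time.
  return rtKernelLaunchWithHandle(handle, dev_func, block_dim, args, args_size,
                                  nullptr /*smDesc*/, stream, nullptr /*kernelInfo*/);
}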
| @@ -50,6 +50,7 @@ typedef enum tagModelTaskType { | |||||
| RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, | RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, | ||||
| RT_MODEL_TASK_STREAM_LABEL_GOTO, | RT_MODEL_TASK_STREAM_LABEL_GOTO, | ||||
| RT_MODEL_TASK_MODEL_EXIT, | RT_MODEL_TASK_MODEL_EXIT, | ||||
| RT_MODEL_TASK_ALL_KERNEL, | |||||
| } rtModelTaskType_t; | } rtModelTaskType_t; | ||||
| typedef enum tagModelStreamType { | typedef enum tagModelStreamType { | ||||
| @@ -127,6 +128,17 @@ typedef struct tagKernelTaskInfo { | |||||
| uint16_t *argsOffset; | uint16_t *argsOffset; | ||||
| } rtKernelTaskInfo_t; | } rtKernelTaskInfo_t; | ||||
| typedef struct tagAllKernelTaskInfo { | |||||
| uint16_t blockDim; | |||||
| uint16_t argsCount; | |||||
| uint16_t argsSize; | |||||
| uint16_t reserved; | |||||
| const void *dev_func; | |||||
| void *handle; | |||||
| uint8_t *smDesc; | |||||
| uint8_t *args; | |||||
| uint16_t *argsOffset; | |||||
| } rtAllKernelTaskInfo_t; | |||||
| typedef struct tagKernelTaskInfoEx { | typedef struct tagKernelTaskInfoEx { | ||||
| uint32_t flags; | uint32_t flags; | ||||
| uint32_t argsSize; | uint32_t argsSize; | ||||
| @@ -251,6 +263,7 @@ typedef struct tagTaskInfo { | |||||
| union { | union { | ||||
| rtKernelTaskInfoEx_t kernelTaskEx; | rtKernelTaskInfoEx_t kernelTaskEx; | ||||
| rtKernelTaskInfo_t kernelTask; | rtKernelTaskInfo_t kernelTask; | ||||
| rtAllKernelTaskInfo_t allkernelTask; | |||||
| rtEventTaskInfo_t eventTask; | rtEventTaskInfo_t eventTask; | ||||
| rtStreamSwitchTaskInfo_t streamSwitchTask; | rtStreamSwitchTaskInfo_t streamSwitchTask; | ||||
| rtStreamActiveTaskInfo_t streamActiveTask; | rtStreamActiveTaskInfo_t streamActiveTask; | ||||
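For reference, a hedged illustration of the payload carried by the new RT_MODEL_TASK_ALL_KERNEL task type through the allkernelTask union member added above. The field names come from the rtAllKernelTaskInfo_t struct in this diff; the concrete values mirror the KernelDefWithHandle set up in the unit tests earlier in this change, and the builder function itself is hypothetical.

// Sketch only: fills the new rtAllKernelTaskInfo_t; values mirror the
// block_dim/args_size used in the tests above, everything else is illustrative.
#include "runtime/rt.h"

rtAllKernelTaskInfo_t BuildAllKernelTaskInfo(void *handle, const void *dev_func,
                                             uint8_t *args, uint16_t args_size,
                                             uint16_t *args_offset) {
  rtAllKernelTaskInfo_t info = {};
  info.blockDim = 32;         // matches kernel_with_handle->set_block_dim(32)
  info.argsCount = 1;         // assumed: a single argument blob
  info.argsSize = args_size;  // matches kernel_with_handle->set_args_size(64)
  info.reserved = 0;
  info.dev_func = dev_func;   // device function resolved inside the registered binary
  info.handle = handle;       // handle returned by rtRegisterAllKernel
  info.smDesc = nullptr;      // no shared-memory descriptor in this sketch
  info.args = args;
  info.argsOffset = args_offset;
  return info;
}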