| @@ -76,9 +76,7 @@ if (ENABLE_OPEN_SRC) | |||
| find_module(runtime libruntime.so ${GE_LIB_PATH}) | |||
| find_module(runtime_compile libruntime_compile.so ${GE_LIB_PATH}) | |||
| find_module(resource libresource.so ${GE_LIB_PATH}) | |||
| find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) | |||
| find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) | |||
| find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) | |||
| find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH}) | |||
| #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) | |||
| elseif(ENABLE_GE_COV OR ENABLE_GE_UT) | |||
| @@ -86,11 +84,9 @@ if (ENABLE_OPEN_SRC) | |||
| else() | |||
| find_module(slog libalog.so ${ASCEND_ATC_DIR}) | |||
| find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) | |||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | |||
| if(PLATFORM STREQUAL "train") | |||
| find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | |||
| find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | |||
| find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | |||
| find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||
| if(PRODUCT STREQUAL "flr3") | |||
| @@ -100,8 +96,6 @@ if (ENABLE_OPEN_SRC) | |||
| find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | |||
| find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | |||
| find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | |||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||
| find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
| if(PRODUCT STREQUAL "flr3") | |||
| elseif(PRODUCT STREQUAL "flr1") | |||
| @@ -114,11 +108,9 @@ if (ENABLE_OPEN_SRC) | |||
| elseif(PLATFORM STREQUAL "all") | |||
| find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | |||
| find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | |||
| find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | |||
| find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | |||
| find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||
| find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
| else() | |||
| message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | |||
| @@ -144,7 +136,6 @@ elseif (ENABLE_D OR ENABLE_ACL) | |||
| # common libraries | |||
| find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
| find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
| find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
| if (ENABLE_D) | |||
| @@ -164,7 +155,6 @@ elseif(ENABLE_MS_TESTCASES) | |||
| # common libraries | |||
| find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
| find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
| find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
| set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef) | |||
| @@ -76,8 +76,8 @@ checkopts() | |||
| ENABLE_GE_ST="on" | |||
| ;; | |||
| t) | |||
| ENABLE_GE_UT="on" | |||
| ;; | |||
| ENABLE_GE_UT="on" | |||
| ;; | |||
| c) | |||
| ENABLE_GE_COV="on" | |||
| ;; | |||
| @@ -185,7 +185,7 @@ build_graphengine() | |||
| # build all the target | |||
| TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}" | |||
| fi | |||
| make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install | |||
| if [ $? -ne 0 ] | |||
| then | |||
| @@ -214,13 +214,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
| cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH} | |||
| cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH} | |||
| ${OUTPUT_PATH}/ut_libgraph && | |||
| ${OUTPUT_PATH}/ut_libge_multiparts_utest && | |||
| ${OUTPUT_PATH}/ut_libge_distinct_load_utest && | |||
| ${OUTPUT_PATH}/ut_libge_others_utest && | |||
| ${OUTPUT_PATH}/ut_libge_kernel_utest | |||
| RUN_TEST_CASE=${OUTPUT_PATH}/ut_libgraph && ${RUN_TEST_CASE} && | |||
| RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_multiparts_utest && ${RUN_TEST_CASE} && | |||
| RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_distinct_load_utest && ${RUN_TEST_CASE} && | |||
| RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_others_utest && ${RUN_TEST_CASE} && | |||
| RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_kernel_utest && ${RUN_TEST_CASE} | |||
| if [[ "$?" -ne 0 ]]; then | |||
| echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!" | |||
| echo -e "\033[31m${RUN_TEST_CASE}\033[0m" | |||
| exit 1; | |||
| fi | |||
| echo "Generating coverage statistics, please wait..." | |||
| @@ -249,8 +250,8 @@ generate_package() | |||
| NNENGINE_PATH="plugin/nnengine/ge_config" | |||
| OPSKERNEL_PATH="plugin/opskernel" | |||
| ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so") | |||
| FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so") | |||
| ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so" "liberror_manager.so") | |||
| FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so" "liberror_manager.so") | |||
| PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "libhost_cpu_engine.so" "libhost_cpu_opskernel_builder.so" "optimizer_priority.pbtxt") | |||
| PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so") | |||
| @@ -269,7 +270,7 @@ generate_package() | |||
| mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}" | |||
| mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}" | |||
| mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}" | |||
| cd "${OUTPUT_PATH}" | |||
| find ./ -name graphengine_lib.tar -exec rm {} \; | |||
| @@ -133,7 +133,6 @@ set(TRAIN_SRC_LIST | |||
| "graph/load/model_manager/data_dumper.cc" | |||
| "graph/load/model_manager/data_inputer.cc" | |||
| "graph/load/model_manager/davinci_model.cc" | |||
| "graph/load/model_manager/davinci_model_parser.cc" | |||
| "graph/load/model_manager/model_manager.cc" | |||
| "graph/load/model_manager/model_utils.cc" | |||
| "graph/load/model_manager/aipp_utils.cc" | |||
| @@ -613,7 +612,6 @@ set(INFER_SRC_LIST | |||
| "graph/load/model_manager/model_manager.cc" | |||
| "graph/load/model_manager/data_inputer.cc" | |||
| "graph/load/model_manager/davinci_model.cc" | |||
| "graph/load/model_manager/davinci_model_parser.cc" | |||
| "graph/load/model_manager/model_utils.cc" | |||
| "graph/load/model_manager/aipp_utils.cc" | |||
| "graph/load/model_manager/tbe_handle_store.cc" | |||
| @@ -32,6 +32,7 @@ | |||
| #include "graph/common/ge_call_wrapper.h" | |||
| #include "register/op_registry.h" | |||
| #include "common/ge/tbe_plugin_manager.h" | |||
| #include "common/util/error_manager/error_manager.h" | |||
| #include "toolchain/plog.h" | |||
| using domi::OpRegistry; | |||
| @@ -79,6 +80,8 @@ Status CheckOptionsValid(const std::map<string, string> &options) { | |||
| // Initialize GE, prepare for execution, call GELib::Initialize | |||
| Status GEInitializeImpl(const std::map<string, string> &options) { | |||
| GELOGT(TRACE_INIT, "GEInitialize start"); | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| // 0.check init status | |||
| if (g_ge_initialized) { | |||
| GELOGW("GEInitialize is called more than once"); | |||
| @@ -157,6 +160,8 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) { | |||
| // GE finalize, releasing all resources | |||
| Status GEFinalize() { | |||
| GELOGT(TRACE_INIT, "GEFinalize start"); | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| // check init status | |||
| if (!g_ge_initialized) { | |||
| GELOGW("GEFinalize is called before GEInitialize"); | |||
| @@ -202,9 +207,19 @@ Status GEFinalize() { | |||
| return ret; | |||
| } | |||
| std::string GEGetErrorMsg() { | |||
| return ErrorManager::GetInstance().GetErrorMessage(); | |||
| } | |||
| std::string GEGetWarningMsg() { | |||
| return ErrorManager::GetInstance().GetWarningMessage(); | |||
| } | |||
| // Initialize session,which calls innerSession | |||
| Session::Session(const std::map<string, string> &options) { | |||
| GELOGT(TRACE_INIT, "Session Constructor start"); | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| // check init status | |||
| sessionId_ = 0; | |||
| if (!g_ge_initialized) { | |||
| @@ -235,6 +250,8 @@ Session::Session(const std::map<string, string> &options) { | |||
| Session::Session(const std::map<AscendString, AscendString> &options) { | |||
| GELOGT(TRACE_INIT, "Session Constructor start"); | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| // check init status | |||
| sessionId_ = 0; | |||
| if (!g_ge_initialized) { | |||
| @@ -311,11 +328,13 @@ Session::~Session() { | |||
| Status Session::AddGraph(uint32_t graph_id, const Graph &graph) { | |||
| std::map<std::string, std::string> options; | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| return AddGraph(graph_id, graph, options); | |||
| } | |||
| Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) { | |||
| GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); | |||
| @@ -334,6 +353,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<s | |||
| Status Session::AddGraph(uint32_t graph_id, const Graph &graph, | |||
| const std::map<AscendString, AscendString> &options) { | |||
| GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); | |||
| @@ -360,6 +380,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, | |||
| } | |||
| Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) { | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::map<AscendString, AscendString> options; | |||
| return AddGraphWithCopy(graph_id, graph, options); | |||
| } | |||
| @@ -367,6 +388,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) { | |||
| Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, | |||
| const std::map<AscendString, AscendString> &options) { | |||
| GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); | |||
| @@ -389,6 +411,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, | |||
| Status Session::RemoveGraph(uint32_t graph_id) { | |||
| GELOGT(TRACE_INIT, "Session RemoveGraph start"); | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| // call RemoveGraph | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (!instance_ptr || !instance_ptr->InitFlag()) { | |||
| @@ -457,6 +480,7 @@ void PrintOutputResult(std::vector<Tensor> &outputs) { | |||
| Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs) { | |||
| GELOGT(TRACE_INIT, "Session RunGraph start"); | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::vector<Tensor> graph_inputs = inputs; | |||
| // call RunGraph | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| @@ -483,10 +507,12 @@ Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, s | |||
| } | |||
| Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) { | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback); | |||
| } | |||
| Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback) { | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| std::string str_key; | |||
| if (key != nullptr) { | |||
| str_key = key; | |||
| @@ -495,6 +521,7 @@ Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFu | |||
| } | |||
| Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) { | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||
| @@ -511,6 +538,7 @@ Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> | |||
| Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs, | |||
| RunAsyncCallback callback) { | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||
| @@ -529,6 +557,7 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorIn | |||
| } | |||
| Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) { | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| auto instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||
| @@ -544,6 +573,7 @@ Status Session::GetVariables(const std::vector<std::string> &var_names, std::vec | |||
| } | |||
| Status Session::GetVariables(const std::vector<AscendString> &var_names, std::vector<Tensor> &var_values) { | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| auto instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||
| @@ -54,7 +54,7 @@ set(SRC_LIST | |||
| "util.cc" | |||
| "properties_manager.cc" | |||
| "types.cc" | |||
| "model_parser/base.cc" | |||
| "model_parser/model_parser.cc" | |||
| "kernel_store.cc" | |||
| "tbe_kernel_store.cc" | |||
| "cust_aicpu_kernel_store.cc" | |||
| @@ -53,6 +53,7 @@ string PluginManager::GetPath() { | |||
| GELOGW("Failed to read the shared library file path!"); | |||
| return string(); | |||
| } else { | |||
| GE_IF_BOOL_EXEC(dl_info.dli_fname == nullptr, return string()); | |||
| std::string so_path = dl_info.dli_fname; | |||
| char path[MMPA_MAX_PATH] = {0}; | |||
| if (so_path.length() >= MMPA_MAX_PATH) { | |||
| @@ -14,22 +14,15 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include <climits> | |||
| #include "common/helper/model_cache_helper.h" | |||
| #include <cstdio> | |||
| #include <fstream> | |||
| #include <functional> | |||
| #include "common/ge/ge_util.h" | |||
| #include "common/helper/model_cache_helper.h" | |||
| #include "common/types.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/ge_types.h" | |||
| #include "common/model_parser/model_parser.h" | |||
| #include "framework/common/helper/model_helper.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/detail/attributes_holder.h" | |||
| #include "graph/detail/model_serialize_imp.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/model.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "init/gelib.h" | |||
| @@ -1682,7 +1675,7 @@ Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const { | |||
| string key_path; | |||
| int32_t priority = 0; | |||
| ModelData model_data; | |||
| ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data); | |||
| ret = ModelParserBase::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret); | |||
| return ret; | |||
| @@ -16,16 +16,10 @@ | |||
| #include "framework/common/helper/model_helper.h" | |||
| #include "common/ge/ge_util.h" | |||
| #include "common/util/error_manager/error_manager.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "framework/common/util.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "common/model_parser/model_parser.h" | |||
| #include "framework/omg/model_tool.h" | |||
| #include "framework/omg/version.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| using std::string; | |||
| @@ -465,7 +459,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c | |||
| return ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA; | |||
| } | |||
| Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | |||
| Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | |||
| if (status != SUCCESS) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| @@ -514,7 +508,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod | |||
| return INTERNAL_ERROR; | |||
| } | |||
| Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | |||
| Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | |||
| if (status != SUCCESS) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| @@ -165,7 +165,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | |||
| GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | |||
| GELOGD("ModelPartitionTable num:%u, ModelFileHeader length:%zu, ModelPartitionTable length:%zu", | |||
| partition_table->num, sizeof(ModelFileHeader), mem_offset); | |||
| if (model_data_size <= mem_offset) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | |||
| @@ -207,7 +207,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m | |||
| "ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | |||
| index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); | |||
| if (model_data_size <= cur_offset) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, | |||
| "invalid model data, partition_table->num:%u, model data size %u", | |||
| partition_table->num, model_data_size); | |||
| return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||
| } | |||
| @@ -14,16 +14,13 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/model_parser/base.h" | |||
| #include "common/helper/model_helper.h" | |||
| #include <securec.h> | |||
| #include "common/model_parser/model_parser.h" | |||
| #include <fstream> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "framework/common/util.h" | |||
| #include "securec.h" | |||
| #include "common/helper/model_helper.h" | |||
| namespace ge { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelParserBase::ModelParserBase() {} | |||
| @@ -20,6 +20,8 @@ | |||
| #include "framework/common/debug/log.h" | |||
| #include "framework/common/string_util.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "graph/types.h" | |||
| #include "runtime/base.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| @@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point"; | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| const size_t kReportMaxLen = 2048; | |||
| const int32_t kMaxDeviceNum = 256; | |||
| const uint32_t kInteval = 2; | |||
| const std::string kConfigNumsdev = "devNums"; | |||
| const std::string kConfigDevIdList = "devIdList"; | |||
| const std::string kProfStart = "prof_start"; | |||
| const std::string kProfStop = "prof_stop"; | |||
| const std::string kProfModelSubscribe = "prof_model_subscribe"; | |||
| const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | |||
| const std::string kModelName = "model_name"; | |||
| const std::string kModelId = "model_id"; | |||
| const std::string kOpNmae = "op_name"; | |||
| const std::string kOptype = "op_type"; | |||
| const std::string kBlockDim = "block_dims"; | |||
| const std::string kTaskId = "task_id"; | |||
| const std::string kStreamId = "stream_id"; | |||
| const std::string kShapeType = "shape_type"; | |||
| const std::string kCurIterNum = "cur_iter_num"; | |||
| const std::string kTaskType = "task_type"; | |||
| const std::string kInput = "input"; | |||
| const std::string kOutput = "output"; | |||
| const std::string kFormat = "format"; | |||
| const std::string kDataType = "data_type"; | |||
| const std::string kShape = "shape"; | |||
| const std::string kIdx = "idx"; | |||
| #endif | |||
| } // namespace | |||
| @@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||
| #endif | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( | |||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo( | |||
| const TaskDescInfo &task, Json &task_json) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| std::string data; | |||
| for (const auto &task : task_desc_info) { | |||
| std::string model_name = task.model_name; | |||
| std::string op_name = task.op_name; | |||
| uint32_t block_dim = task.block_dim; | |||
| uint32_t task_id = task.task_id; | |||
| uint32_t stream_id = task.stream_id; | |||
| std::string shape_type = task.shape_type; | |||
| int64_t cur_iter_num = task.cur_iter_num; | |||
| uint32_t task_type = task.task_type; | |||
| data = model_name.append(" ") | |||
| .append(op_name).append(" ") | |||
| .append(std::to_string(block_dim)).append(" ") | |||
| .append(std::to_string(task_id)).append(" ") | |||
| .append(std::to_string(stream_id)).append(" ") | |||
| .append(std::to_string(model_id)).append(" ") | |||
| .append(shape_type).append(" ") | |||
| .append(std::to_string(cur_iter_num)).append(" ") | |||
| .append(std::to_string(task_type)).append("\n"); | |||
| ReporterData reporter_data{}; | |||
| reporter_data.deviceId = device_id; | |||
| reporter_data.data = (unsigned char *)data.c_str(); | |||
| reporter_data.dataLen = data.size(); | |||
| int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info")); | |||
| if (ret != EOK) { | |||
| GELOGE(ret, "Report data tag of task_desc_info memcpy error!"); | |||
| return; | |||
| } | |||
| int32_t cb_ret = CallMsprofReport(reporter_data); | |||
| if (cb_ret != 0) { | |||
| GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret); | |||
| return; | |||
| } | |||
| for (size_t i = 0; i < task.input_format.size(); i++) { | |||
| Json tmp_input; | |||
| tmp_input[kIdx] = i; | |||
| Format format = task.input_format[i]; | |||
| tmp_input[kFormat] = TypeUtils::FormatToSerialString(format); | |||
| DataType data_type = task.input_data_type[i]; | |||
| tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type); | |||
| tmp_input[kShape] = task.input_shape[i]; | |||
| task_json[kInput] += tmp_input; | |||
| } | |||
| for (size_t i = 0; i < task.output_format.size(); i++) { | |||
| Json tmp_output; | |||
| tmp_output[kIdx] = i; | |||
| Format format = task.output_format[i]; | |||
| tmp_output[kFormat] = TypeUtils::FormatToSerialString(format); | |||
| DataType data_type = task.output_data_type[i]; | |||
| tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type); | |||
| tmp_output[kShape] = task.output_shape[i]; | |||
| task_json[kOutput] += tmp_output; | |||
| } | |||
| data.clear(); | |||
| #endif | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( | |||
| uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( | |||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| std::string data; | |||
| for (const auto &graph : compute_graph_desc_info) { | |||
| data.append("model_name:") | |||
| .append(graph.model_name) | |||
| .append(" op_name:") | |||
| .append(graph.op_name) | |||
| .append(" op_type:") | |||
| .append(graph.op_type); | |||
| for (size_t i = 0; i < graph.input_format.size(); ++i) { | |||
| data.append(" input_id:") | |||
| .append(std::to_string(i)) | |||
| .append(" input_format:") | |||
| .append(std::to_string(graph.input_format.at(i))) | |||
| .append(" input_data_type:") | |||
| .append(std::to_string(graph.input_data_type.at(i))) | |||
| .append(" input_shape:\""); | |||
| size_t input_shape_len = graph.input_shape.at(i).size(); | |||
| if (input_shape_len == 0) { | |||
| data.append(""); | |||
| } else if (input_shape_len == 1) { | |||
| data.append(std::to_string(graph.input_shape.at(i).at(0))); | |||
| } else { | |||
| for (size_t j = 0; j < input_shape_len - 1; ++j) { | |||
| data.append(std::to_string(graph.input_shape.at(i).at(j))).append(","); | |||
| } | |||
| data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1))); | |||
| } | |||
| data.append("\""); | |||
| } | |||
| for (size_t i = 0; i < graph.output_format.size(); ++i) { | |||
| data.append(" output_id:") | |||
| .append(std::to_string(i)) | |||
| .append(" output_format:") | |||
| .append(std::to_string(graph.output_format.at(i))) | |||
| .append(" output_data_type:") | |||
| .append(std::to_string(graph.output_data_type.at(i))) | |||
| .append(" output_shape:\""); | |||
| size_t output_shape_len = graph.output_shape.at(i).size(); | |||
| if (output_shape_len == 0) { | |||
| data.append(""); | |||
| } else if (output_shape_len == 1) { | |||
| data.append(std::to_string(graph.output_shape.at(i).at(0))); | |||
| } else { | |||
| for (size_t j = 0; j < output_shape_len - 1; ++j) { | |||
| data.append(std::to_string(graph.output_shape.at(i).at(j))).append(","); | |||
| } | |||
| data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1))); | |||
| } | |||
| data.append("\""); | |||
| for (const auto &task : task_desc_info) { | |||
| Json task_info; | |||
| task_info[kModelName] = task.model_name; | |||
| task_info[kModelId] = model_id; | |||
| task_info[kOpNmae] = task.op_name; | |||
| task_info[kOptype] = task.op_type; | |||
| task_info[kBlockDim] = task.block_dim; | |||
| task_info[kTaskType] = task.task_type; | |||
| task_info[kTaskId] = task.task_id; | |||
| task_info[kStreamId] = task.stream_id; | |||
| task_info[kCurIterNum] = task.cur_iter_num; | |||
| task_info[kShapeType] = task.shape_type; | |||
| ProfilingOpInputOutInfo(task, task_info); | |||
| std::string reported_data; | |||
| try { | |||
| reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||
| } catch (std::exception &e) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||
| return ; | |||
| } catch (...) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||
| return; | |||
| } | |||
| data.append(" model_id:").append(std::to_string(model_id)); | |||
| data.append(" task_id:").append(std::to_string(graph.task_id)); | |||
| data.append(" stream_id:").append(std::to_string(graph.stream_id)); | |||
| data.append("\n"); | |||
| GraphDescReport(device_id, data); | |||
| data.clear(); | |||
| reported_data.append(",") | |||
| .append("\n"); | |||
| ReportData(device_id, reported_data, "task_desc_info"); | |||
| } | |||
| #endif | |||
| } | |||
| void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData( | |||
| const int32_t &device_id, const string &data, const string &tag_name) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| ReporterData reporter_data{}; | |||
| int ret = -1; | |||
| @@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d | |||
| size_t index = data.size() / kReportMaxLen; | |||
| if (index >= 1) { | |||
| reporter_data.deviceId = device_id; | |||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||
| for (size_t i = 0; i < index; ++i) { | |||
| reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; | |||
| reporter_data.dataLen = kReportMaxLen; | |||
| cb_ret = CallMsprofReport(reporter_data); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||
| return;); | |||
| } | |||
| reporter_data.dataLen = data.size() - kReportMaxLen * index; | |||
| if (reporter_data.dataLen != 0) { | |||
| reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index; | |||
| cb_ret = CallMsprofReport(reporter_data); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||
| return;); | |||
| } | |||
| } else { | |||
| reporter_data.deviceId = device_id; | |||
| reporter_data.data = (unsigned char *)data.c_str(); | |||
| reporter_data.dataLen = data.size(); | |||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||
| cb_ret = CallMsprofReport(reporter_data); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||
| return;); | |||
| } | |||
| #endif | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | |||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
| const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) { | |||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| int32_t logic_device_id = 0; | |||
| rtError_t rt_ret = rtGetDevice(&logic_device_id); | |||
| @@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||
| GELOGD("current logic_device_id:%d", logic_device_id); | |||
| GELOGD("start ProfilingTaskDescInfo."); | |||
| ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | |||
| GELOGD("start ProfilingGraphDescInfo."); | |||
| ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); | |||
| GELOGD("Report profiling data for GE end."); | |||
| #endif | |||
| } | |||
| @@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs | |||
| static_cast<void *>(&reporter_data), sizeof(ReporterData)); | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo( | |||
| const OpDescPtr &op, TaskDescInfo &task_desc_info) const { | |||
| std::vector<Format> input_format; | |||
| std::vector<std::vector<int64_t>> input_shape; | |||
| std::vector<DataType> input_data_type; | |||
| for (size_t i = 0; i < op->GetAllInputsSize(); ++i) { | |||
| GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i); | |||
| if (input_tensor_desc == nullptr) { | |||
| continue; | |||
| } | |||
| input_format.emplace_back(input_tensor_desc->GetFormat()); | |||
| input_shape.emplace_back(input_tensor_desc->GetShape().GetDims()); | |||
| input_data_type.emplace_back(input_tensor_desc->GetDataType()); | |||
| } | |||
| std::vector<Format> output_format; | |||
| std::vector<std::vector<int64_t>> output_shape; | |||
| std::vector<DataType> output_data_type; | |||
| for (size_t j = 0; j < op->GetOutputsSize(); ++j) { | |||
| GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j); | |||
| if (output_tensor_desc == nullptr) { | |||
| continue; | |||
| } | |||
| output_format.emplace_back(output_tensor_desc->GetFormat()); | |||
| output_shape.emplace_back(output_tensor_desc->GetShape().GetDims()); | |||
| output_data_type.emplace_back(output_tensor_desc->GetDataType()); | |||
| } | |||
| std::vector<Format> format_default = { FORMAT_NULL }; | |||
| std::vector<std::vector<int64_t>> shape_default = { {0} }; | |||
| std::vector<DataType> data_type_default = { DT_UNDEFINED }; | |||
| task_desc_info.input_format = input_format.empty() ? format_default : input_format; | |||
| task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape; | |||
| task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type; | |||
| task_desc_info.output_format = output_format.empty() ? format_default : output_format; | |||
| task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape; | |||
| task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( | |||
| std::string &fp_point, std::string &bp_point) { | |||
| // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init | |||
| @@ -54,6 +54,8 @@ namespace { | |||
| } // namespace | |||
| namespace ge { | |||
| class OpDesc; | |||
| using OpDescPtr = std::shared_ptr<OpDesc>; | |||
| struct DeviceSubsInfo { | |||
| uint64_t module; | |||
| uint32_t subscribe_count; | |||
| @@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
| bool ProfilingModelExecuteOn() const; | |||
| // is_execute_profiling_ only used by ge option and env | |||
| bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } | |||
| void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
| const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | |||
| void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info); | |||
| void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
| const int32_t &device_id); | |||
| void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, | |||
| const int32_t &device_id); | |||
| void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json); | |||
| Status PluginInit() const; | |||
| void PluginUnInit() const; | |||
| Status CallMsprofReport(ReporterData &reporter_data) const; | |||
| @@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
| void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } | |||
| void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } | |||
| void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | |||
| void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; | |||
| void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); | |||
| private: | |||
| Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | |||
| Status ParseOptions(const std::string &options); | |||
| @@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
| Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, | |||
| vector<int32_t> &device_list); | |||
| uint64_t GetProfilingModule(); | |||
| void GraphDescReport(const int32_t &device_id, const string &data); | |||
| void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list); | |||
| void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); | |||
| @@ -33,7 +33,6 @@ set(SRC_LIST | |||
| "../model/ge_model.cc" | |||
| "../model/ge_root_model.cc" | |||
| "../graph/load/model_manager/davinci_model.cc" | |||
| "../graph/load/model_manager/davinci_model_parser.cc" | |||
| "../graph/load/model_manager/model_manager.cc" | |||
| "../graph/load/model_manager/tbe_handle_store.cc" | |||
| "../graph/load/model_manager/cpu_queue_schedule.cc" | |||
| @@ -250,15 +249,14 @@ target_link_options(ge_executor_shared PRIVATE | |||
| target_link_libraries(ge_executor_shared PRIVATE | |||
| $<BUILD_INTERFACE:intf_pub> | |||
| msprofiler | |||
| static_mmpa | |||
| -Wl,--no-as-needed | |||
| ge_common | |||
| runtime | |||
| slog | |||
| mmpa | |||
| graph | |||
| register | |||
| error_manager | |||
| ascend_hal_stub | |||
| ascend_protobuf | |||
| c_sec | |||
| -Wl,--as-needed | |||
| @@ -16,7 +16,6 @@ | |||
| #include "executor/ge_executor.h" | |||
| #include <cce/cce.h> | |||
| #include <cce/compiler_stub.h> | |||
| #include <ctime> | |||
| #include <iostream> | |||
| #include "common/debug/log.h" | |||
| @@ -24,19 +23,11 @@ | |||
| #include "common/helper/model_helper.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "common/util.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/execute/graph_execute.h" | |||
| #include "graph/load/graph_loader.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #include "graph/model.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "mmpa/mmpa_api.h" | |||
| #include "single_op/single_op_manager.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | |||
| @@ -454,7 +445,8 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||
| if (all_data_dims[i] < 0) { | |||
| cur_dynamic_dims.push_back(dynamic_dims[i]); | |||
| } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld", | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | |||
| "Static dims should be same, index: %zu value: %lu should be %ld", | |||
| i, dynamic_dims[i], all_data_dims[i]); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | |||
| } | |||
| @@ -930,12 +922,22 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size | |||
| Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
| SingleOp **single_op) { | |||
| return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op); | |||
| return LoadSingleOpV2(model_name, modelData, stream, single_op, 0); | |||
| } | |||
| Status GeExecutor::LoadSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
| SingleOp **single_op, const uint64_t model_id) { | |||
| return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op, model_id); | |||
| } | |||
| Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
| DynamicSingleOp **single_op) { | |||
| return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op); | |||
| return LoadDynamicSingleOpV2(model_name, modelData, stream, single_op, 0); | |||
| } | |||
| Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
| DynamicSingleOp **single_op, const uint64_t model_id) { | |||
| return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op, model_id); | |||
| } | |||
| Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
| @@ -147,7 +147,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi | |||
| return FAILED; | |||
| } | |||
| static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTensorDesc &tensor, int32_t index, | |||
| static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, | |||
| bool attr) { | |||
| GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | |||
| GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | |||
| @@ -671,6 +671,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> | |||
| Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
| bool is_offline) { | |||
| GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||
| impl_->is_offline_ = is_offline; | |||
| if (!is_offline) { | |||
| (void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); | |||
| } | |||
| @@ -709,8 +711,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| GELOGI("ATC parser success in single op build."); | |||
| GeRootModelPtr ge_root_model = nullptr; | |||
| GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||
| impl_->is_offline_ = is_offline; | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); | |||
| map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| @@ -723,7 +723,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| const ComputeGraphPtr root_graph = ge_root_model->GetRootGraph(); | |||
| GeModelPtr &ge_model = name_to_ge_model.begin()->second; | |||
| GE_CHK_STATUS_RET_NOLOG(CheckDynamicSupport(ge_model, root_graph)); | |||
| GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | |||
| GELOGI("After build model, The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | |||
| bool all_shape = false; | |||
| (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); | |||
| @@ -738,6 +738,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||
| } | |||
| GELOGI("Start save GeModel to Model buffer"); | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); | |||
| return SUCCESS; | |||
| } | |||
| @@ -753,10 +754,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| */ | |||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, const string &model_file_name) { | |||
| GELOGI("Start to build single op offline model."); | |||
| GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | |||
| ModelBufferData model_buff; | |||
| OpEngineType engine_type = ENGINE_SYS; | |||
| return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | |||
| GELOGI("Finish build single offline model, status: %u", status); | |||
| return status; | |||
| } | |||
| /** | |||
| @@ -772,8 +775,10 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor | |||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs, OpEngineType engine_type, | |||
| ModelBufferData &model_buff) { | |||
| GELOGI("Start to build single op online"); | |||
| return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); | |||
| GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | |||
| Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); | |||
| GELOGI("Finish build single online model, status: %u", status); | |||
| return status; | |||
| } | |||
| Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| @@ -798,8 +803,7 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor | |||
| } | |||
| } else { | |||
| for (const auto &in_desc : inputs) { | |||
| GeTensorDesc input_desc = in_desc.GetTensorDesc(); | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true)); | |||
| arg_index++; | |||
| } | |||
| } | |||
| @@ -157,8 +157,8 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { | |||
| } | |||
| ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, | |||
| int64_t dim_index, int64_t &output_mem_size, | |||
| int64_t &batch_dim_num, int64_t &out_size) { | |||
| int64_t dim_index, int64_t &output_mem_size, | |||
| int64_t &batch_dim_num, int64_t &out_size) { | |||
| graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); | |||
| if (graph_status != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Opdesc GetSize failed!"); | |||
| @@ -430,7 +430,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
| GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second), | |||
| GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true), | |||
| "Assign node %s continuous input memory failed.", node->GetName().c_str()) | |||
| } | |||
| for (auto pair : memory_offset_) { | |||
| @@ -441,7 +441,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
| } | |||
| Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | |||
| int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { | |||
| int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) { | |||
| GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); | |||
| auto iter = memory_offset_.find(memory_type); | |||
| if (iter == memory_offset_.end()) { | |||
| @@ -508,12 +508,16 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
| std::map<int32_t, int32_t> out2ins; | |||
| GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str()); | |||
| // output is beginning offset, set offset for input; only support this case now | |||
| if (out2ins.size() == 1 && out2ins.begin()->second == 0) { | |||
| if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { | |||
| auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); | |||
| output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); | |||
| peer_op_desc->SetOutputOffset(output_list); | |||
| GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(), | |||
| out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), | |||
| output_list_this.at(out2ins.begin()->first), peer_output_offset); | |||
| } else { | |||
| GELOGW("Node %s out %d ref in %d with total ref numbers %zu", node->GetName().c_str(), out2ins.begin()->first, | |||
| out2ins.begin()->second, out2ins.size()); | |||
| GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(), | |||
| out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size()); | |||
| } | |||
| // first input is beginning offset | |||
| mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); | |||
| @@ -1535,6 +1539,11 @@ ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int3 | |||
| bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( | |||
| const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) { | |||
| for (const auto &in_node : input_continuous_node->GetInDataNodes()) { | |||
| if (in_node->GetType() == VARIABLE) { | |||
| GELOGI("node %s 's precursor node %s is variable, do not store.", input_continuous_node->GetName().c_str(), | |||
| in_node->GetName().c_str()); | |||
| return true; | |||
| } | |||
| auto iter = node_2_continuous_type.find(in_node); | |||
| // In node's topo order in the front, so function can not be exception | |||
| auto continuous_type = iter->second; | |||
| @@ -1560,13 +1569,15 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( | |||
| } | |||
| ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, | |||
| uint32_t continuous_type) { | |||
| uint32_t continuous_type, | |||
| bool reverse_refresh) { | |||
| int64_t mem_clean_start = 0; | |||
| int64_t mem_clean_size = 0; | |||
| int64_t memory_type = RT_MEMORY_HBM; | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed."); | |||
| auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, continuous_type); | |||
| auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, | |||
| continuous_type, reverse_refresh); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "Assign continuous input memory failed!"); | |||
| return ret; | |||
| @@ -131,13 +131,14 @@ class GraphMemoryAssigner { | |||
| std::map<NodePtr, uint32_t> &node_2_continuous_type); | |||
| ge::Status AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, | |||
| uint32_t continuous_type); | |||
| uint32_t continuous_type, bool reverse_refresh=false); | |||
| ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map, | |||
| map<string, vector<NodePtr>> &connecting_output_atomic_nodes); | |||
| ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | |||
| int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); | |||
| int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, | |||
| bool reverse_refresh = false); | |||
| ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); | |||
| @@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||
| // subgraph of dynamic graph no need to find index, has been found in parent graph | |||
| if (IsSubGraphOfDynamicGraph(graph)) { | |||
| GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); | |||
| GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| @@ -1042,7 +1042,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||
| } | |||
| GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", | |||
| is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index, | |||
| profiling_point.end_index.size() ); | |||
| profiling_point.end_index.size()); | |||
| bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | |||
| if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) { | |||
| @@ -19,12 +19,8 @@ | |||
| #include <memory> | |||
| #include <string> | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "common/model_parser/base.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "omm/csa_interact.h" | |||
| #include "runtime/dev.h" | |||
| #include "runtime/mem.h" | |||
| namespace ge { | |||
| GraphExecutor::GraphExecutor() | |||
| @@ -20,19 +20,13 @@ | |||
| #include <vector> | |||
| #include "common/helper/model_helper.h" | |||
| #include "common/util.h" | |||
| #include "common/model_parser/model_parser.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "omm/csa_interact.h" | |||
| #include "runtime/dev.h" | |||
| namespace ge { | |||
| GraphLoader::GraphLoader() = default; | |||
| GraphLoader::~GraphLoader() = default; | |||
| Status GraphLoader::UnloadModel(uint32_t model_id) { | |||
| auto model_manager = ModelManager::GetInstance(); | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| @@ -120,7 +114,6 @@ Status GraphLoader::GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size) { | |||
| Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string &key_path, int32_t priority, | |||
| ModelData &model_data) { | |||
| Status ret; | |||
| if (!CheckInputPathValid(path)) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); | |||
| return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | |||
| @@ -132,16 +125,15 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | |||
| Status ret = ModelParserBase::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret); | |||
| if (model_data.model_data != nullptr) { | |||
| delete[] static_cast<char *>(model_data.model_data); | |||
| model_data.model_data = nullptr; | |||
| } | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| return ret; | |||
| } | |||
| Status GraphLoader::CommandHandle(const Command &command) { | |||
| @@ -32,9 +32,9 @@ | |||
| namespace ge { | |||
| class GraphLoader { | |||
| public: | |||
| GraphLoader(); | |||
| GraphLoader() = default; | |||
| virtual ~GraphLoader(); | |||
| virtual ~GraphLoader() = default; | |||
| GraphLoader(const GraphLoader &in) = delete; | |||
| @@ -92,9 +92,35 @@ const uint32_t kEndOfSequence = 0x0704000a; | |||
| const uint32_t kEndOfSequenceNew = 507005; | |||
| const int32_t kModelAbortNormal = 0x0704000e; | |||
| const int32_t kModelAbortNormalNew = 507024; | |||
| const uint32_t kInteval = 2; | |||
| const char *const kModelName = "model_name"; | |||
| const char *const kModeleId = "model_id"; | |||
| const char *const kLoadStartTime = "load_start_time"; | |||
| const char *const kLoadEndTime = "load_end_time"; | |||
| const char *const kFusionOpInfo = "fusion_op_info"; | |||
| const char *const kFusionOpName = "fusion_op_name"; | |||
| const char *const kOriginalOpNum = "origin_op_num"; | |||
| const char *const kOriginalOpName = "origin_op_name"; | |||
| const char *const kStreamId = "stream_id"; | |||
| const char *const kFusionOpMemoryInfo = "memory_info"; | |||
| const char *const kInputSize = "input_size"; | |||
| const char *const kOutputSize = "output_size"; | |||
| const char *const kWeightSize = "weight_size"; | |||
| const char *const kWorkSpaceSize = "workspace_size"; | |||
| const char *const kTotalSize = "total_size"; | |||
| const char *const kTaskCount = "task_count"; | |||
| const char *const kTaskId = "task_id"; | |||
| const char* const kRequestId = "request_id"; | |||
| const char* const kThreadId = "thread_id"; | |||
| const char* const kInputBeginTime = "input_begin_time"; | |||
| const char* const kInputEndTime = "input_end_time"; | |||
| const char* const kInferBeginTime = "infer_begin_time"; | |||
| const char* const kInferEndTime = "infer_end_time"; | |||
| const char* const kOutputBeginTime = "output_start_time"; | |||
| const char* const kOutputEndTime = "output_end_time"; | |||
| inline bool IsDataOp(const std::string &node_type) { | |||
| return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; | |||
| return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); | |||
| } | |||
| inline bool IsTbeTask(const OpDescPtr &op_desc) { | |||
| @@ -187,12 +213,12 @@ DavinciModel::~DavinciModel() { | |||
| UnbindTaskSinkStream(); | |||
| for (size_t i = 0; i < label_list_.size(); ++i) { | |||
| if (label_list_[i] != nullptr) { | |||
| GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index: %zu", i); | |||
| GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index:%zu", i); | |||
| } | |||
| } | |||
| for (size_t i = 0; i < stream_list_.size(); ++i) { | |||
| GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index: %zu", i); | |||
| GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index:%zu", i); | |||
| } | |||
| for (size_t i = 0; i < event_list_.size(); ++i) { | |||
| @@ -360,7 +386,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", | |||
| GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu].", | |||
| runtime_param_.graph_id, mem_base_, data_size); | |||
| if (!is_inner_weight_base_) { | |||
| @@ -381,7 +407,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||
| is_inner_p2p_mem_base_ = true; | |||
| } | |||
| GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); | |||
| GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed"); | |||
| runtime_param_.mem_base = mem_base_; | |||
| runtime_param_.weight_base = weights_mem_base_; | |||
| runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; | |||
| @@ -391,7 +417,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||
| Status DavinciModel::InitVariableMem() { | |||
| // malloc variable memory base | |||
| var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM); | |||
| if (TotalVarMemSize() && var_mem_base_ == nullptr) { | |||
| if (TotalVarMemSize() && (var_mem_base_ == nullptr)) { | |||
| Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize()); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Malloc variable memory failed."); | |||
| @@ -500,25 +526,25 @@ Status DavinciModel::DoTaskSink() { | |||
| } | |||
| GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_)); | |||
| GELOGI("do task_sink. AiCpu deploy type is: %x.", deploy_type_); | |||
| GELOGI("do task_sink. AiCpu deploy type is: %x", deploy_type_); | |||
| GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed."); | |||
| if (known_node_) { | |||
| GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed."); | |||
| GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed"); | |||
| } | |||
| GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed."); | |||
| GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed"); | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed"); | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed."); | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed"); | |||
| GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); | |||
| GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed"); | |||
| GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed."); | |||
| GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed"); | |||
| GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); | |||
| GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed"); | |||
| GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); | |||
| @@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| } | |||
| Status DavinciModel::ReportProfilingData() { | |||
| std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | |||
| Status ret = GetComputeGraphInfo(compute_graph_desc_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetComputeGraphInfo failed."); | |||
| return ret; | |||
| } | |||
| ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | |||
| ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo()); | |||
| GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | |||
| return SUCCESS; | |||
| @@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() { | |||
| } | |||
| Status DavinciModel::SinkModelProfile() { | |||
| // profiling plugin must be registered | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| ReporterData reporter_data{}; | |||
| // report model data tag name | |||
| std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||
| return FAILED, "Sink model tag memcpy error."); | |||
| // Model Header | |||
| std::string name = om_name_.empty() ? name_ : om_name_; | |||
| size_t name_len = name.size(); | |||
| reporter_data.deviceId = device_id_; | |||
| reporter_data.data = (unsigned char *)&name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)name.c_str(); | |||
| reporter_data.dataLen = name.size(); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| uint32_t model_id = this->Id(); | |||
| reporter_data.data = (unsigned char *)&model_id; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // Load Start/End Time | |||
| int64_t start_time = this->GetLoadBeginTime(); | |||
| reporter_data.data = (unsigned char *)&start_time; | |||
| reporter_data.dataLen = sizeof(int64_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| int64_t end_time = this->GetLoadEndTime(); | |||
| reporter_data.data = (unsigned char *)&end_time; | |||
| reporter_data.dataLen = sizeof(int64_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| Json model_load_info; | |||
| model_load_info[kModelName] = name; | |||
| model_load_info[kModeleId] = model_id; | |||
| model_load_info[kLoadStartTime] = start_time; | |||
| model_load_info[kLoadEndTime] = end_time; | |||
| // fusion op info | |||
| using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | |||
| using Range = std::pair<CIT, CIT>; | |||
| for (const ProfileInfo &profile : profile_list_) { | |||
| // op name after fusion | |||
| Json fusion_op_info; | |||
| string fusion_op_name = profile.fusion_info.op_name; | |||
| int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); | |||
| reporter_data.data = (unsigned char *)&fusion_op_name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)fusion_op_name.c_str(); | |||
| reporter_data.dataLen = fusion_op_name_len; | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // original op name before fusion | |||
| uint32_t op_num = profile.fusion_info.original_op_names.size(); | |||
| reporter_data.data = (unsigned char *)&op_num; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| vector<string> original_name; | |||
| for (uint32_t k = 0; k < op_num; k++) { | |||
| std::string op_name = profile.fusion_info.original_op_names[k]; | |||
| int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); | |||
| reporter_data.data = (unsigned char *)&op_name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)op_name.c_str(); | |||
| reporter_data.dataLen = op_name_len; | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| } | |||
| // stream id info | |||
| original_name.emplace_back(profile.fusion_info.original_op_names[k]); | |||
| } | |||
| uint32_t stream_id = 0; | |||
| auto iter = profiler_report_op_info_.find(fusion_op_name); | |||
| if (iter != profiler_report_op_info_.end()) { | |||
| stream_id = iter->second.second; | |||
| } | |||
| reporter_data.data = (unsigned char *)&stream_id; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // memory info | |||
| reporter_data.data = (unsigned char *)&profile.memory_info; | |||
| reporter_data.dataLen = sizeof(profile.memory_info); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // task info | |||
| reporter_data.data = (unsigned char *)&profile.task_count; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| fusion_op_info[kFusionOpName] = fusion_op_name; | |||
| fusion_op_info[kOriginalOpNum] = op_num; | |||
| fusion_op_info[kOriginalOpName] = original_name; | |||
| fusion_op_info[kStreamId] = stream_id; | |||
| fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size; | |||
| fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size; | |||
| fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size; | |||
| fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size; | |||
| fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size; | |||
| fusion_op_info[kTaskCount] = profile.task_count; | |||
| vector<uint32_t> task_id; | |||
| Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | |||
| for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | |||
| uint32_t task_id = idx->second; | |||
| reporter_data.data = (unsigned char *)&task_id; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| task_id.push_back(idx->second); | |||
| } | |||
| fusion_op_info[kTaskId] = task_id; | |||
| model_load_info[kFusionOpInfo] += fusion_op_info; | |||
| } | |||
| std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||
| std::string reported_data; | |||
| try { | |||
| reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||
| } catch (std::exception &e) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||
| } catch (...) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||
| } | |||
| reported_data.append(",") | |||
| .append("\n"); | |||
| prof_mgr.ReportData(device_id_, reported_data, tag_name); | |||
| return SUCCESS; | |||
| } | |||
| Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | |||
| // profiling plugin must be registered | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| ReporterData reporter_data{}; | |||
| string name = om_name_.empty() ? name_ : om_name_; | |||
| Json model_time_info; | |||
| model_time_info[kModelName] = name; | |||
| model_time_info[kModeleId] = this->Id(); | |||
| model_time_info[kRequestId] = current_data.request_id; | |||
| model_time_info[kThreadId] = GetDataInputTid(); | |||
| model_time_info[kInputBeginTime] = time_info_.processBeginTime; | |||
| model_time_info[kInputEndTime] = time_info_.processEndTime; | |||
| model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime; | |||
| model_time_info[kInferEndTime] = time_info_.inferenceEndTime; | |||
| model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime; | |||
| model_time_info[kOutputEndTime] = time_info_.dumpEndTime; | |||
| // report model data tag name | |||
| std::string tag_name; | |||
| tag_name.append("model_time_info_") | |||
| .append(std::to_string(this->Id())) | |||
| .append("_") | |||
| .append(std::to_string(current_data.index)); | |||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||
| return FAILED, "Sink model tag memcpy error."); | |||
| // device id | |||
| reporter_data.deviceId = device_id_; | |||
| // Model Header | |||
| string name; | |||
| if (!om_name_.empty()) { | |||
| name = om_name_; | |||
| } else { | |||
| name = name_; | |||
| } | |||
| size_t name_len = name.size(); | |||
| reporter_data.data = (unsigned char *)&name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)name.c_str(); | |||
| reporter_data.dataLen = name.size(); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // request id | |||
| uint64_t request_id = current_data.request_id; | |||
| reporter_data.data = (unsigned char *)&request_id; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||
| // thread id | |||
| int32_t thread_id = GetDataInputTid(); | |||
| reporter_data.data = (unsigned char *)&thread_id; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||
| // time info | |||
| time_info_.modelId = this->Id(); | |||
| reporter_data.data = (unsigned char *)&time_info_; | |||
| reporter_data.dataLen = sizeof(struct timeInfo); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||
| .append(std::to_string(this->Id())) | |||
| .append("_") | |||
| .append(std::to_string(current_data.index)); | |||
| std::string reported_data; | |||
| try { | |||
| reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||
| } catch (std::exception &e) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||
| } catch (...) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||
| } | |||
| reported_data.append(",") | |||
| .append("\n"); | |||
| prof_mgr.ReportData(device_id_, reported_data, tag_name); | |||
| return SUCCESS; | |||
| } | |||
| @@ -2641,6 +2589,7 @@ void *DavinciModel::Run(DavinciModel *model) { | |||
| bool seq_end_flag = false; | |||
| uint32_t model_id = model->Id(); | |||
| uint32_t device_id = model->GetDeviceId(); | |||
| GetContext().SetWorkStreamId(model->GetWorkStreamId()); | |||
| GELOGI("Model Run thread start, model_id:%u.", model_id); | |||
| rtError_t rt_ret = rtSetDevice(static_cast<int32_t>(device_id)); | |||
| @@ -2807,6 +2756,7 @@ Status DavinciModel::ModelRunStart() { | |||
| int64_t maxDumpOpNum = std::strtol(opt.c_str(), nullptr, kDecimal); | |||
| maxDumpOpNum_ = maxDumpOpNum; | |||
| work_stream_id_ = GetContext().WorkStreamId(); | |||
| CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this); | |||
| GELOGI("model tread create success, model id:%u.", model_id_); | |||
| return SUCCESS; | |||
| @@ -3069,13 +3019,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||
| task_desc_info.model_name = name_; | |||
| } | |||
| task_desc_info.op_name = op->GetName(); | |||
| task_desc_info.op_type = op->GetType(); | |||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | |||
| task_desc_info.task_id = task->GetTaskID(); | |||
| task_desc_info.stream_id = task->GetStreamId(); | |||
| task_desc_info.shape_type = "static"; | |||
| task_desc_info.cur_iter_num = 0; | |||
| // task type | |||
| task_desc_info.task_type = kTaskTypeInvalid; | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| prof_mgr.GetOpInputOutputInfo(op, task_desc_info); | |||
| auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| if (model_task_type == RT_MODEL_TASK_KERNEL) { | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| @@ -3107,7 +3059,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||
| task_desc_info_.emplace_back(task_desc_info); | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| Status DavinciModel::DistributeTask() { | |||
| @@ -3332,7 +3283,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp | |||
| /// | |||
| Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | |||
| const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) { | |||
| string input_or_output = "input"; | |||
| string input_or_output; | |||
| is_input ? input_or_output = "input" : input_or_output = "output"; | |||
| if (blobs.size() != data_info.size()) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", | |||
| @@ -3342,7 +3293,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> & | |||
| for (const auto &data : data_info) { | |||
| if (data.first >= blobs.size()) { // check data index. | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||
| "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", | |||
| input_or_output.c_str(), data.first, blobs.size()); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| @@ -4007,41 +3959,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea | |||
| main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | |||
| } | |||
| Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) { | |||
| auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); | |||
| for (auto &op_desc : all_op_desc) { | |||
| ComputeGraphDescInfo compute_graph_info; | |||
| if (!om_name_.empty()) { | |||
| compute_graph_info.model_name = om_name_; | |||
| } else { | |||
| compute_graph_info.model_name = name_; | |||
| } | |||
| std::vector<Format> format = { FORMAT_NULL }; | |||
| std::vector<std::vector<int64_t>> shape = { {0} }; | |||
| std::vector<DataType> data_type = { DT_UNDEFINED }; | |||
| compute_graph_info.op_name = op_desc.op_name; | |||
| compute_graph_info.op_type = op_desc.op_type; | |||
| compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||
| compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||
| compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||
| compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||
| compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||
| compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| auto iter = profiler_report_op_info_.find(op_desc.op_name); | |||
| if (iter != profiler_report_op_info_.end()) { | |||
| task_id = iter->second.first; | |||
| stream_id = iter->second.second; | |||
| } | |||
| compute_graph_info.task_id = task_id; | |||
| compute_graph_info.stream_id = stream_id; | |||
| graph_desc_info.emplace_back(compute_graph_info); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | |||
| if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | |||
| tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | |||
| @@ -4133,10 +4050,10 @@ Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op | |||
| int64_t data_input_size; | |||
| (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); | |||
| GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", | |||
| index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||
| formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); | |||
| index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||
| formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); | |||
| } | |||
| } | |||
| @@ -412,6 +412,8 @@ class DavinciModel { | |||
| /// | |||
| uint64_t GetSessionId() const { return session_id_; } | |||
| uint64_t GetWorkStreamId() const { return work_stream_id_; } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief SetDeviceId | |||
| @@ -840,9 +842,6 @@ class DavinciModel { | |||
| Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | |||
| // get desc info of graph for profiling | |||
| Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | |||
| void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | |||
| Status InitL1DataDumperArgs(); | |||
| @@ -960,6 +959,7 @@ class DavinciModel { | |||
| vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task. | |||
| uint64_t session_id_; | |||
| uint64_t work_stream_id_; | |||
| uint32_t device_id_; | |||
| @@ -1,23 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| namespace ge { | |||
| DavinciModelParser::DavinciModelParser() {} | |||
| DavinciModelParser::~DavinciModelParser() {} | |||
| } // namespace ge | |||
| @@ -1,46 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_ | |||
| #include <securec.h> | |||
| #include <memory> | |||
| #include "common/debug/log.h" | |||
| #include "common/ge_types.h" | |||
| #include "common/model_parser/base.h" | |||
| #include "common/types.h" | |||
| #include "common/util.h" | |||
| namespace ge { | |||
| class DavinciModelParser : public ModelParserBase { | |||
| public: | |||
| /// | |||
| /// @ingroup hiai | |||
| /// @brief constructor | |||
| /// | |||
| DavinciModelParser(); | |||
| /// | |||
| /// @ingroup hiai | |||
| /// @brief destructor | |||
| /// | |||
| ~DavinciModelParser(); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_ | |||
| @@ -18,23 +18,15 @@ | |||
| #include <string> | |||
| #include "mmpa/mmpa_api.h" | |||
| #include "aicpu/aicpu_schedule/aicpu_op_type_list.h" | |||
| #include "common/model_parser/model_parser.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "common/l2_cache_optimize.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "common/properties_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/common/ge_call_wrapper.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "model/ge_root_model.h" | |||
| #include "graph/common/local_context.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "common/formats/utils/formats_trans_utils.h" | |||
| #include "hybrid/hybrid_davinci_model.h" | |||
| namespace ge { | |||
| thread_local uint32_t device_count = 0; | |||
| @@ -1403,7 +1395,7 @@ Status ModelManager::LaunchCustAicpuSo() { | |||
| Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &mem_size, size_t &weight_size) { | |||
| uint8_t *model_data = nullptr; | |||
| uint32_t model_len = 0; | |||
| Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); | |||
| Status ret = ModelParserBase::ParseModelContent(model, model_data, model_len); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!"); | |||
| OmFileLoadHelper om_file_helper; | |||
| @@ -28,10 +28,9 @@ const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, | |||
| kBinSizeUnit8 * kMByteSize, | |||
| kBinSizeUnit32 * kMByteSize, | |||
| kBinSizeUnit128 * kMByteSize, | |||
| kGByteSize, | |||
| kBinSizeUnit4 * kGByteSize, | |||
| kBinSizeUnit16 * kGByteSize, | |||
| kBinSizeUnit26 * kGByteSize}; | |||
| kBinSizeUnit256 * kMByteSize, | |||
| kBinSizeUnit512 * kMByteSize, | |||
| kGByteSize}; | |||
| static bool BlockComparator(const Block *left, const Block *right) { | |||
| if (left->size != right->size) { | |||
| @@ -63,7 +62,10 @@ size_t GetBinIndex(size_t size) { | |||
| size_t GetAllocationSize(size_t size) { | |||
| size_t index = GetBinIndex(size); | |||
| return bin_ranges[index]; | |||
| if (bin_ranges[index] >= size) { | |||
| return bin_ranges[index]; | |||
| } | |||
| return kGByteSize * ((size + kGByteSize - 1) / kGByteSize); | |||
| } | |||
| /// | |||
| @@ -119,6 +121,7 @@ void CachingAllocator::Finalize(uint32_t device_id) { | |||
| } | |||
| uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { | |||
| GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); | |||
| uint8_t *ptr = nullptr; | |||
| size = GetBlockSize(size); | |||
| Block *block = FindFreeBlock(size, org_ptr, device_id); | |||
| @@ -253,6 +256,7 @@ Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, ui | |||
| } | |||
| Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { | |||
| GELOGI("Try to extend cache. size = %zu, device id = %u", size, device_id); | |||
| auto memory_size = GetAllocationSize(size); | |||
| const std::string purpose = "Memory for caching."; | |||
| auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id); | |||
| @@ -36,17 +36,17 @@ namespace ge { | |||
| constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes | |||
| constexpr size_t kBinSizeUnit4 = 4; | |||
| constexpr size_t kBinSizeUnit8 = 8; | |||
| constexpr size_t kBinSizeUnit16 = 16; | |||
| constexpr size_t kBinSizeUnit26 = 26; | |||
| constexpr size_t kBinSizeUnit32 = 32; | |||
| constexpr size_t kBinSizeUnit128 = 128; | |||
| constexpr size_t kBinSizeUnit256 = 256; | |||
| constexpr size_t kBinSizeUnit512 = 512; | |||
| constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold | |||
| constexpr double kSplitThreshold = 0.5; // split when malloc size <= small block size * kSpliThreshold | |||
| constexpr size_t kKByteSize = 1024; | |||
| constexpr size_t kMByteSize = 1048576; // 1024 * 1024 | |||
| constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024 | |||
| static const uint32_t kNumBins = 8; | |||
| static const uint32_t kNumBins = 7; | |||
| class MemoryAllocator; | |||
| @@ -293,7 +293,7 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) { | |||
| return FAILED; | |||
| } | |||
| if ((op_desc->GetType() == DATA) || (op_type == kGetNextName)) { | |||
| GELOGI("Need to process multi batch for compute graph."); | |||
| GELOGI("Need to process multi batch for compute graph. op_type:%s", op_desc->GetType().c_str()); | |||
| GetLocalOmgContext().need_multi_batch = true; | |||
| break; | |||
| } | |||
| @@ -348,7 +348,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| for (auto &subgraph : compute_graph->GetAllSubgraphs()) { | |||
| (void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | |||
| } | |||
| GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||
| GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]."); | |||
| } | |||
| GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | |||
| @@ -541,7 +541,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||
| } | |||
| std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | |||
| compute_graph->GetGraphID(), subgraph, | |||
| compute_graph->GetName(), session_id, | |||
| compute_graph->GetName(), session_id, GetContext().WorkStreamId(), | |||
| GetThreadLocalContext()); | |||
| if (!f.valid()) { | |||
| GELOGE(FAILED, "Future is invalid"); | |||
| @@ -557,7 +557,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||
| } | |||
| std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | |||
| compute_graph->GetGraphID(), subgraph, | |||
| compute_graph->GetName(), session_id, | |||
| compute_graph->GetName(), session_id, GetContext().WorkStreamId(), | |||
| GetThreadLocalContext()); | |||
| if (!f.valid()) { | |||
| GELOGE(FAILED, "Future is invalid"); | |||
| @@ -734,8 +734,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, | |||
| } | |||
| Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) { | |||
| GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, | |||
| static_cast<int>(mode), ge::GetContext().DeviceId()); | |||
| GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", | |||
| session_id, graph_id, static_cast<int>(mode), ge::GetContext().DeviceId()); | |||
| rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| @@ -758,7 +758,7 @@ Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { | |||
| GE_TIMESTAMP_START(RunCustomPass); | |||
| GraphPtr graph = std::const_pointer_cast<Graph>(const_graph); | |||
| GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.", | |||
| GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail", | |||
| comp_graph->GetName().c_str()); | |||
| GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass"); | |||
| return SUCCESS; | |||
| @@ -776,7 +776,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| GE_CHK_STATUS_RET(analyzer_instance->BuildJsonObject(session_id, compute_graph->GetGraphID()), | |||
| "BuildJsonObject Failed") | |||
| GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s", | |||
| GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s.", | |||
| compute_graph->GetDirectNodesSize(), session_id, compute_graph->GetGraphID(), | |||
| compute_graph->GetName().c_str()); | |||
| GE_DUMP(compute_graph, "PreRunBegin"); | |||
| @@ -797,7 +797,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| if (run_optimize_original_graph) { | |||
| Status ret = PreRunOptimizeOriginalGraph(graph_node, inputs, compute_graph, session_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s.", compute_graph->GetName().c_str()); | |||
| GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s", compute_graph->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| } | |||
| @@ -869,7 +869,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||
| // release rts generate context | |||
| RtContextUtil::GetInstance().DestroyRtContexts(session_id, graph_node->GetGraphId()); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "PreRun Failed."); | |||
| GELOGE(ret, "PreRun Failed. graph_id:%u", graph_node->GetGraphId()); | |||
| return ret; | |||
| } | |||
| } | |||
| @@ -1209,7 +1209,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const | |||
| Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, | |||
| GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { | |||
| GELOGD("[BuildGraph] start to build graph, graph_id=%u.", graph_id); | |||
| GELOGD("[BuildGraph] start to build graph, graph_id:%u.", graph_id); | |||
| if (inputs.empty()) { | |||
| GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); | |||
| } | |||
| @@ -1241,7 +1241,7 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen | |||
| ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); | |||
| graph_node->SetRunFlag(false); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed!"); | |||
| GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed! graph_id:%u", graph_id); | |||
| return GE_GRAPH_PRERUN_FAILED; | |||
| } | |||
| @@ -2254,9 +2254,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", | |||
| new (std::nothrow) | |||
| LinkGenMaskNodesPass(options_.stream_max_parallel_num))); | |||
| GE_CHK_STATUS_RET( | |||
| after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass", | |||
| new (std::nothrow) HcclContinuousMemcpyPass)); | |||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass", | |||
| new (std::nothrow) HcclContinuousMemcpyPass)); | |||
| GE_TIMESTAMP_START(after_merge_passes); | |||
| auto ret = after_merge_passes.Run(compute_graph); | |||
| @@ -2509,8 +2508,10 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager | |||
| const SubGraphInfoPtr &sub_graph_info_ptr, | |||
| const std::string &root_graph_name, | |||
| uint64_t session_id, | |||
| uint64_t work_stream_id, | |||
| const GEThreadLocalContext &ge_context) { | |||
| if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { | |||
| GetContext().SetWorkStreamId(work_stream_id); | |||
| GetContext().SetSessionId(session_id); | |||
| GetThreadLocalContext() = ge_context; | |||
| graph_manager->UpdateLocalOmgContext(root_graph_id); | |||
| @@ -2557,7 +2558,8 @@ Status GraphManager::RunGraphAsync(const GraphId &graph_id, const std::vector<ge | |||
| uint64_t session_id, RunAsyncCallback callback) { | |||
| GELOGI("[GraphManager] Start to run graph async, graph_id=%u, inputsSize=%zu.", graph_id, inputs.size()); | |||
| bool ret = prerun_args_q_.Push(PreRunArgs({graph_id, inputs, session_id, GetThreadLocalContext(), callback})); | |||
| bool ret = prerun_args_q_.Push(PreRunArgs({graph_id, inputs, session_id, | |||
| GetContext().WorkStreamId(), GetThreadLocalContext(), callback})); | |||
| if (!ret) { | |||
| GELOGE(FAILED, "[GraphManager] Run graph async failed, graph_id=%u.", graph_id); | |||
| return FAILED; | |||
| @@ -2644,6 +2646,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| GELOGI("A new loop start."); | |||
| GetContext().SetWorkStreamId(args.work_stream_id); | |||
| GetContext().SetSessionId(args.session_id); | |||
| GetThreadLocalContext() = args.context; | |||
| graph_manager->UpdateLocalOmgContext(args.graph_id); | |||
| @@ -2725,8 +2728,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| ge_root_model = graph_node->GetGeRootModel(); | |||
| } | |||
| graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.input_tensor, | |||
| ge_root_model, GetThreadLocalContext(), args.callback })); | |||
| graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.work_stream_id, | |||
| args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback })); | |||
| GELOGI("Loop end."); | |||
| } | |||
| } | |||
| @@ -2825,6 +2828,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| GELOGI("A new loop start."); | |||
| GetContext().SetWorkStreamId(args.work_stream_id); | |||
| GetContext().SetSessionId(args.session_id); | |||
| GetThreadLocalContext() = args.context; | |||
| graph_manager->UpdateLocalOmgContext(args.graph_id); | |||
| @@ -196,6 +196,7 @@ class GraphManager { | |||
| GraphId graph_id; | |||
| std::vector<ge::InputTensorInfo> input_tensor; | |||
| uint64_t session_id; | |||
| uint64_t work_stream_id; | |||
| GEThreadLocalContext context; | |||
| RunAsyncCallback callback; | |||
| }; | |||
| @@ -204,6 +205,7 @@ class GraphManager { | |||
| GraphNodePtr graph_node; | |||
| GraphId graph_id; | |||
| uint64_t session_id; | |||
| uint64_t work_stream_id; | |||
| std::vector<ge::InputTensorInfo> input_tensor; | |||
| GeRootModelPtr ge_root_model; | |||
| GEThreadLocalContext context; | |||
| @@ -221,6 +223,7 @@ class GraphManager { | |||
| const SubGraphInfoPtr &sub_graph_info_ptr, | |||
| const std::string &root_graph_name, | |||
| uint64_t session_id, | |||
| uint64_t work_stream_id, | |||
| const GEThreadLocalContext &ge_context); | |||
| Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor); | |||
| void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor); | |||
| @@ -26,6 +26,7 @@ | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "graph/manager/host_mem_allocator.h" | |||
| #include "graph/node.h" | |||
| #include "runtime/mem.h" | |||
| @@ -139,7 +140,6 @@ class MemoryAllocator { | |||
| using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | |||
| class CachingAllocator; | |||
| class RdmaPoolAllocator; | |||
| class HostMemAllocator; | |||
| class MemManager { | |||
| public: | |||
| MemManager(); | |||
| @@ -24,9 +24,9 @@ namespace { | |||
| constexpr uint32_t kValidInputNodeOutputNum = 1; | |||
| constexpr int32_t kAssignRefInputIndex = 0; | |||
| constexpr int32_t kAssignValueInputIndex = 1; | |||
| static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||
| ge::CONSTANT, ge::CONSTANTOP, | |||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||
| const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||
| ge::CONSTANT, ge::CONSTANTOP, | |||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||
| } | |||
| Status AssignRemovePass::Run(NodePtr &node) { | |||
| @@ -50,13 +50,11 @@ Status RunOpKernelWithCheck(NodePtr &node, | |||
| return FoldingPass::RunOpKernel(node, inputs, outputs); | |||
| } | |||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||
| &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||
| return statistic_of_ge_constant_folding_; | |||
| } | |||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||
| &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||
| return statistic_of_op_constant_folding_; | |||
| } | |||
| @@ -37,7 +37,7 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { | |||
| return NOT_CHANGED; | |||
| } | |||
| GELOGI("FlowCtrl pass begin"); | |||
| GELOGI("FlowCtrl pass begin.graph is [%s]", compute_graph->GetName().c_str()); | |||
| bool graph_change = false; | |||
| // 1. Add FP/BP flow ctrl (big cycle) | |||
| for (auto &node : compute_graph->GetDirectNode()) { | |||
| @@ -80,6 +80,16 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { | |||
| graph_change = true; | |||
| } | |||
| } | |||
| // add edge operation below depends on memcpy node in itertor loop set single stream,or may cause block | |||
| for (auto &active_node : active_nodes_in_iter_loop_) { | |||
| auto ret = GraphUtils::AddEdge(active_node->GetOutControlAnchor(), | |||
| assign_add_node_in_fpbp_loop_->GetInControlAnchor()); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| GELOGW("add control edge between iter_loop_node:%s and fpbp_loop_node:%s fail, may cause block", | |||
| active_node->GetName().c_str(), assign_add_node_in_fpbp_loop_->GetName().c_str()); | |||
| } | |||
| } | |||
| GELOGI("FlowCtrl pass end, graph is %s.", graph_change ? "changed" : "not changed"); | |||
| return graph_change ? SUCCESS : NOT_CHANGED; | |||
| } | |||
| @@ -279,16 +289,16 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co | |||
| * loopIncrement | |||
| */ | |||
| // Insert AssignAdd node | |||
| NodePtr assign_add_node = | |||
| assign_add_node_in_fpbp_loop_ = | |||
| InsertAssignOp(compute_graph, ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node); | |||
| if (assign_add_node == nullptr || switch_node == nullptr) { | |||
| if (assign_add_node_in_fpbp_loop_ == nullptr || switch_node == nullptr) { | |||
| GELOGE(PARAM_INVALID, "assign add node or switch node is null"); | |||
| return FAILED; | |||
| } | |||
| string active_name = switch_node->GetName() + "_StreamActive"; | |||
| // add attr for stream assign model to break branch. | |||
| GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node, active_name), "set stream label failed"); | |||
| GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node_in_fpbp_loop_, active_name), "set stream label failed"); | |||
| // used for stream assign to find true branch | |||
| GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); | |||
| @@ -304,13 +314,15 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co | |||
| DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); | |||
| // add ctrl edges | |||
| graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), assign_add_node->GetInControlAnchor()); | |||
| graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), | |||
| assign_add_node_in_fpbp_loop_->GetInControlAnchor()); | |||
| if (add_ret != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Add switch_node to assign_add_node ctrl edge failed, add_ret=%u.", add_ret); | |||
| return FAILED; | |||
| } | |||
| add_ret = GraphUtils::AddEdge(assign_add_node->GetOutControlAnchor(), active_node->GetInControlAnchor()); | |||
| add_ret = GraphUtils::AddEdge(assign_add_node_in_fpbp_loop_->GetOutControlAnchor(), | |||
| active_node->GetInControlAnchor()); | |||
| if (add_ret != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Add assign_add_node to active_node ctrl edge failed, add_ret=%u.", add_ret); | |||
| return FAILED; | |||
| @@ -533,6 +545,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, | |||
| GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); | |||
| // used for stream assign to find active stream | |||
| GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { loop_pre_node->GetName() }), "set active label list failed"); | |||
| active_nodes_in_iter_loop_.push_back(active_node); | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -142,6 +142,9 @@ class FlowCtrlPass : public GraphPass { | |||
| /// false: only one dataSet exist | |||
| /// | |||
| bool CheckMultiDataSet(ComputeGraphPtr &compute_graph); | |||
| NodePtr assign_add_node_in_fpbp_loop_ = nullptr; | |||
| std::vector<NodePtr> active_nodes_in_iter_loop_; | |||
| }; | |||
| } // namespace ge | |||
| @@ -140,7 +140,8 @@ bool HcclContinuousMemcpyPass::IsDataNode(const std::string& node_type) { | |||
| /// @param [in] ge::OutDataAnchorPtr in_node | |||
| /// @return ge::NodePtr | |||
| /// | |||
| NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { | |||
| NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, | |||
| const OutDataAnchorPtr &out_data_anchor) { | |||
| GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | |||
| NodePtr pre_node = out_data_anchor->GetOwnerNode(); | |||
| OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | |||
| @@ -205,8 +206,9 @@ std::string HcclContinuousMemcpyPass::CheckDuplicateName(const std::string &node | |||
| /// @param [in] InDataAnchorPtr hccl_in_anchor | |||
| /// @return status | |||
| /// | |||
| Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | |||
| const InDataAnchorPtr &hccl_in_anchor) { | |||
| Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, | |||
| const OutDataAnchorPtr &src_out_anchor, | |||
| const InDataAnchorPtr &hccl_in_anchor) { | |||
| GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); | |||
| GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); | |||
| @@ -235,8 +237,9 @@ Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &gra | |||
| /// @param [in] InDataAnchorPtr hccl_in_anchor | |||
| /// @return status | |||
| /// | |||
| Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | |||
| const InDataAnchorPtr &hccl_in_anchor) { | |||
| Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, | |||
| const OutDataAnchorPtr &src_out_anchor, | |||
| const InDataAnchorPtr &hccl_in_anchor) { | |||
| GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), | |||
| hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | |||
| NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); | |||
| @@ -274,8 +277,8 @@ Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr | |||
| /// @return status | |||
| /// | |||
| Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, | |||
| const OutDataAnchorPtr &var_out_anchor, | |||
| const InDataAnchorPtr &hccl_in_anchor) { | |||
| const OutDataAnchorPtr &var_out_anchor, | |||
| const InDataAnchorPtr &hccl_in_anchor) { | |||
| if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { | |||
| GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | |||
| return SUCCESS; | |||
| @@ -354,8 +357,9 @@ Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeG | |||
| /// @param [in] ge::OutDataAnchorPtr variable node out anchor | |||
| /// @return ge::NodePtr | |||
| /// | |||
| NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { | |||
| GE_CHECK_NOTNULL_EXEC(graph , return nullptr); | |||
| NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, | |||
| const OutDataAnchorPtr &out_data_anchor) { | |||
| GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | |||
| NodePtr pre_node = out_data_anchor->GetOwnerNode(); | |||
| OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | |||
| if (pre_op_desc == nullptr) { | |||
| @@ -23,9 +23,9 @@ namespace ge { | |||
| namespace { | |||
| constexpr uint32_t kInplaceSupportOutputIndex = 0; | |||
| constexpr uint32_t kInplaceSupportOutputNum = 1; | |||
| static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||
| ge::CONSTANT, ge::CONSTANTOP, | |||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||
| const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||
| ge::CONSTANT, ge::CONSTANTOP, | |||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||
| } | |||
| Status InplaceSupportCheckPass::Run(NodePtr &node) { | |||
| GELOGD("InplaceSupportCheckPass running"); | |||
| @@ -458,7 +458,7 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) { | |||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); | |||
| return GE_GRAPH_PARAM_NULLPTR; | |||
| } | |||
| GELOGI("NetOutputPass Run."); | |||
| GELOGI("NetOutputPass Run.graph is [%s]", graph->GetName().c_str()); | |||
| NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT); | |||
| // save user targets node | |||
| SaveAndRemoveTargets(graph); | |||
| @@ -82,14 +82,41 @@ Status NoUseReshapeRemovePass::Run(ge::NodePtr &node) { | |||
| } | |||
| } | |||
| if (to_be_deleted) { | |||
| GELOGI("NoUseReshapeRemovePass remove useless node:%s", node->GetName().c_str()); | |||
| auto ret = PassUtils::UnlinkNodeWithControlCopy(node, kReshapeShapeIndex); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "DimensionAdjustPass unlink node with control copy fail."); | |||
| return ret; | |||
| } | |||
| auto ret = TryRemoveConstShapeInput(node); | |||
| GE_CHK_STATUS_RET_NOLOG(ret); | |||
| GELOGI("NoUseReshapeRemovePass remove useless reshape node:%s", node->GetName().c_str()); | |||
| return IsolateAndDeleteNode(node, {kReshapeDataIndex}); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status NoUseReshapeRemovePass::TryRemoveConstShapeInput(ge::NodePtr &reshape_node) { | |||
| auto shape_input_anchor = reshape_node->GetInDataAnchor(kReshapeShapeIndex); | |||
| if (shape_input_anchor == nullptr) { | |||
| return SUCCESS; | |||
| } | |||
| GE_CHECK_NOTNULL(shape_input_anchor->GetPeerOutAnchor()); | |||
| auto shape_input = shape_input_anchor->GetPeerOutAnchor()->GetOwnerNode(); | |||
| GE_CHECK_NOTNULL(shape_input); | |||
| if (shape_input->GetType() != CONSTANT && shape_input->GetType() != CONSTANTOP) { | |||
| return SUCCESS; | |||
| } | |||
| // op(x) const(shape) | |||
| // \ / | |||
| // reshape | |||
| // const input can unlink but should copy control_dependency | |||
| auto ret = PassUtils::UnlinkNodeWithControlCopy(reshape_node, kReshapeShapeIndex); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Unlink node %s with control copy failed.", shape_input->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| // remove const without any data_output | |||
| if (shape_input->GetOutDataNodesSize() == 0) { | |||
| auto ret = IsolateAndDeleteNode(shape_input, {}); | |||
| GE_CHK_GRAPH_STATUS_RET(ret, "Fail to remove node %s", shape_input->GetName().c_str()); | |||
| GELOGI("Remove useless shape input const %s.", shape_input->GetName().c_str()); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -32,6 +32,9 @@ class NoUseReshapeRemovePass : public BaseNodePass { | |||
| /// @author | |||
| /// | |||
| Status Run(ge::NodePtr &node) override; | |||
| private: | |||
| Status TryRemoveConstShapeInput(NodePtr &reshape_node); | |||
| }; | |||
| } // namespace ge | |||
| @@ -27,12 +27,11 @@ | |||
| namespace ge { | |||
| Status PrunePass::Run(ge::ComputeGraphPtr graph) { | |||
| GELOGD("PrunePass Start"); | |||
| GELOGD("PrunePass Start, graph is [%s]", graph->GetName().c_str()); | |||
| if (graph == nullptr) { | |||
| GELOGE(GE_GRAPH_ISNULL, "input compute graph is NULL."); | |||
| return GE_GRAPH_ISNULL; | |||
| } | |||
| std::vector<NodePtr> out_nodes; | |||
| std::unordered_set<NodePtr> nodes; | |||
| for (NodePtr &node_ptr : graph->GetDirectNode()) { | |||
| @@ -42,7 +41,6 @@ Status PrunePass::Run(ge::ComputeGraphPtr graph) { | |||
| out_nodes.push_back(node_ptr); | |||
| } | |||
| } | |||
| if (out_nodes.empty()) { | |||
| GELOGW("graph [%s] does not contain NETOUTPUT type node,no return value. Do nothing!", graph->GetName().c_str()); | |||
| return ge::SUCCESS; | |||
| @@ -43,7 +43,7 @@ Status ReshapeRemovePass::Run(NodePtr &node) { | |||
| GE_CHECK_NOTNULL(node); | |||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
| int key = kToBeDeleteOp.find(node->GetType()) == kToBeDeleteOp.end() ? kOpNoDelete : kToBeDeleteOp[node->GetType()]; | |||
| switch(key) { | |||
| switch (key) { | |||
| case kReshapeType: { | |||
| bool is_shape_unknown = false; | |||
| if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { | |||
| @@ -385,7 +385,7 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra | |||
| // Break Move and follow, Link Data and follow. | |||
| const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex); | |||
| const auto in_anchors =out_anchor->GetPeerInDataAnchors(); | |||
| const auto in_anchors = out_anchor->GetPeerInDataAnchors(); | |||
| for (const auto in_anchor : in_anchors) { | |||
| GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); | |||
| GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); | |||
| @@ -991,7 +991,6 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, | |||
| Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option, | |||
| vector<vector<std::pair<int64_t, int64_t>>> &range_vec) { | |||
| // check both mode and shape_range option are all enabled | |||
| auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE); | |||
| bool enable_dynamic_execute_mode = (mode_iter != graph_option.end()) && (mode_iter->second == "dynamic_execute"); | |||
| if (!enable_dynamic_execute_mode) { | |||
| @@ -1272,9 +1271,10 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { | |||
| return SUCCESS; | |||
| } | |||
| Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option) { | |||
| Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, | |||
| const std::map<string, string> &graph_option) { | |||
| // Get shape range of input in dynamic_execute mode | |||
| vector<vector<std::pair<int64_t,int64_t>>> dynamic_shape_range_vec; | |||
| vector<vector<std::pair<int64_t, int64_t>>> dynamic_shape_range_vec; | |||
| auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); | |||
| GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); | |||
| compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); | |||
| @@ -2012,7 +2012,8 @@ Status GraphPrepare::ProcessNetOutput() { | |||
| return SUCCESS; | |||
| } | |||
| Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,const std::map<string,string> &graph_option) { | |||
| Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input, | |||
| const std::map<string, string> &graph_option) { | |||
| compute_graph_->SetInputSize(user_input.size()); | |||
| if (user_input.empty()) { | |||
| return SUCCESS; | |||
| @@ -23,7 +23,7 @@ | |||
| #include <vector> | |||
| #include "common/debug/log.h" | |||
| #include "common/debug/memory_dumper.h" | |||
| #include "common/model_parser/base.h" | |||
| #include "common/model_parser/model_parser.h" | |||
| #include "common/properties_manager.h" | |||
| #include "common/string_util.h" | |||
| #include "common/types.h" | |||
| @@ -63,8 +63,8 @@ class GraphPrepare { | |||
| Status CheckRefOp(); | |||
| Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); | |||
| Status AdjustDataOpOutput(const NodePtr &node); | |||
| Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option); | |||
| Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option); | |||
| Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option); | |||
| Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option); | |||
| Status CheckConstOp(); | |||
| Status VerifyConstOp(const NodePtr &node); | |||
| Status CheckUserInput(const std::vector<GeTensor> &user_input); | |||
| @@ -105,7 +105,7 @@ GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, con | |||
| /// @return 0: true/false | |||
| /// | |||
| GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | |||
| const std::string &input_format); | |||
| const std::string &input_format); | |||
| } // namespace multibatch | |||
| } // namespace ge | |||
| @@ -16,6 +16,8 @@ | |||
| #include "host_kernels/slice_kernel.h" | |||
| #include <set> | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "common/op/ge_op_utils.h" | |||
| #include "common/types.h" | |||
| @@ -31,6 +33,30 @@ const size_t kSliceInputSize = 3; | |||
| const size_t kSliceInputIndexX = 0; | |||
| const size_t kSliceInputIndexBegin = 1; | |||
| const size_t kSliceInputIndexSize = 2; | |||
| const std::set<ge::DataType> kSupportedDataTypeToLength = { | |||
| DT_BOOL, | |||
| DT_INT64, | |||
| DT_UINT64, | |||
| DT_FLOAT, | |||
| DT_INT32, | |||
| DT_UINT32, | |||
| DT_INT8, | |||
| DT_UINT8, | |||
| DT_INT16, | |||
| DT_UINT16, | |||
| DT_FLOAT16, | |||
| DT_DOUBLE, | |||
| DT_DUAL, | |||
| DT_DUAL_SUB_INT8, | |||
| DT_DUAL_SUB_UINT8, | |||
| DT_COMPLEX64, | |||
| DT_COMPLEX128, | |||
| DT_QINT8, | |||
| DT_QINT16, | |||
| DT_QINT32, | |||
| DT_QUINT8, | |||
| DT_QUINT16, | |||
| }; | |||
| } // namespace | |||
| Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTensorPtr> &input, | |||
| @@ -56,6 +82,16 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTenso | |||
| // data type in input_x | |||
| auto data_type = x_->GetTensorDesc().GetDataType(); | |||
| // check supported | |||
| if (kSupportedDataTypeToLength.count(data_type) == 0) { | |||
| GELOGW("input_x data_type is [%s], does not supported!", TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return NOT_CHANGED; | |||
| } | |||
| uint32_t type_size = 0; | |||
| bool is_success = TypeUtils::GetDataTypeLength(data_type, type_size); | |||
| if (!is_success) { | |||
| return NOT_CHANGED; | |||
| } | |||
| // check data type of begin and size | |||
| if (begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { | |||
| GELOGW("Data type of begin and size for slice are not DT_INT32."); | |||
| @@ -69,7 +105,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTenso | |||
| GE_CHECK_NOTNULL(begin_data); | |||
| GE_CHECK_NOTNULL(size_data); | |||
| size_t data_size = x_->GetData().size() / sizeof(int32_t); | |||
| size_t data_size = x_->GetData().size() / type_size; | |||
| size_t begin_size = begin->GetData().size() / sizeof(int32_t); | |||
| size_t size_size = size->GetData().size() / sizeof(int32_t); | |||
| const ge::GeShape &x_shape = x_->GetTensorDesc().GetShape(); | |||
| @@ -62,9 +62,9 @@ struct GraphExecutionContext { | |||
| rtStream_t stream = nullptr; | |||
| rtContext_t rt_context = nullptr; | |||
| rtContext_t rt_gen_context = nullptr; | |||
| std::unique_ptr<CallbackManager> callback_manager; | |||
| std::unique_ptr<CallbackManager> callback_manager = nullptr; | |||
| NpuMemoryAllocator *allocator = nullptr; | |||
| mutable std::unique_ptr<HybridProfiler> profiler; | |||
| mutable std::unique_ptr<HybridProfiler> profiler = nullptr; | |||
| DumpProperties dump_properties; | |||
| bool trace_enabled = false; | |||
| bool dump_enabled = false; | |||
| @@ -26,6 +26,7 @@ namespace hybrid { | |||
| namespace { | |||
| const int kDataOutputIndex = 0; | |||
| const size_t kMinimumPiplineStages = 2; | |||
| const int kDefaultLoopCount = 10; | |||
| } | |||
| HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) | |||
| : model_(model), run_flag_(false) { | |||
| @@ -150,7 +151,7 @@ Status HybridModelAsyncExecutor::RunInternal() { | |||
| GELOGI("HybridModel will execute in pipeline mode"); | |||
| auto iter_per_run = std::getenv("ITER_NUM"); | |||
| if (iter_per_run) { | |||
| args.num_loops = static_cast<int>(strtol(iter_per_run, nullptr, 10)); | |||
| args.num_loops = static_cast<int>(strtol(iter_per_run, nullptr, kDefaultLoopCount)); | |||
| } | |||
| ret = pipe_executor_->Execute(args); | |||
| } else { | |||
| @@ -250,7 +251,8 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy | |||
| if (k >= shape.GetDimNum()) { | |||
| break; | |||
| } | |||
| if (shape.GetDim(k) < range[k].first || shape.GetDim(k) > range[k].second) { | |||
| // range[k].second can be -1 | |||
| if (shape.GetDim(k) < range[k].first || (range[k].second >= 0 && shape.GetDim(k) > range[k].second)) { | |||
| GELOGE(PARAM_INVALID, "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld]", | |||
| input_index, k, shape.GetDim(k), range[k].first, range[k].second); | |||
| return PARAM_INVALID; | |||
| @@ -8,6 +8,7 @@ namespace ge { | |||
| namespace hybrid { | |||
| namespace { | |||
| constexpr int kNumExecutors = 2; | |||
| const int kMinLoopCount = 2; | |||
| const int kIntBase = 10; | |||
| const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; | |||
| } | |||
| @@ -208,7 +209,7 @@ Status HybridModelPipelineExecutor::InitStageExecutors() { | |||
| Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { | |||
| int loop_count = args.num_loops; | |||
| GE_CHECK_GE(loop_count, 2); | |||
| GE_CHECK_GE(loop_count, kMinLoopCount); | |||
| auto &inputs = args.inputs; | |||
| auto &input_desc = args.input_desc; | |||
| @@ -30,7 +30,7 @@ class NodeTask; | |||
| struct GraphExecutionContext; | |||
| class SubgraphContext; | |||
| class TaskContext; | |||
| class NodeState; | |||
| struct NodeState; | |||
| class ShapeFuture { | |||
| public: | |||
| @@ -275,10 +275,10 @@ Status SubgraphExecutor::PrepareNodes(int group) { | |||
| Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const { | |||
| GetContext().SetSessionId(context_->context_id); | |||
| HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), | |||
| "[%s] Failed to InferShape.", node_state.GetName().c_str()); | |||
| "[%s] Failed to InferShape.", node_state.GetName().c_str()); | |||
| GetContext().SetSessionId(context_->session_id); | |||
| HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state), | |||
| "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); | |||
| "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| @@ -70,8 +70,6 @@ class NodeDoneCallback { | |||
| Status PrepareConstInputs(const NodeItem &node_item); | |||
| Status DumpDynamicNode(); | |||
| Status ProfilingReport(); | |||
| Status GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||
| std::vector<ComputeGraphDescInfo> &compute_graph_info); | |||
| Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, | |||
| std::vector<TaskDescInfo> &task_desc_info); | |||
| GraphExecutionContext *graph_context_; | |||
| @@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||
| } | |||
| GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| task_desc_info = context_->GetProfilingTaskDescInfo(); | |||
| context_->ClearProfilingTaskDescInfo(); | |||
| return SUCCESS; | |||
| } | |||
| Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||
| std::vector<ComputeGraphDescInfo> &compute_graph_info) { | |||
| GE_CHECK_NOTNULL(node); | |||
| GE_CHECK_NOTNULL(model); | |||
| GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | |||
| compute_graph_info = context_->GetProfilingGraphDescInfo(); | |||
| context_->ClearProfilingGraphDescInfo(); | |||
| auto op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| for (auto &tmp_compute_graph_info : compute_graph_info) { | |||
| // default | |||
| if (op_desc->GetAllInputsSize() == 0) { | |||
| tmp_compute_graph_info.input_format = { FORMAT_NULL }; | |||
| tmp_compute_graph_info.input_shape = { {0} }; | |||
| tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; | |||
| } | |||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
| GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||
| if (input_desc == nullptr) { | |||
| continue; | |||
| } | |||
| tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||
| tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||
| tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||
| } | |||
| if (op_desc->GetOutputsSize() == 0) { | |||
| tmp_compute_graph_info.output_format = { FORMAT_NULL }; | |||
| tmp_compute_graph_info.output_shape = { {0} }; | |||
| tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; | |||
| } | |||
| for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||
| GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||
| tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||
| tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||
| tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||
| } | |||
| for (auto &tmp_task_desc : task_desc_info) { | |||
| // save op input and output info | |||
| auto op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc); | |||
| } | |||
| return SUCCESS; | |||
| @@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() { | |||
| return profiling_ret; | |||
| } | |||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
| profiling_ret = GetGraphDescInfo(node, model, compute_graph_info); | |||
| if (profiling_ret != RT_ERROR_NONE) { | |||
| GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str()); | |||
| return profiling_ret; | |||
| } | |||
| auto &profiling_manager = ProfilingManager::Instance(); | |||
| profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info); | |||
| profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info); | |||
| return SUCCESS; | |||
| } | |||
| @@ -323,6 +277,8 @@ Status NodeDoneCallback::OnNodeDone() { | |||
| node_item.NodeName().c_str()); | |||
| } | |||
| // release workspace | |||
| context_->ReleaseWorkspace(); | |||
| // release inputs | |||
| for (int i = 0; i < context_->NumInputs(); ++i) { | |||
| context_->ReleaseInput(i); | |||
| @@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||
| op_index = task_def.kernel_ex().op_index(); | |||
| } else if (task_type == RT_MODEL_TASK_HCCL) { | |||
| op_index = task_def.kernel_hccl().op_index(); | |||
| } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||
| op_index = task_def.kernel_with_handle().context().op_index(); | |||
| } else { | |||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||
| continue; | |||
| @@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||
| } | |||
| auto &node = iter->second; | |||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||
| ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); | |||
| } | |||
| @@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
| uint32_t stream_id = 0; | |||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
| return FAILED; | |||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
| } | |||
| @@ -33,6 +33,20 @@ constexpr char const *kAttrOpParamSize = "op_para_size"; | |||
| constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | |||
| } // namespace | |||
| TbeHandleHolder::TbeHandleHolder(void *bin_handle) | |||
| : bin_handle_(bin_handle) {} | |||
| TbeHandleHolder::~TbeHandleHolder() { | |||
| if (bin_handle_ != nullptr) { | |||
| GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); | |||
| } | |||
| } | |||
| bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { | |||
| auto ret = registered_handles_.emplace(std::move(holder)); | |||
| return ret.second; | |||
| } | |||
| Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||
| GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | |||
| GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | |||
| @@ -69,7 +83,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||
| if (rt_ret != RT_ERROR_NONE || is_single_op_) { | |||
| void *bin_handle = nullptr; | |||
| if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | |||
| GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||
| GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | |||
| rtDevBinary_t binary; | |||
| std::string json_string; | |||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), | |||
| @@ -96,7 +110,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||
| GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | |||
| kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); | |||
| } else { | |||
| GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||
| GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | |||
| kernel_store.ReferTBEHandle(stub_name_.c_str()); | |||
| } | |||
| std::string kernel_name; | |||
| @@ -108,25 +122,63 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||
| return SUCCESS; | |||
| } | |||
| Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||
| GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | |||
| "[%s] Failed to validate task def: [%s]", | |||
| op_desc.GetName().c_str(), | |||
| task_def.DebugString().c_str()); | |||
| Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) { | |||
| TbeHandleRegistry ®istry = TbeHandleRegistry::GetInstance(); | |||
| auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||
| if (tbe_kernel == nullptr) { | |||
| GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| void *bin_handle = nullptr; | |||
| GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str()); | |||
| rtDevBinary_t binary; | |||
| std::string json_string; | |||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string), | |||
| GELOGI("Get original type of session_graph_id.")); | |||
| if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | |||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | |||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | |||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF; | |||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | |||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||
| } else { | |||
| GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| binary.version = 0; | |||
| binary.data = tbe_kernel->GetBinData(); | |||
| binary.length = tbe_kernel->GetBinDataSize(); | |||
| GELOGI("TBE: binary.length: %lu", binary.length); | |||
| GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle)); | |||
| handle_ = bin_handle; | |||
| auto holder = std::unique_ptr<TbeHandleHolder>(new (std::nothrow) TbeHandleHolder(handle_)); | |||
| if (holder == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| if (!registry.AddHandle(std::move(holder))) { | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc.GetName().c_str()); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| const domi::KernelContext &context = kernel_def.context(); | |||
| stub_name_ = kernel_def.stub_func(); | |||
| GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); | |||
| GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); | |||
| args_size_ = kernel_def.args_size(); | |||
| block_dim_ = kernel_def.block_dim(); | |||
| // malloc args memory | |||
| args_.reset(new(std::nothrow) uint8_t[args_size_]); | |||
| GE_CHECK_NOTNULL(args_); | |||
| if (kernel_def.args().size() < args_size_) { | |||
| GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (err != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); | |||
| @@ -157,19 +209,75 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef | |||
| block_dim_, | |||
| arg_base_, | |||
| args_size_); | |||
| return SUCCESS; | |||
| } | |||
| Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||
| const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle(); | |||
| const domi::KernelContext &context = kernel_with_handle.context(); | |||
| GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc)); | |||
| original_kernel_key_ = kernel_with_handle.original_kernel_key() + "_"; | |||
| node_info_ = kernel_with_handle.node_info() + "/"; | |||
| args_size_ = kernel_with_handle.args_size(); | |||
| block_dim_ = kernel_with_handle.block_dim(); | |||
| // malloc args memory | |||
| args_.reset(new(std::nothrow) uint8_t[args_size_]); | |||
| GE_CHECK_NOTNULL(args_); | |||
| if (kernel_with_handle.args().size() < args_size_) { | |||
| GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_); | |||
| if (err != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| if (context.args_offset().size() < sizeof(uint16_t)) { | |||
| GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||
| uint32_t offset = *args_offset_buffer; | |||
| if (offset > args_size_) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| "[%s] Arg offset out of range. offset = %u, arg size = %u", | |||
| GetName().c_str(), | |||
| offset, | |||
| args_size_); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset); | |||
| max_arg_count_ = (args_size_ - offset) / sizeof(void *); | |||
| return SUCCESS; | |||
| } | |||
| Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||
| GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | |||
| "[%s] Failed to validate task def: [%s]", | |||
| op_desc.GetName().c_str(), | |||
| task_def.DebugString().c_str()); | |||
| if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) { | |||
| GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def)); | |||
| } else { | |||
| GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def)); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { | |||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| if (task_type != RT_MODEL_TASK_KERNEL) { | |||
| if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) { | |||
| GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type)); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| const domi::KernelContext &context = kernel_def.context(); | |||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||
| task_def.kernel_with_handle().context(); | |||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||
| if (kernel_type != ccKernelType::TE) { | |||
| GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type)); | |||
| @@ -180,10 +288,9 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { | |||
| } | |||
| Status AiCoreOpTask::PrepareWithShape(TaskContext &context) { | |||
| if (tiling_buffer_ != nullptr) { | |||
| if (is_dynamic_) { | |||
| return UpdateTilingInfo(context); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -212,8 +319,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { | |||
| clear_atomic_ = tiling_info.clear_atomic; | |||
| tiling_data_ = tiling_info.tiling_data.str(); | |||
| tiling_key_ = tiling_info.tiling_key; | |||
| GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); | |||
| if (tiling_data_.empty()) { | |||
| GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str()); | |||
| GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| if (tiling_buffer_ == nullptr) { | |||
| GELOGE(INTERNAL_ERROR, "tiling_buffer is nullptr while tiling_data is not empty!"); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| @@ -238,6 +351,9 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) | |||
| GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), | |||
| "Failed calc tiling data of node %s.", | |||
| node->GetName().c_str()); | |||
| if (is_single_op_) { | |||
| tiling_info.clear_atomic = false; | |||
| } | |||
| GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| @@ -296,16 +412,26 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { | |||
| } | |||
| Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { | |||
| GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); | |||
| GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||
| if (handle_ != nullptr) { | |||
| std::string dev_func = original_kernel_key_ + std::to_string(tiling_key_); | |||
| std::string kernel_info = node_info_ + std::to_string(tiling_key_); | |||
| GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).", dev_func.c_str(), | |||
| block_dim_); | |||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), args_size_, nullptr, | |||
| stream, kernel_info.c_str())); | |||
| GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).", dev_func.c_str(), | |||
| block_dim_); | |||
| } else { | |||
| GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); | |||
| GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { | |||
| bool dynamic_supported = false; | |||
| (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported); | |||
| if (!dynamic_supported) { | |||
| (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, is_dynamic_); | |||
| if (!is_dynamic_) { | |||
| GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| @@ -314,22 +440,26 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { | |||
| int64_t max_size = -1; | |||
| (void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size); | |||
| GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); | |||
| if (max_size <= 0) { | |||
| if (max_size < 0) { | |||
| GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size); | |||
| return PARAM_INVALID; | |||
| } | |||
| auto allocator = NpuMemoryAllocator::GetAllocator(); | |||
| GE_CHECK_NOTNULL(allocator); | |||
| tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size)); | |||
| GE_CHECK_NOTNULL(tiling_buffer_); | |||
| if (max_size > 0) { | |||
| tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size)); | |||
| GE_CHECK_NOTNULL(tiling_buffer_); | |||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); | |||
| } else { | |||
| GELOGD("op_param_size is 0, no need to create tiling buffer."); | |||
| } | |||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); | |||
| return SUCCESS; | |||
| } | |||
| bool AiCoreOpTask::IsDynamicShapeSupported() { | |||
| return tiling_buffer_ != nullptr; | |||
| return is_dynamic_; | |||
| } | |||
| const std::string &AiCoreOpTask::GetName() const { | |||
| @@ -28,6 +28,32 @@ | |||
| namespace ge { | |||
| namespace hybrid { | |||
| class TbeHandleHolder { | |||
| public: | |||
| TbeHandleHolder(void *bin_handle); | |||
| ~TbeHandleHolder(); | |||
| void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; } | |||
| void *GetBinHandle() { return bin_handle_; } | |||
| private: | |||
| friend class TbeHandleRegistry; | |||
| void *bin_handle_ = nullptr; | |||
| }; | |||
| class TbeHandleRegistry { | |||
| public: | |||
| static TbeHandleRegistry &GetInstance() { | |||
| static TbeHandleRegistry instance; | |||
| return instance; | |||
| } | |||
| bool AddHandle(std::unique_ptr<TbeHandleHolder> &&holder); | |||
| private: | |||
| std::set<std::unique_ptr<TbeHandleHolder>> registered_handles_; | |||
| }; | |||
| class AiCoreOpTask { | |||
| public: | |||
| AiCoreOpTask() = default; | |||
| @@ -67,6 +93,9 @@ class AiCoreOpTask { | |||
| Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); | |||
| Status InitTilingInfo(const OpDesc &op_desc); | |||
| Status RegisterTbeHandle(const OpDesc &op_desc); | |||
| Status RegisterKernelHandle(const OpDesc &op_desc); | |||
| Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def); | |||
| Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def); | |||
| std::string stub_name_; | |||
| void *stub_func_ = nullptr; | |||
| @@ -76,6 +105,11 @@ class AiCoreOpTask { | |||
| bool clear_atomic_ = true; | |||
| bool is_single_op_ = false; | |||
| std::vector<int> output_indices_to_skip_; | |||
| string original_kernel_key_; | |||
| string node_info_; | |||
| uint32_t tiling_key_ = 0; | |||
| void *handle_ = nullptr; | |||
| bool is_dynamic_ = false; | |||
| }; | |||
| class AtomicAddrCleanOpTask : public AiCoreOpTask { | |||
| @@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||
| uint32_t stream_id = 0; | |||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
| return FAILED; | |||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
| auto callback = [=, &context]() { | |||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | |||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | |||
| @@ -36,10 +36,6 @@ TaskContext::TaskContext(GraphExecutionContext *execution_context, | |||
| TaskContext::~TaskContext() { | |||
| GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str()); | |||
| for (auto ws_addr : workspaces_) { | |||
| execution_context_->allocator->Deallocate(ws_addr); | |||
| } | |||
| // release output | |||
| for (int i = 0; i < NumOutputs(); ++i) { | |||
| auto output_tensor = MutableOutput(i); | |||
| @@ -49,6 +45,13 @@ TaskContext::~TaskContext() { | |||
| } | |||
| } | |||
| void TaskContext::ReleaseWorkspace() { | |||
| GELOGD("[%s] Start ReleaseWorkspace.", node_item_->NodeName().c_str()); | |||
| for (auto ws_addr : workspaces_) { | |||
| execution_context_->allocator->Deallocate(ws_addr); | |||
| } | |||
| } | |||
| std::unique_ptr<TaskContext> TaskContext::Create(NodeState *node_state, | |||
| GraphExecutionContext *execution_context, | |||
| SubgraphContext *subgraph_context) { | |||
| @@ -512,21 +515,21 @@ Status TaskContext::Synchronize() { | |||
| } | |||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
| uint32_t task_type, uint32_t block_dim) { | |||
| const std::string &task_type, uint32_t block_dim) { | |||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||
| const NodeItem &node_item = GetNodeItem(); | |||
| auto op_desc = node_item.GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| const GraphExecutionContext * graph_context = GetExecutionContext(); | |||
| const GraphExecutionContext *graph_context = GetExecutionContext(); | |||
| GE_CHECK_NOTNULL(graph_context); | |||
| const HybridModel *model = graph_context->model; | |||
| GE_CHECK_NOTNULL(model); | |||
| std::string op_name = op_desc->GetName(); | |||
| std::string dynamic_model_name = model->GetModelName(); | |||
| TaskDescInfo tmp_task_desc_info; | |||
| tmp_task_desc_info.model_name = dynamic_model_name; | |||
| tmp_task_desc_info.op_name = op_name; | |||
| tmp_task_desc_info.op_name = op_desc->GetName(); | |||
| tmp_task_desc_info.op_type = op_desc->GetType(); | |||
| tmp_task_desc_info.block_dim = block_dim; | |||
| tmp_task_desc_info.task_type = task_type; | |||
| tmp_task_desc_info.task_id = task_id; | |||
| @@ -543,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const { | |||
| return node_state_; | |||
| } | |||
| Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { | |||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||
| const NodeItem &node_item = GetNodeItem(); | |||
| auto op_desc = node_item.GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| const GraphExecutionContext * graph_context = GetExecutionContext(); | |||
| GE_CHECK_NOTNULL(graph_context); | |||
| const HybridModel *model = graph_context->model; | |||
| GE_CHECK_NOTNULL(model); | |||
| std::string dynamic_model_name = model->GetModelName(); | |||
| auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||
| op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||
| ComputeGraphDescInfo tmp_compute_graph_info; | |||
| tmp_compute_graph_info.model_name = dynamic_model_name; | |||
| tmp_compute_graph_info.op_name = op_desc->GetName(); | |||
| tmp_compute_graph_info.op_type = op_desc->GetType(); | |||
| tmp_compute_graph_info.task_id = task_id; | |||
| tmp_compute_graph_info.stream_id = stream_id; | |||
| compute_graph_info.emplace_back(tmp_compute_graph_info); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace hybrid | |||
| } // namespace ge | |||
| @@ -56,6 +56,7 @@ class TaskContext { | |||
| void ReleaseInputsAndOutputs(); | |||
| bool NeedCallback(); | |||
| void ReleaseInput(int index); | |||
| void ReleaseWorkspace(); | |||
| const TensorValue *GetInput(int index) const; | |||
| const TensorValue *GetOutput(int index) const; | |||
| TensorValue *MutableOutput(int index); | |||
| @@ -112,13 +113,10 @@ class TaskContext { | |||
| void *handle_ = nullptr; | |||
| const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | |||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); | |||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
| const std::string &task_type, uint32_t block_dim); | |||
| void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | |||
| const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; } | |||
| Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); | |||
| void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } | |||
| private: | |||
| TaskContext(GraphExecutionContext *execution_context, | |||
| NodeState *node_state, | |||
| @@ -140,7 +138,6 @@ class TaskContext { | |||
| uint32_t task_id_ = 0; | |||
| uint32_t stream_id_ = 0; | |||
| std::vector<TaskDescInfo> task_desc_info; | |||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
| }; | |||
| } // namespace hybrid | |||
| } // namespace ge | |||
| @@ -62,19 +62,18 @@ using std::shared_ptr; | |||
| using std::string; | |||
| using std::vector; | |||
| namespace { | |||
| static bool is_dynamic_input = false; | |||
| const char *const kModeSupport = "only support 0(model to framework model), " | |||
| "1(framework model to json), 3(only pre-check), " | |||
| "5(pbtxt to json), 6(display model info)"; | |||
| const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; | |||
| static const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; | |||
| static const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; | |||
| static const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; | |||
| const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; | |||
| const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; | |||
| const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; | |||
| // limit available mem size 2G | |||
| const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 | |||
| } // namespace | |||
| DEFINE_string(model, "", "The model file."); | |||
| DEFINE_string(output, "", "The output file path&name."); | |||
| @@ -1326,6 +1325,7 @@ int init(int argc, char* argv[]) { | |||
| return ret; | |||
| } | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| return 0; | |||
| } | |||
| @@ -23,7 +23,7 @@ | |||
| #include "common/debug/memory_dumper.h" | |||
| #include "common/ge/ge_util.h" | |||
| #include "common/helper/model_helper.h" | |||
| #include "common/model_parser/base.h" | |||
| #include "common/model_parser/model_parser.h" | |||
| #include "common/model_saver.h" | |||
| #include "common/properties_manager.h" | |||
| #include "common/string_util.h" | |||
| @@ -965,7 +965,8 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js | |||
| } else { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10003", | |||
| {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||
| "ParseModelContent failed because of invalid om file. Please check --om param."); | |||
| } | |||
| if (model.model_data != nullptr) { | |||
| @@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||
| return SUCCESS; | |||
| } | |||
| string model_name; | |||
| string op_name; | |||
| TaskDescInfo tmp_task_desc_info; | |||
| uint32_t model_id; | |||
| uint32_t block_dim; | |||
| if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { | |||
| if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); | |||
| std::vector<TaskDescInfo> task_desc_info; | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("ProfilingReport of op[%s] model[%s] start.", | |||
| tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str()); | |||
| TaskDescInfo tmp_task_desc_info; | |||
| tmp_task_desc_info.model_name = model_name; | |||
| tmp_task_desc_info.op_name = op_name; | |||
| tmp_task_desc_info.block_dim = block_dim; | |||
| tmp_task_desc_info.task_id = task_id; | |||
| tmp_task_desc_info.stream_id = stream_id; | |||
| tmp_task_desc_info.shape_type = shape_type; | |||
| tmp_task_desc_info.cur_iter_num = 0; | |||
| tmp_task_desc_info.task_type = op_task->GetTaskType(); | |||
| GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
| std::vector<TaskDescInfo> task_desc_info; | |||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||
| auto &profiling_manager = ProfilingManager::Instance(); | |||
| profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info); | |||
| profiling_manager.ReportProfilingData(model_id, task_desc_info); | |||
| return SUCCESS; | |||
| } | |||
| } // namespace | |||
| @@ -30,8 +30,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManag | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFromModel(const std::string &model_name, | |||
| const ModelData &model_data, | |||
| void *stream, | |||
| SingleOp **single_op) { | |||
| GELOGI("GetOpFromModel in. model name = %s", model_name.c_str()); | |||
| SingleOp **single_op, | |||
| const uint64_t model_id) { | |||
| GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id); | |||
| if (single_op == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "single op is null"); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| @@ -99,7 +100,9 @@ StreamResource *SingleOpManager::TryGetResource(uintptr_t resource_id) { | |||
| Status SingleOpManager::GetDynamicOpFromModel(const string &model_name, | |||
| const ModelData &model_data, | |||
| void *stream, | |||
| DynamicSingleOp **single_op) { | |||
| DynamicSingleOp **single_op, | |||
| const uint64_t model_id) { | |||
| GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id); | |||
| if (!tiling_func_registered_) { | |||
| RegisterTilingFunc(); | |||
| } | |||
| @@ -37,12 +37,14 @@ class SingleOpManager { | |||
| Status GetOpFromModel(const std::string &model_name, | |||
| const ge::ModelData &model_data, | |||
| void *stream, | |||
| SingleOp **single_op); | |||
| SingleOp **single_op, | |||
| const uint64_t model_id); | |||
| Status GetDynamicOpFromModel(const std::string &model_name, | |||
| const ge::ModelData &model_data, | |||
| void *stream, | |||
| DynamicSingleOp **dynamic_single_op); | |||
| DynamicSingleOp **dynamic_single_op, | |||
| const uint64_t model_id); | |||
| StreamResource *GetResource(uintptr_t resource_id, rtStream_t stream); | |||
| @@ -190,7 +190,7 @@ Status SingleOpModel::LoadAllNodes() { | |||
| auto node = nodes.at(i); | |||
| auto op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| op_list_[i] = node; | |||
| op_list_[op_desc->GetId()] = node; | |||
| auto op_type = op_desc->GetType(); | |||
| GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, node->GetName().c_str(), op_type.c_str()); | |||
| @@ -261,7 +261,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s | |||
| if (kernel_type == ccKernelType::TE) { | |||
| GELOGD("Building TBE task"); | |||
| TbeOpTask *tbe_task = nullptr; | |||
| auto ret = BuildKernelTask(task_def.kernel(), &tbe_task); | |||
| auto ret = BuildKernelTask(task_def, &tbe_task); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| @@ -332,9 +332,11 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) { | |||
| } | |||
| } | |||
| Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) { | |||
| Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) { | |||
| GE_CHECK_NOTNULL(task); | |||
| const auto &context = kernel_def.context(); | |||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||
| task_def.kernel_with_handle().context(); | |||
| auto iter = op_list_.find(context.op_index()); | |||
| if (iter == op_list_.end()) { | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index()); | |||
| @@ -347,7 +349,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def); | |||
| auto builder = TbeTaskBuilder(model_name_, iter->second, task_def); | |||
| auto ret = builder.BuildTask(*tbe_task, model_params_); | |||
| if (ret != SUCCESS) { | |||
| delete tbe_task; | |||
| @@ -418,13 +420,15 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | |||
| } | |||
| Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| const auto &context = kernel_def.context(); | |||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||
| task_def.kernel_with_handle().context(); | |||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||
| if (kernel_type == ccKernelType::TE) { | |||
| GELOGD("Building TBE task"); | |||
| TbeOpTask *tbe_task = nullptr; | |||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | |||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | |||
| tbe_task->SetModelArgs(model_name_, model_id_); | |||
| single_op.op_task_.reset(tbe_task); | |||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||
| @@ -453,7 +457,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||
| GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), | |||
| task_def.DebugString().c_str()); | |||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||
| if (single_op.op_task_ != nullptr) { | |||
| GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | |||
| return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | |||
| @@ -24,7 +24,6 @@ | |||
| #include <vector> | |||
| #include "common/helper/model_helper.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "single_op/single_op.h" | |||
| #include "single_op/stream_resource.h" | |||
| @@ -67,7 +66,7 @@ class SingleOpModel { | |||
| Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | |||
| Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | |||
| Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); | |||
| Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | |||
| Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | |||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | |||
| Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | |||
| @@ -23,6 +23,7 @@ | |||
| #include "aicpu/common/aicpu_task_struct.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "common/dump/dump_op.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "common/formats/formats.h" | |||
| #include "common/math/math_util.h" | |||
| #include "framework/common/debug/log.h" | |||
| @@ -93,6 +94,14 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size | |||
| op_desc_ = op_desc; | |||
| } | |||
| void TbeOpTask::SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | |||
| const OpDescPtr &op_desc, | |||
| const domi::KernelDefWithHandle &kernel_def_with_handle) { | |||
| SetKernelArgs(std::move(args), arg_size, block_dim, op_desc); | |||
| original_kernel_key_ = kernel_def_with_handle.original_kernel_key(); | |||
| node_info_ = kernel_def_with_handle.node_info(); | |||
| } | |||
| void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; } | |||
| void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | |||
| @@ -100,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | |||
| model_id_ = model_id; | |||
| } | |||
| Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, | |||
| uint32_t &block_dim) { | |||
| model_name = model_name_; | |||
| model_id = model_id_; | |||
| block_dim = block_dim_; | |||
| Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) { | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GE_CHECK_NOTNULL(op_desc_); | |||
| op_name = op_desc_->GetName(); | |||
| string op_name = op_desc_->GetName(); | |||
| GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||
| model_id = model_id_; | |||
| task_desc_info.model_name = model_name_; | |||
| task_desc_info.block_dim = block_dim_; | |||
| task_desc_info.task_id = task_id; | |||
| task_desc_info.stream_id = stream_id; | |||
| task_desc_info.op_name = op_name; | |||
| task_desc_info.op_type = op_desc_->GetType(); | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info); | |||
| return SUCCESS; | |||
| } | |||
| Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | |||
| return UNSUPPORTED; | |||
| } | |||
| @@ -145,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| return UNSUPPORTED; | |||
| } | |||
| uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||
| const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||
| TbeOpTask::~TbeOpTask() { | |||
| if (sm_desc_ != nullptr) { | |||
| @@ -163,7 +186,11 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } | |||
| const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | |||
| uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||
| const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||
| void TbeOpTask::SetHandle(void *handle) { | |||
| this->handle_ = handle; | |||
| } | |||
| Status TbeOpTask::LaunchKernel(rtStream_t stream) { | |||
| GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | |||
| @@ -204,8 +231,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve | |||
| } | |||
| block_dim_ = run_info.block_dim; | |||
| tiling_data_ = run_info.tiling_data.str(); | |||
| GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_, | |||
| tiling_data_.size()); | |||
| tiling_key_ = run_info.tiling_key; | |||
| GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, | |||
| tiling_data_.size(), tiling_key_); | |||
| GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); | |||
| return SUCCESS; | |||
| @@ -329,8 +357,17 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| } | |||
| GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | |||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||
| GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||
| if (handle_ == nullptr) { | |||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||
| GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||
| } else { | |||
| std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); | |||
| std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); | |||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, | |||
| stream, kernel_info.c_str())); | |||
| GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -363,7 +400,8 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||
| num_inputs_, | |||
| num_outputs_, | |||
| unknown_type_)); | |||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!"); | |||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Malloc aicpu_ext_handle mem failed!"); | |||
| Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); | |||
| if (ret != SUCCESS) { | |||
| @@ -401,7 +439,7 @@ Status AiCpuBaseTask::SetInputConst() { | |||
| return SUCCESS; | |||
| } | |||
| Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||
| Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||
| std::vector<GeTensorDesc> &output_desc, | |||
| rtStream_t stream) { | |||
| GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); | |||
| @@ -811,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||
| return DoUpdateArgTable(param, false); | |||
| } | |||
| uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||
| const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||
| void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | |||
| arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | |||
| @@ -43,7 +43,7 @@ class OpTask { | |||
| const vector<GeTensorDesc> &output_desc); | |||
| virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | |||
| void SetModelArgs(std::string model_name, uint32_t model_id); | |||
| Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); | |||
| Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||
| const OpDescPtr &GetOpdesc() const {return op_desc_;} | |||
| Status OpenDump(rtStream_t stream); | |||
| virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | |||
| @@ -52,7 +52,7 @@ class OpTask { | |||
| std::vector<GeTensorDesc> &output_desc, | |||
| std::vector<DataBuffer> &output_buffers, | |||
| rtStream_t stream); | |||
| virtual uint32_t GetTaskType() const; | |||
| virtual const std::string &GetTaskType() const; | |||
| protected: | |||
| Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | |||
| @@ -78,6 +78,8 @@ class TbeOpTask : public OpTask { | |||
| void SetSmDesc(void *sm_desc); | |||
| void SetStubFunc(const std::string &name, const void *stub_func); | |||
| void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); | |||
| void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | |||
| const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | |||
| Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc, | |||
| const vector<GeTensorDesc> &output_desc) override; | |||
| @@ -86,7 +88,8 @@ class TbeOpTask : public OpTask { | |||
| size_t GetArgSize() const; | |||
| const std::string &GetStubName() const; | |||
| void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | |||
| uint32_t GetTaskType() const override; | |||
| const std::string &GetTaskType() const override; | |||
| void SetHandle(void *handle); | |||
| private: | |||
| friend class SingleOpModel; | |||
| @@ -107,6 +110,11 @@ class TbeOpTask : public OpTask { | |||
| std::string tiling_data_; | |||
| std::vector<void *> workspaces_; | |||
| NodePtr node_; | |||
| uint32_t tiling_key_ = 0; | |||
| void* handle_ = nullptr; | |||
| std::string original_kernel_key_; | |||
| std::string node_info_; | |||
| }; | |||
| class AiCpuBaseTask : public OpTask { | |||
| @@ -115,7 +123,7 @@ class AiCpuBaseTask : public OpTask { | |||
| ~AiCpuBaseTask() override; | |||
| UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | |||
| Status UpdateArgTable(const SingleOpModelParam ¶m) override; | |||
| uint32_t GetTaskType() const override; | |||
| const std::string &GetTaskType() const override; | |||
| protected: | |||
| Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
| @@ -49,6 +49,15 @@ KernelHolder::~KernelHolder() { | |||
| } | |||
| } | |||
| HandleHolder::HandleHolder(void *bin_handle) | |||
| : bin_handle_(bin_handle) {} | |||
| HandleHolder::~HandleHolder() { | |||
| if (bin_handle_ != nullptr) { | |||
| GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); | |||
| } | |||
| } | |||
| const char *KernelBinRegistry::GetUnique(const string &stub_func) { | |||
| std::lock_guard<std::mutex> lock(mutex_); | |||
| auto it = unique_stubs_.find(stub_func); | |||
| @@ -76,10 +85,17 @@ bool KernelBinRegistry::AddKernel(const std::string &stub_name, std::unique_ptr< | |||
| return ret.second; | |||
| } | |||
| TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def) | |||
| bool HandleRegistry::AddHandle(std::unique_ptr<HandleHolder> &&holder) { | |||
| auto ret = registered_handles_.emplace(std::move(holder)); | |||
| return ret.second; | |||
| } | |||
| TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def) | |||
| : node_(node), | |||
| op_desc_(node->GetOpDesc()), | |||
| kernel_def_(kernel_def), | |||
| task_def_(task_def), | |||
| kernel_def_(task_def.kernel()), | |||
| kernel_def_with_handle_(task_def.kernel_with_handle()), | |||
| stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} | |||
| Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, | |||
| @@ -89,9 +105,14 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi | |||
| binary.data = kernel_bin.GetBinData(); | |||
| binary.length = kernel_bin.GetBinDataSize(); | |||
| binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||
| auto ret = rtDevBinaryRegister(&binary, bin_handle); | |||
| Status ret = 0; | |||
| if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { | |||
| ret = rtRegisterAllKernel(&binary, bin_handle); | |||
| } else { | |||
| ret = rtDevBinaryRegister(&binary, bin_handle); | |||
| } | |||
| if (ret != RT_ERROR_NONE) { | |||
| GELOGE(ret, "rtDevBinaryRegister failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), | |||
| GELOGE(ret, "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), | |||
| param.core_type, static_cast<int>(ret)); | |||
| return ret; | |||
| } | |||
| @@ -128,14 +149,15 @@ Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_nam | |||
| Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle, | |||
| const SingleOpModelParam ¶m) { | |||
| std::string kernel_name; | |||
| GetKernelName(op_desc_, kernel_name); | |||
| void *handle = nullptr; | |||
| auto ret = DoRegisterBinary(tbe_kernel, &handle, param); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { | |||
| *bin_handle = handle; | |||
| return SUCCESS; | |||
| } | |||
| ret = DoRegisterMeta(handle); | |||
| if (ret != SUCCESS) { | |||
| @@ -143,6 +165,8 @@ Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const | |||
| return ret; | |||
| } | |||
| std::string kernel_name; | |||
| GetKernelName(op_desc_, kernel_name); | |||
| ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str()); | |||
| if (ret != SUCCESS) { | |||
| GE_CHK_RT(rtDevBinaryUnRegister(handle)); | |||
| @@ -186,13 +210,15 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam | |||
| void *bin_handle = nullptr; | |||
| auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param); | |||
| if (ret == SUCCESS) { | |||
| holder->SetBinHandle(bin_handle); | |||
| if (!registry.AddKernel(stub_name_, std::move(holder))) { | |||
| // should not happen. only one thread can reach here | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. stub name = %s", stub_name_.c_str()); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| holder->SetBinHandle(bin_handle); | |||
| if (!registry.AddKernel(stub_name_, std::move(holder))) { | |||
| // should not happen. only one thread can reach here | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| } | |||
| @@ -200,6 +226,35 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam | |||
| return SUCCESS; | |||
| } | |||
| Status TbeTaskBuilder::RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m) { | |||
| GELOGD("RegisterKernelWithHandle begin."); | |||
| HandleRegistry ®istry = HandleRegistry::GetInstance(); | |||
| auto tbe_kernel = GetTbeKernel(op_desc_); | |||
| if (tbe_kernel == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s", | |||
| op_desc_->GetName().c_str()); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| void *bin_handle = nullptr; | |||
| auto ret = DoRegisterKernel(*tbe_kernel, nullptr, &bin_handle, param); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. node name = %s", op_desc_->GetName().c_str()); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| handle_ = bin_handle; | |||
| auto holder = std::unique_ptr<HandleHolder>(new (std::nothrow) HandleHolder(handle_)); | |||
| if (holder == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| if (!registry.AddHandle(std::move(holder))) { | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc_->GetName().c_str()); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const { | |||
| const std::string &sm_desc_str = kernel_def_.sm_desc(); | |||
| if (sm_desc_str.empty()) { | |||
| @@ -217,17 +272,17 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m | |||
| } | |||
| } | |||
| auto rtRet = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); | |||
| if (rtRet != RT_ERROR_NONE) { | |||
| GELOGE(rtRet, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rtRet)); | |||
| return rtRet; | |||
| auto rt_ret = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rt_ret)); | |||
| return rt_ret; | |||
| } | |||
| rtRet = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rtRet != RT_ERROR_NONE) { | |||
| rt_ret = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| (void)rtMemFreeManaged(*sm_desc); | |||
| GELOGE(rtRet, "rtMemcpy, ret: %d", static_cast<int>(rtRet)); | |||
| return rtRet; | |||
| GELOGE(rt_ret, "rtMemcpy, ret: %d", static_cast<int>(rt_ret)); | |||
| return rt_ret; | |||
| } | |||
| } | |||
| @@ -239,10 +294,10 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & | |||
| auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||
| GE_CHECK_NOTNULL(args); | |||
| auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||
| if (rtRet != RT_ERROR_NONE) { | |||
| GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rtRet)); | |||
| return RT_ERROR_TO_GE_STATUS(rtRet); | |||
| auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| const domi::KernelContext &context = kernel_def_.context(); | |||
| @@ -258,39 +313,83 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & | |||
| std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||
| void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||
| uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||
| rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||
| if (rtRet != RT_ERROR_NONE) { | |||
| GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rtRet)); | |||
| return RT_ERROR_TO_GE_STATUS(rtRet); | |||
| rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| } | |||
| task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | |||
| return SUCCESS; | |||
| } | |||
| Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, | |||
| const OpDescPtr &op_desc) { | |||
| size_t arg_size = kernel_def_with_handle_.args_size(); | |||
| auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||
| GE_CHECK_NOTNULL(args); | |||
| auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||
| return rt_ret; | |||
| } | |||
| const domi::KernelContext &context = kernel_def_with_handle_.context(); | |||
| const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||
| uint16_t offset = *args_offset_tmp; | |||
| bool is_dynamic = false; | |||
| (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | |||
| if (is_dynamic) { | |||
| GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | |||
| } else { | |||
| // copy args | |||
| std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||
| void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||
| uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||
| rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||
| return rt_ret; | |||
| } | |||
| } | |||
| task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | |||
| kernel_def_with_handle_); | |||
| return SUCCESS; | |||
| } | |||
| Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | |||
| GELOGD("Build tbe task begin"); | |||
| auto ret = SetKernelArgs(task, param, op_desc_); | |||
| auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||
| auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) : | |||
| SetKernelArgs(task, param, op_desc_); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| ret = RegisterKernel(task, param); | |||
| ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : | |||
| RegisterKernel(task, param); | |||
| task.SetHandle(handle_); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_); | |||
| GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str()); | |||
| void *stub_func = nullptr; | |||
| auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); | |||
| if (rtRet != SUCCESS) { | |||
| GELOGE(rtRet, "rtGetFunctionByName failed."); | |||
| return RT_ERROR_TO_GE_STATUS(rtRet); | |||
| if (task_type != RT_MODEL_TASK_ALL_KERNEL) { | |||
| void *stub_func = nullptr; | |||
| auto rt_ret = rtGetFunctionByName(stub_name_.c_str(), &stub_func); | |||
| if (rt_ret != SUCCESS) { | |||
| GELOGE(rt_ret, "rtGetFunctionByName failed."); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| task.SetStubFunc(stub_name_, stub_func); | |||
| } | |||
| task.SetStubFunc(stub_name_, stub_func); | |||
| return SUCCESS; | |||
| } | |||
| @@ -299,15 +398,16 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | |||
| int64_t max_size = -1; | |||
| (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); | |||
| GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); | |||
| if (max_size <= 0) { | |||
| if (max_size < 0) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| void *tiling_buffer = nullptr; | |||
| GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM)); | |||
| GE_CHECK_NOTNULL(tiling_buffer); | |||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | |||
| if (max_size > 0) { | |||
| GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM)); | |||
| GE_CHECK_NOTNULL(tiling_buffer); | |||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | |||
| } | |||
| task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size)); | |||
| return SUCCESS; | |||
| @@ -42,6 +42,19 @@ class KernelHolder { | |||
| std::shared_ptr<ge::OpKernelBin> kernel_bin_; | |||
| }; | |||
| class HandleHolder { | |||
| public: | |||
| HandleHolder(void *bin_handle); | |||
| ~HandleHolder(); | |||
| void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; } | |||
| void *GetBinHandle() { return bin_handle_; } | |||
| private: | |||
| friend class HandleRegistry; | |||
| void *bin_handle_ = nullptr; | |||
| }; | |||
| class KernelBinRegistry { | |||
| public: | |||
| static KernelBinRegistry &GetInstance() { | |||
| @@ -61,9 +74,22 @@ class KernelBinRegistry { | |||
| std::mutex mutex_; | |||
| }; | |||
| class HandleRegistry { | |||
| public: | |||
| static HandleRegistry &GetInstance() { | |||
| static HandleRegistry instance; | |||
| return instance; | |||
| } | |||
| bool AddHandle(std::unique_ptr<HandleHolder> &&holder); | |||
| private: | |||
| std::set<std::unique_ptr<HandleHolder>> registered_handles_; | |||
| }; | |||
| class TbeTaskBuilder { | |||
| public: | |||
| TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def); | |||
| TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def); | |||
| ~TbeTaskBuilder() = default; | |||
| Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); | |||
| @@ -71,9 +97,11 @@ class TbeTaskBuilder { | |||
| private: | |||
| Status InitTilingInfo(TbeOpTask &task); | |||
| Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | |||
| Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | |||
| Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | |||
| Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | |||
| Status RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m); | |||
| Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle, | |||
| const SingleOpModelParam ¶m); | |||
| Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; | |||
| @@ -83,8 +111,11 @@ class TbeTaskBuilder { | |||
| const NodePtr node_; | |||
| const OpDescPtr op_desc_; | |||
| const domi::TaskDef &task_def_; | |||
| const domi::KernelDef &kernel_def_; | |||
| const domi::KernelDefWithHandle &kernel_def_with_handle_; | |||
| const std::string stub_name_; | |||
| void *handle_ = nullptr; | |||
| }; | |||
| } // namespace ge | |||
| @@ -42,6 +42,10 @@ GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString | |||
| // Finalize GE, release all resources | |||
| GE_FUNC_VISIBILITY Status GEFinalize(); | |||
| GE_FUNC_VISIBILITY std::string GEGetErrorMsg(); | |||
| GE_FUNC_VISIBILITY std::string GEGetWarningMsg(); | |||
| class GE_FUNC_VISIBILITY Session { | |||
| public: | |||
| ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &)) | |||
| @@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | |||
| const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | |||
| // profiling data | |||
| const uint32_t kTaskTypeAicore = 0; | |||
| const uint32_t kTaskTypeAicpu = 1; | |||
| const uint32_t kTaskTypeInvalid = 0xFFFF; | |||
| const std::string kTaskTypeAicore = "AI_CORE"; | |||
| const std::string kTaskTypeAicpu = "AI_CPU"; | |||
| const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; | |||
| // Data cache, including data address and length | |||
| struct DataBuffer { | |||
| @@ -251,27 +251,19 @@ struct Options { | |||
| struct TaskDescInfo { | |||
| std::string model_name; | |||
| std::string op_name; | |||
| std::string op_type; | |||
| uint32_t block_dim; | |||
| uint32_t task_id; | |||
| uint32_t stream_id; | |||
| std::string shape_type; | |||
| int64_t cur_iter_num; | |||
| uint32_t task_type; | |||
| }; | |||
| // Profiling info of graph | |||
| struct ComputeGraphDescInfo { | |||
| std::string model_name; | |||
| std::string op_name; | |||
| std::string op_type; | |||
| std::string task_type; | |||
| std::vector<Format> input_format; | |||
| std::vector<std::vector<int64_t>> input_shape; | |||
| std::vector<DataType> input_data_type; | |||
| std::vector<Format> output_format; | |||
| std::vector<std::vector<int64_t>> output_shape; | |||
| std::vector<DataType> output_data_type; | |||
| uint32_t task_id; | |||
| uint32_t stream_id; | |||
| }; | |||
| struct OpDescInfo { | |||
| @@ -260,12 +260,18 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, | |||
| SingleOp **single_op); | |||
| static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream, | |||
| SingleOp **single_op, const uint64_t model_id); | |||
| static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
| std::vector<DataBuffer> &outputs); | |||
| static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
| DynamicSingleOp **single_op); | |||
| static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
| DynamicSingleOp **single_op, const uint64_t model_id); | |||
| static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | |||
| const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | |||
| std::vector<DataBuffer> &outputs); | |||
| @@ -55,7 +55,8 @@ typedef void *OpTensor_t; | |||
| /// @return 0 for success / others for fail | |||
| /// | |||
| GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, | |||
| const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, const char *om_file); | |||
| const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, | |||
| const char *om_file); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -52,7 +52,8 @@ GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_ME | |||
| /// \param var_info [in] host variable addr infos. | |||
| /// \param mem_type [in] memory type for rdma pool. | |||
| /// \return Status result of function | |||
| GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); | |||
| GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, | |||
| rtMemType_t mem_type = RT_MEMORY_HBM); | |||
| /// | |||
| /// \param tensor_info [in] description for tensor stored shared memory. | |||
| @@ -1 +1 @@ | |||
| Subproject commit a2b80cb22a62a6757c7dd31e684ca632e0b79268 | |||
| Subproject commit 4a9bfd772cad72ff281a2e21d59b8d225a26789c | |||
| @@ -1 +1 @@ | |||
| Subproject commit cfabf622b803d5957563a73652a0ce5086aab99d | |||
| Subproject commit 86162f60807c063f7344f902e443fc99657be637 | |||
| @@ -19,7 +19,6 @@ add_subdirectory(depends/cce) | |||
| add_subdirectory(depends/slog) | |||
| add_subdirectory(depends/mmpa) | |||
| add_subdirectory(depends/runtime) | |||
| add_subdirectory(depends/omg) | |||
| add_subdirectory(depends/hccl) | |||
| add_subdirectory(depends/profiler) | |||
| add_subdirectory(depends/error_manager) | |||
| @@ -29,6 +29,11 @@ include_directories(${GE_CODE_DIR}/inc/framework) | |||
| include_directories(${GE_CODE_DIR}/metadef/inc/external) | |||
| add_library(mmpa_stub SHARED ${SRCS}) | |||
| target_compile_options(mmpa_stub PRIVATE | |||
| -g | |||
| ) | |||
| target_link_libraries(mmpa_stub PRIVATE | |||
| $<BUILD_INTERFACE:intf_pub> | |||
| -Wl,--no-as-needed | |||
| @@ -231,8 +231,12 @@ INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone) | |||
| INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen) | |||
| { | |||
| INT32 ret = EN_OK; | |||
| char *pRet = realpath(path, realPath); | |||
| if (pRet == NULL) { | |||
| if (path == nullptr || realPath == nullptr || realPathLen < MMPA_MAX_PATH) { | |||
| return EN_INVALID_PARAM; | |||
| } | |||
| char *ptr = realpath(path, realPath); | |||
| if (ptr == nullptr) { | |||
| ret = EN_ERROR; | |||
| } | |||
| return ret; | |||
| @@ -260,7 +264,7 @@ INT32 mmDlclose(VOID *handle) | |||
| CHAR *mmDlerror() | |||
| { | |||
| return ""; | |||
| return dlerror(); | |||
| } | |||
| INT32 mmDladdr(VOID *addr, mmDlInfo *info) | |||
| @@ -1,59 +0,0 @@ | |||
| # Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| #cmake_minimum_required(VERSION 2.8) | |||
| project(OMG_CCE) | |||
| set(CMAKE_CXX_STANDARD 11) | |||
| include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc) | |||
| include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/cce) | |||
| include_directories(${GE_CODE_DIR}/inc) | |||
| include_directories(${GE_CODE_DIR}/metadef/inc) | |||
| include_directories(${GE_CODE_DIR}/inc/framework) | |||
| include_directories(${GE_CODE_DIR}/metadef/inc/graph) | |||
| include_directories(${GE_CODE_DIR}/inc/external) | |||
| include_directories(${GE_CODE_DIR}/metadef/inc/external) | |||
| include_directories(${GE_CODE_DIR}/metadef/inc/external/graph) | |||
| include_directories(${GE_CODE_DIR}/ge) | |||
| include_directories(${CMAKE_BINARY_DIR}) | |||
| include_directories(${CMAKE_BINARY_DIR}/proto/ge) | |||
| set(PROTO_LIST | |||
| "${GE_CODE_DIR}/metadef/proto/om.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/task.proto" | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| set(SRCS | |||
| # "${GE_CODE_DIR}/src/ge/common/util.cc" | |||
| "src/omg_stub.cc" | |||
| ) | |||
| add_library(omg_stub SHARED ${SRCS} ${PROTO_SRCS} ${PROTO_HDRS}) | |||
| target_compile_definitions(omg_stub PRIVATE | |||
| google=ascend_private | |||
| ) | |||
| target_link_libraries(omg_stub PRIVATE | |||
| $<BUILD_INTERFACE:intf_pub> | |||
| -Wl,--no-as-needed | |||
| ascend_protobuf | |||
| -Wl,--as-needed | |||
| c_sec | |||
| json | |||
| ) | |||
| @@ -1,878 +0,0 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <map> | |||
| #include <fstream> | |||
| #include <unordered_map> | |||
| #include <google/protobuf/io/coded_stream.h> | |||
| #include <google/protobuf/io/zero_copy_stream_impl.h> | |||
| #include "mmpa/mmpa_api.h" | |||
| #include "common/debug/log.h" | |||
| #include "common/debug/memory_dumper.h" | |||
| #include "common/types.h" | |||
| #include "common/util.h" | |||
| #include "common/string_util.h" | |||
| #include "common/properties_manager.h" | |||
| #include "common/model_parser/base.h" | |||
| #include "graph/model.h" | |||
| #include "cce/dnn.h" | |||
| #include "ge/ge_api_types.h" | |||
| #include "framework/common/ge_types.h" | |||
| #include "graph/utils/op_desc_utils.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| using domi::domiTensorFormat_t; | |||
| using namespace cce; | |||
| using namespace ge; | |||
| struct PROC_PARAM { | |||
| uint8_t *model_name; | |||
| // ISV Ek buffer | |||
| uint8_t *model_key; | |||
| uint32_t model_key_len; | |||
| // ISV root certificate buffer | |||
| uint8_t *root_cert; | |||
| uint32_t root_cert_len; | |||
| // ISV private key buffer | |||
| uint8_t *pri_key; | |||
| uint32_t pri_key_len; | |||
| // Raw AI Module Image buffer | |||
| uint8_t *ai_image; | |||
| uint32_t ai_image_len; | |||
| // ISV HW key buffer | |||
| uint8_t *hw_key; | |||
| uint32_t hw_key_len; | |||
| }; | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| using namespace ge; | |||
| namespace { | |||
| const char FMK_STATUS_FILE_DIR_ENV[] = "FMK_STATUS_FILE_DIR"; | |||
| const char JOBSTATE_FILE_NAME[] = "jobstateupdate_framework"; | |||
| const char HCOM_DETECT_FILE_NAME[] = "hcom_detection_result"; | |||
| const char FILE_SEPARATE[] = "/"; | |||
| } // namespace | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| namespace ge { | |||
| struct GeModelPartition { | |||
| ModelPartitionType type_ = MODEL_DEF; | |||
| uint8_t *data_ = nullptr; | |||
| size_t size_ = 0; | |||
| GeModelPartition() = default; | |||
| GeModelPartition(const GeModelPartition &partition){}; | |||
| GeModelPartition &operator=(const GeModelPartition &partition) = delete; | |||
| ~GeModelPartition() { | |||
| if (data_ != nullptr) { | |||
| delete[] data_; | |||
| data_ = nullptr; | |||
| } | |||
| } | |||
| Status SetData(uint8_t *data, size_t size) { | |||
| size_ = size; | |||
| data_ = new (std::nothrow) uint8_t[size](); | |||
| errno_t err; | |||
| err = memcpy_s(data_, size_, data, size); | |||
| if (err) { | |||
| GELOGE(ge::FAILED, "[GeModel Partition] Error occur when copy GeModel Partition data."); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status SetType(ModelPartitionType type) { | |||
| type_ = type; | |||
| return SUCCESS; | |||
| } | |||
| }; | |||
| struct OmFileContext { | |||
| vector<GeModelPartition> partition_datas_; | |||
| vector<char> partition_table_; | |||
| uint32_t model_data_len_; | |||
| }; | |||
| class SubGraphInfo; | |||
| using SubGraphInfoPtr = std::shared_ptr<ge::SubGraphInfo>; | |||
| using GeModelPartitionPtr = std::shared_ptr<GeModelPartition>; | |||
| using ModelPtr = std::shared_ptr<ge::Model>; | |||
| class GeModel { | |||
| public: | |||
| explicit GeModel(const ModelPtr &model_ptr); | |||
| ~GeModel() = default; | |||
| GeModel(const GeModel &other) = delete; | |||
| GeModel &operator=(const GeModel &other) = delete; | |||
| ModelPtr GetModelPtr() const; | |||
| Status AddPartition(uint8_t *data, size_t size, ModelPartitionType type); | |||
| Status GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition); | |||
| uint8_t GetPlatformType() const; | |||
| void SetPlatformType(const uint8_t platform_type) { platform_type_ = platform_type; } | |||
| private: | |||
| std::map<ModelPartitionType, GeModelPartitionPtr> partitions_; | |||
| ModelPtr model_ = nullptr; | |||
| uint8_t platform_type_ = {0}; | |||
| }; | |||
| using GeModelPtr = std::shared_ptr<ge::GeModel>; | |||
| GeModel::GeModel(const ModelPtr &model_ptr) { this->model_ = model_ptr; } | |||
| ModelPtr GeModel::GetModelPtr() const { return this->model_; } | |||
| uint8_t GeModel::GetPlatformType() const { return platform_type_; } | |||
| Status GeModel::AddPartition(uint8_t *data, size_t size, ModelPartitionType type) { | |||
| if (size == 0) { | |||
| return FAILED; | |||
| } | |||
| if (data == nullptr) { | |||
| return FAILED; | |||
| } | |||
| auto iter = partitions_.find(type); | |||
| if (iter != partitions_.end()) { | |||
| return FAILED; | |||
| } | |||
| GeModelPartitionPtr partition = nullptr; | |||
| GE_MAKE_SHARED(partition = std::make_shared<ge::GeModelPartition>(), return FAILED); | |||
| Status ret = partition->SetType(type); | |||
| if (ret != SUCCESS) { | |||
| return FAILED; | |||
| } | |||
| ret = partition->SetData(data, size); | |||
| if (ret != SUCCESS) { | |||
| return FAILED; | |||
| } | |||
| partitions_.insert(std::pair<ModelPartitionType, GeModelPartitionPtr>(type, partition)); | |||
| return SUCCESS; | |||
| } | |||
| Status GeModel::GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition) { | |||
| auto iter = partitions_.find(type); | |||
| if (iter == partitions_.end()) { | |||
| return FAILED; | |||
| } | |||
| partition = iter->second; | |||
| return SUCCESS; | |||
| } | |||
| class OmFileSaveHelper { | |||
| public: | |||
| OmFileSaveHelper(); | |||
| ~OmFileSaveHelper(); | |||
| vector<GeModelPartition> &GetModelPartitions(); | |||
| ModelPartitionTable *GetPartitionTable(); | |||
| ModelFileHeader model_header_; | |||
| ModelFileHeader &GetModelFileHeader() { return model_header_; } | |||
| void AddPartition(GeModelPartition &partition); | |||
| private: | |||
| OmFileContext context_; | |||
| }; | |||
| OmFileSaveHelper::OmFileSaveHelper() {} | |||
| OmFileSaveHelper::~OmFileSaveHelper() {} | |||
| vector<GeModelPartition> &OmFileSaveHelper::GetModelPartitions() { | |||
| static std::vector<GeModelPartition> tmp; | |||
| return tmp; | |||
| } | |||
| ModelPartitionTable *OmFileSaveHelper::GetPartitionTable() { return nullptr; } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OmFileSaveHelper::AddPartition(GeModelPartition &partition) { | |||
| context_.partition_datas_.push_back(partition); | |||
| context_.model_data_len_ += partition.size_; | |||
| } | |||
| class ModelBuilder { | |||
| public: | |||
| ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector<SubGraphInfoPtr> &subgraphs, | |||
| const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, int mode); | |||
| virtual ~ModelBuilder(); | |||
| Status BuildModel(ge::Model &model_def); | |||
| Status SaveWeightsToModel(ge::Model &model); | |||
| Status SaveDataToModel(ge::Model &model, ge::GeModel &ge_model); | |||
| Status PreBuildModel(); | |||
| Status BuildModelForGetTask(ge::Model &model_def); | |||
| ge::Buffer GetWeightBuffer() const; | |||
| void SetModelVersion(ge::Model &model_def); | |||
| public: | |||
| ge::Buffer weight_buffer_; | |||
| }; | |||
| ModelBuilder::ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector<SubGraphInfoPtr> &subgraphs, | |||
| const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, int mode) { | |||
| weight_buffer_ = ge::Buffer(4100000); | |||
| } | |||
| ModelBuilder::~ModelBuilder() {} | |||
| Status ModelBuilder::SaveWeightsToModel(ge::Model &model) { return SUCCESS; } | |||
| Status ModelBuilder::BuildModel(ge::Model &model_def) { return SUCCESS; } | |||
| Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { return SUCCESS; } | |||
| Status ModelBuilder::PreBuildModel() { return SUCCESS; } | |||
| Status ModelBuilder::BuildModelForGetTask(ge::Model &model_def) { return SUCCESS; } | |||
| void ModelBuilder::SetModelVersion(ge::Model &model_def) { return; } | |||
| ge::Buffer ModelBuilder::GetWeightBuffer() const { return ge::Buffer(4100000); } | |||
| } // namespace ge | |||
| using ProcParam = struct PROC_PARAM; | |||
| namespace ge { | |||
| #include <iostream> | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_N = 0; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_C = 1; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_H = 2; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_W = 3; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_N = 0; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_H = 1; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_W = 2; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_C = 3; | |||
| const uint32_t MODEL_FILE_MAGIC_NUM = 0x444F4D49; | |||
| const uint32_t MODEL_FILE_HEAD_LEN = 256; | |||
| const uint32_t MODEL_VERSION = 0x10000000; | |||
| const int MAX_FILE_SIZE_LIMIT = INT_MAX; | |||
| bool FC_WEIGHT_COMPRESS_FLAG = false; | |||
| bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length) { | |||
| length = 10; | |||
| *buffer = new (std::nothrow) char[10](); | |||
| GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(*buffer == nullptr, false, "new an object failed."); | |||
| return true; | |||
| } | |||
| bool ReadProtoFromText(const char *file, google::protobuf::Message *message) { | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((nullptr == file || nullptr == message), return false, | |||
| "incorrect parameter. nullptr == file || nullptr == message"); | |||
| string real_path = RealPath(file); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "proto file path '%s' not valid", file); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path.c_str()) == -1, return false, "file size not valid."); | |||
| std::ifstream fs(real_path.c_str(), std::ifstream::in); | |||
| if (!fs.is_open()) { | |||
| GELOGE(ge::FAILED, "proto file '%s' open fail.", file); | |||
| return false; | |||
| } | |||
| google::protobuf::io::IstreamInputStream input(&fs); | |||
| bool ret = google::protobuf::TextFormat::Parse(&input, message); | |||
| GE_IF_BOOL_EXEC(ret != true, | |||
| GELOGI("call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); | |||
| fs.close(); | |||
| return ret; | |||
| } | |||
| uint64_t GetCurrentTimestap() { return 0; } | |||
| // get length of file | |||
| long GetFileLength(const std::string &input_file) { | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(input_file.empty(), return -1, "input_file path is null."); | |||
| string real_path = RealPath(input_file.c_str()); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str()); | |||
| unsigned long long file_length = 0; | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, return -1, | |||
| "open file failed."); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length <= 0), return -1, "file length <= 0, not valid."); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %llu is out of limit: %d.", | |||
| file_length, MAX_FILE_SIZE_LIMIT); | |||
| return file_length; | |||
| } | |||
| string RealPath(const char *path) { | |||
| string s = path; | |||
| if (s.size() >= PATH_MAX) { | |||
| return ""; | |||
| } | |||
| if (s == "." || s == "1") { | |||
| return path; | |||
| // for insert_aipp_op unittest | |||
| } else if (s.substr(0, 3) == "llt") { | |||
| return path; | |||
| } else { | |||
| return "22"; | |||
| } | |||
| } | |||
| bool CheckInputPathValid(const string &file_path) { return true; } | |||
| bool ReadProtoFromArray(const void *data, int size, Message *proto) { return true; } | |||
| struct ModelPartition { | |||
| ModelPartitionType type; | |||
| uint8_t *data = 0; | |||
| uint32_t size = 0; | |||
| }; | |||
| class InsertNewOpUtil { | |||
| public: | |||
| InsertNewOpUtil(); | |||
| ~InsertNewOpUtil(); | |||
| Status InsertNewOps(const ComputeGraphPtr &graph); | |||
| Status InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path); | |||
| Status Parse(const char *conf_path); | |||
| }; | |||
| InsertNewOpUtil::InsertNewOpUtil() {} | |||
| Status InsertNewOpUtil::InsertNewOps(const ComputeGraphPtr &graph) { return SUCCESS; } | |||
| Status InsertNewOpUtil::InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path) { return SUCCESS; } | |||
| Status InsertNewOpUtil::Parse(const char *conf_path) { return SUCCESS; } | |||
| Status InitOME() { return SUCCESS; } | |||
| class GraphOptimizer { | |||
| public: | |||
| Status Optimize(); | |||
| Status OptimizeAfterCal(); | |||
| Status AdjustDataOpDesc(); | |||
| Status InsertTransOp(); | |||
| Status FusionFmkop(); | |||
| Status Optimize4Cloud(); | |||
| Status Optimize4FlowCtrl(); | |||
| Status OptimizeBeforeBuild(); | |||
| }; | |||
| Status GraphOptimizer::Optimize() { return SUCCESS; } | |||
| Status Init(Options options) { return SUCCESS; } | |||
| Status Shutdown(Options options) { return SUCCESS; } | |||
| class Session { | |||
| public: | |||
| // singleton | |||
| static Session *Instance(); | |||
| const uint32_t &DeviceId() const; | |||
| }; | |||
| const uint32_t &Session::DeviceId() const { return 0; } | |||
| Session *Session::Instance() { | |||
| static Session instance; | |||
| return &instance; | |||
| } | |||
| struct OmgContext { | |||
| domiTensorFormat_t format; | |||
| // get input format from cmd | |||
| std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map; | |||
| std::vector<domiTensorFormat_t> output_formats; | |||
| // user-designate input dims | |||
| std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; | |||
| // global input dims | |||
| std::map<std::string, std::vector<int64_t>> input_dims; | |||
| // solve rename op e.g: Detectionoutput:SsdDetectiontOutput | |||
| std::map<std::string, std::string> op_conf_map; | |||
| // save output node of network: key is op name, value = index, index is the output index of op | |||
| std::map<std::string, std::vector<int32_t>> out_nodes_map; | |||
| // user-designate out nodes (this is used for determing the orders) | |||
| std::vector<std::pair<std::string, int32_t>> user_out_nodes; | |||
| // save the path of cutsom_aicpu | |||
| std::vector<std::string> aicpu_op_run_paths; | |||
| // save ddk | |||
| std::string ddk_version; | |||
| // save format | |||
| domiTensorFormat_t net_format; | |||
| FrameworkType type; | |||
| // RunMode run_mode; | |||
| bool train_flag = false; | |||
| std::string output_type; | |||
| /// save the name of network | |||
| /// eg:faster-rcnn, based on FirstStageProcessor after scope_fusion is faster-rcnn | |||
| /// then reorder conv+reshape of FirstStageBoxPredictor/BoxEncodingPredictor | |||
| /// need to delete op of reshape | |||
| std::string net_name; | |||
| }; | |||
| } // namespace ge | |||
| namespace domi { | |||
| ge::OmgContext &GetContext() { | |||
| static ge::OmgContext tmp; | |||
| return tmp; | |||
| } | |||
| } // namespace domi | |||
| namespace ge { | |||
| class OpUtils { | |||
| public: | |||
| static Status InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor); | |||
| static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim, | |||
| ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt); | |||
| static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor); | |||
| }; | |||
| Status OpUtils::InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor) { | |||
| ccCreatePoolingMaskDescriptor(&cc_tensor); | |||
| return SUCCESS; | |||
| } | |||
| Status OpUtils::InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim, | |||
| ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt) { | |||
| Status ret = SUCCESS; | |||
| return ret; | |||
| } | |||
| class FileSaver { | |||
| public: | |||
| Status SaveToFile(const string &file_path, ModelFileHeader &model_file_header, | |||
| ModelPartitionTable &model_partition_table, const std::vector<ModelPartition> &partition_datas); | |||
| Status SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param, | |||
| const ModelFileHeader *model_file_header, bool check_sum); | |||
| }; | |||
| Status FileSaver::SaveToFile(const string &file_path, ModelFileHeader &model_file_header, | |||
| ModelPartitionTable &model_partition_table, | |||
| const std::vector<ModelPartition> &partition_datas) { | |||
| return SUCCESS; | |||
| } | |||
| Status FileSaver::SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param, | |||
| const ModelFileHeader *model_file_header, bool check_sum) { | |||
| return SUCCESS; | |||
| } | |||
| class ModelSaver : public FileSaver {}; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::DestroyTensorDescriptor( | |||
| ccTensorDescriptor_t &cc_tensor) { | |||
| if (nullptr != cc_tensor) { | |||
| ccStatus_t ret = ccDestroyTensorDescriptor(&cc_tensor); | |||
| GE_LOGE_IF(CC_STATUS_SUCCESS != ret, "ccDestroyTensorDescriptor failed. ret = %d", ret); | |||
| cc_tensor = nullptr; | |||
| } | |||
| } | |||
| } // namespace ge | |||
| namespace domi { | |||
| class OpRegistrationData {}; | |||
| class OpRegistry { | |||
| public: | |||
| static OpRegistry *Instance(); | |||
| std::vector<OpRegistrationData> registration_datas; | |||
| ImplyType GetImplyType(const std::string &op_type); | |||
| void GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const ImplyType &imply_type); | |||
| }; | |||
| OpRegistry *OpRegistry::Instance() { | |||
| static OpRegistry instance; | |||
| return &instance; | |||
| } | |||
| void OpRegistry::GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const ImplyType &imply_type) { | |||
| if (imply_type == ImplyType::AI_CPU) { | |||
| vec_op_type.push_back("square"); | |||
| } | |||
| } | |||
| class OpRegistrationTbe { | |||
| public: | |||
| static OpRegistrationTbe *Instance(); | |||
| bool Finalize(OpRegistrationData ®_data, bool is_train); | |||
| }; | |||
| OpRegistrationTbe *OpRegistrationTbe::Instance() { | |||
| static OpRegistrationTbe instance; | |||
| return &instance; | |||
| } | |||
| bool OpRegistrationTbe::Finalize(OpRegistrationData ®_data, bool is_train) { return true; } | |||
| } // namespace domi | |||
| namespace ge { | |||
| class GraphPrepare { | |||
| private: | |||
| Status OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph); | |||
| }; | |||
| Status GraphPrepare::OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph) { return SUCCESS; } | |||
| } // namespace ge | |||
| namespace ge { | |||
| Status GetOriginalType(const ge::NodePtr &node, string &type) { | |||
| type = node->GetType(); | |||
| GE_IF_BOOL_EXEC(type != FRAMEWORKOP, return SUCCESS); | |||
| ge::AttrUtils::GetStr(node->GetOpDesc(), "original_type", type); | |||
| return SUCCESS; | |||
| } | |||
| Status SetCycleEvent(const ge::NodePtr &node) { return SUCCESS; } | |||
| Status SetStreamLabel(const ge::NodePtr &node, const std::string &label) { | |||
| GE_CHECK_NOTNULL(node); | |||
| OpDescPtr tmp_desc = AttrUtils::CloneOpDesc(node->GetOpDesc()); | |||
| GE_CHECK_NOTNULL(tmp_desc); | |||
| if (!AttrUtils::SetStr(tmp_desc, "_stream_label", label)) { | |||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_STREAM_LABEL failed", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status SetActiveLabelList(const ge::NodePtr &node, const std::vector<std::string> &label) { | |||
| GE_CHECK_NOTNULL(node); | |||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(tmp_desc); | |||
| // add list of active_label | |||
| if (!AttrUtils::SetListStr(tmp_desc, "_active_label", label)) { | |||
| GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_ACTIVE_LABEL_LIST failed", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status SetSwitchBranchNodeLabel(const ge::NodePtr &node, const std::string &branch_label) { | |||
| GE_CHECK_NOTNULL(node); | |||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(tmp_desc); | |||
| // add branch_label of switch | |||
| if (!AttrUtils::SetStr(tmp_desc, "_switch_branch_node_label", branch_label)) { | |||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_BRANCH_NODE_LABEL failed", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status SetSwitchTrueBranchFlag(const ge::NodePtr &node, bool value) { | |||
| GE_CHECK_NOTNULL(node); | |||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(tmp_desc); | |||
| // add switch_true_branch_flag | |||
| if (!AttrUtils::SetBool(tmp_desc, "_switch_true_branch_flag", value)) { | |||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG failed", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status SetOriginalNodeName(const ge::NodePtr &node, const std::string &orig_name) { | |||
| GE_CHECK_NOTNULL(node); | |||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(tmp_desc); | |||
| // record original_node_name | |||
| if (!AttrUtils::SetStr(tmp_desc, "_original_node_name", orig_name)) { | |||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_ORIG_NODE_NAME failed", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status SetCyclicDependenceFlag(const ge::NodePtr &node) { | |||
| GE_CHECK_NOTNULL(node); | |||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(tmp_desc); | |||
| // add cyclic_dependence_flag | |||
| if (!AttrUtils::SetBool(tmp_desc, "_cyclic_dependence_flag", true)) { | |||
| GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_CYCLIC_DEPENDENCE_FLAG failed", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status SetNextIteration(const ge::NodePtr &node, const std::string &next) { | |||
| GE_CHECK_NOTNULL(node); | |||
| OpDescPtr tmp_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(tmp_desc); | |||
| if (!AttrUtils::SetStr(tmp_desc, "_next_iteration_node", next)) { | |||
| GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_NEXT_ITERATION failed", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| namespace cce { | |||
| bool ccGetFuncState(ccFuncParamType_t type) { return true; } | |||
| } // namespace cce | |||
| namespace ge { | |||
| Status UnloadModel(uint32_t model_id) { return SUCCESS; } | |||
| Status GetInputOutputDescInfo(uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | |||
| vector<InputOutputDescInfo> &output_desc) { | |||
| return SUCCESS; | |||
| } | |||
| Status DataInput(const InputData *input_data, OutputData *output_data) { return SUCCESS; } | |||
| /* | |||
| class ModelManager { | |||
| public: | |||
| static std::shared_ptr<ModelManager> GetInstance(); | |||
| static void FinalizeForPtr(ModelManager *) {} | |||
| Status DataInputTensor(uint32_t model_id, const std::vector<ge::TensorInfo> &inputs, | |||
| std::vector<ge::TensorInfo> &outputs); | |||
| Status DataInput(const InputData &input_data, OutputData &output_data); | |||
| Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||
| std::vector<InputOutputDescInfo> &output_desc); | |||
| Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||
| std::vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats, | |||
| std::vector<uint32_t> &output_formats); | |||
| Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||
| std::vector<InputOutputDescInfo> &output_desc, | |||
| std::vector<uint32_t> &input_formats, std::vector<uint32_t> &output_formats); | |||
| Status Stop(uint32_t model_id); | |||
| Status Unload(uint32_t model_id); | |||
| Status LoadModelOnline(uint32_t &model_id, std::shared_ptr<ge::Model> &model, | |||
| std::shared_ptr<ModelListener> listener); | |||
| Status Start(uint32_t model_id); | |||
| Status GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size); | |||
| Status LoadModelOffline(uint32_t &model_id, const ModelData &model, std::shared_ptr<ModelListener> listener = nullptr, | |||
| void *dev_ptr = nullptr, size_t mem_size = 0, void *weight_ptr = nullptr, | |||
| size_t weight_size = 0); | |||
| Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids, | |||
| const std::vector<uint32_t> &output_queue_ids); | |||
| Status HandleCommand(const Command &command); | |||
| Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | |||
| OutputData &output_data); | |||
| void DestroyAicpuSession(uint64_t session_id); | |||
| }; | |||
| void ModelManager::DestroyAicpuSession(uint64_t session_id) {} | |||
| std::shared_ptr<ModelManager> ModelManager::GetInstance() { | |||
| static std::shared_ptr<ModelManager> instance_ptr = | |||
| shared_ptr<ModelManager>(new ModelManager(), ModelManager::FinalizeForPtr); | |||
| return instance_ptr; | |||
| } | |||
| Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<ge::TensorInfo> &inputs, | |||
| std::vector<ge::TensorInfo> &outputs) { | |||
| return SUCCESS; | |||
| } | |||
| Status ModelManager::DataInput(const InputData &input_data, OutputData &output_data) { return SUCCESS; } | |||
| Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||
| std::vector<InputOutputDescInfo> &output_desc, | |||
| std::vector<uint32_t> &input_formats, | |||
| std::vector<uint32_t> &output_formats) { | |||
| return SUCCESS; | |||
| } | |||
| Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||
| std::vector<InputOutputDescInfo> &output_desc) { | |||
| return SUCCESS; | |||
| } | |||
| Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||
| std::vector<InputOutputDescInfo> &input_desc, | |||
| std::vector<InputOutputDescInfo> &output_desc, | |||
| std::vector<uint32_t> &input_formats, | |||
| std::vector<uint32_t> &output_formats) { | |||
| return SUCCESS; | |||
| } | |||
| Status ModelManager::Stop(uint32_t model_id) { return SUCCESS; } | |||
| Status ModelManager::Unload(uint32_t model_id) { return SUCCESS; } | |||
| Status ModelManager::LoadModelOnline(uint32_t &model_id, std::shared_ptr<ge::Model> &model, | |||
| std::shared_ptr<ModelListener> listener) { | |||
| return SUCCESS; | |||
| } | |||
| Status ModelManager::Start(uint32_t model_id) { return SUCCESS; } | |||
| Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { return SUCCESS; } | |||
| Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | |||
| void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | |||
| return SUCCESS; | |||
| } | |||
| Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, | |||
| const std::vector<uint32_t> &input_queue_ids, | |||
| const std::vector<uint32_t> &output_queue_ids) { | |||
| return SUCCESS; | |||
| } | |||
| Status ModelManager::HandleCommand(const Command &command) { return SUCCESS; } | |||
| Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | |||
| OutputData &output_data) { | |||
| return SUCCESS; | |||
| } | |||
| */ | |||
| } // namespace ge | |||
| namespace ge { | |||
// Coarse lifecycle states reported for a training job to the scheduler.
enum JobState {
  JOBSTATE_WAITING = 1,  // queued, not yet running
  JOBSTATE_RUNNING,
  JOBSTATE_KILLING,      // kill requested, teardown in progress
  JOBSTATE_SUCCEED,
  JOBSTATE_FAILED,
  JOBSTATE_KILLED,
  JOBSTATE_UNKOWN        // NOTE(review): misspelling kept — the name is part of the interface
};
// Fine-grained job phases, reported alongside JobState. Values start at 201
// so they cannot be confused with JobState values.
enum JobSubState {
  JOBSUBSTATE_ENV_INIT = 201,   // environment initialisation
  JOBSUBSTATE_ENV_FIN,          // environment finalisation
  JOBSUBSTATE_RESOUCE_ALLOC,    // NOTE(review): misspelling kept — name is part of the interface
  JOBSUBSTATE_MODEL_COMPILE,
  JOBSUBSTATE_GRAPH_PREPARE,
  JOBSUBSTATE_GRAPH_SPLIT,
  JOBSUBSTATE_GRAPH_OPTIMIZE,
  JOBSUBSTATE_GRAPH_BUILD,
  JOBSUBSTATE_GRAPH_LOAD,
  JOBSUBSTATE_GRAPH_EXEC,
  JOBSUBSTATE_GRAPH_UNLOAD,
  JOBSUBSTATE_OTHER             // default / unspecified phase
};
// Identifies which software layer produced an error code written to CSA.
enum ErrorModule {
  ERROR_MODULE_DRIVER = 0x01,
  ERROR_MODULE_RUNTIME = 0x04,
  ERROR_MODULE_CCE = 0x06,
  ERROR_MODULE_FMK = 0x08,   // framework (GE) — the default in WriteJobState
  ERROR_MODULE_HCCL = 0x12
};
| class CsaInteract { | |||
| public: | |||
| CsaInteract &GetInstance(); | |||
| void WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state); | |||
| void Init(int32_t dev_index, int64_t job_id); | |||
| Status WriteJobState(JobState job_state, JobSubState job_sub_state = JOBSUBSTATE_OTHER, | |||
| uint32_t module_ret_errcode = SUCCESS, ErrorModule error_module = ERROR_MODULE_FMK); | |||
| // device index | |||
| int32_t dev_index_; | |||
| // job id | |||
| int64_t job_id_; | |||
| // is initialization complete | |||
| bool is_init_; | |||
| // current job state | |||
| JobState curr_state_; | |||
| // job state file | |||
| std::string job_state_file_; | |||
| // network connectivity detect file | |||
| std::string hcom_detect_file_; | |||
| // identification of internal errors that occurred during the training | |||
| bool is_have_internal_error_; | |||
| }; | |||
// Meyers singleton: constructed on first use, the same object thereafter.
CsaInteract &CsaInteract::GetInstance() {
  static CsaInteract instance;
  return instance;
}
| void CsaInteract::Init(int32_t dev_index, int64_t job_id) { | |||
| if (!is_init_) { | |||
| dev_index_ = dev_index; | |||
| job_id_ = job_id; | |||
| string csa_path_prefix; | |||
| if (std::getenv(FMK_STATUS_FILE_DIR_ENV) != nullptr) { | |||
| csa_path_prefix = std::getenv(FMK_STATUS_FILE_DIR_ENV); | |||
| } | |||
| if (!csa_path_prefix.empty()) { | |||
| std::string job_state_file = csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + JOBSTATE_FILE_NAME; | |||
| std::string hcom_detect_file = | |||
| csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + HCOM_DETECT_FILE_NAME; | |||
| job_state_file_ = RealPath(job_state_file.c_str()); | |||
| hcom_detect_file_ = RealPath(hcom_detect_file.c_str()); | |||
| } | |||
| is_init_ = true; | |||
| } | |||
| } | |||
| void CsaInteract::WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state) {} | |||
| } // namespace ge | |||
// Test stub: pretends the model file was loaded; `model_data` is untouched.
Status ModelParserBase::LoadFromFile(const char *model_path, const char *key, int32_t priority,
                                     ge::ModelData &model_data) {
  return SUCCESS;
}
// Test stub: accepts any job state transition and reports success.
Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode,
                                  ErrorModule error_module) {
  return SUCCESS;
}
| namespace ge { | |||
// Byte width of each primitive data type. Unsigned types share the width of
// their signed counterpart; FLOAT16 occupies two bytes (stored as int16_t).
static std::map<ge::DataType, uint32_t> data_type_to_length = {
  {DT_BOOL, sizeof(bool)}, {DT_INT64, sizeof(int64_t)}, {DT_UINT64, sizeof(int64_t)}, {DT_FLOAT, sizeof(float)},
  {DT_INT32, sizeof(int32_t)}, {DT_UINT32, sizeof(int32_t)}, {DT_INT8, sizeof(char)}, {DT_UINT8, sizeof(char)},
  {DT_INT16, sizeof(int16_t)}, {DT_UINT16, sizeof(int16_t)}, {DT_FLOAT16, sizeof(int16_t)}, {DT_DOUBLE, sizeof(double)},
};
// Static helpers for querying data-type sizes and guarding multiplications.
class TypeUtils {
 public:
  // Looks up the byte width of `data_type`; returns false if unknown.
  static bool GetDataTypeLength(ge::DataType data_type, uint32_t &length);
  // Returns true when a * b would overflow uint64_t.
  static bool CheckUint64MulOverflow(uint64_t a, uint32_t b);
};
| bool TypeUtils::GetDataTypeLength(ge::DataType data_type, uint32_t &length) { | |||
| auto it = data_type_to_length.find(data_type); | |||
| if (it != data_type_to_length.end()) { | |||
| length = it->second; | |||
| return true; | |||
| } else { | |||
| return false; | |||
| } | |||
| } | |||
| bool TypeUtils::CheckUint64MulOverflow(uint64_t a, uint32_t b) { | |||
| // Not overflow | |||
| if (a == 0) { | |||
| return false; | |||
| } | |||
| if ((ULLONG_MAX / a) >= b) { | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace ge | |||
| @@ -27,8 +27,8 @@ rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) { | |||
| } | |||
| rtError_t rtCtxGetCurrent(rtContext_t *ctx) { | |||
| int x = 1; | |||
| *ctx = (void *)x; | |||
| uintptr_t x = 1; | |||
| *ctx = (rtContext_t *)x; | |||
| return RT_ERROR_NONE; | |||
| } | |||
| @@ -131,8 +131,15 @@ rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char | |||
// Stubbed runtime APIs: binary registration and kernel-arg translation
// always succeed and produce no handles/args.
rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }
rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }
rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; }
// Stubbed runtime API: pretends the kernel identified by (handle, devFunc)
// was launched on `stream`; nothing is executed.
rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
                                   rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo) {
  return RT_ERROR_NONE;
}
| rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc, | |||
| rtStream_t stream) { | |||
| return RT_ERROR_NONE; | |||
| @@ -156,7 +163,7 @@ rtError_t rtSetKernelReportCallback(rtKernelReportCallback callback) { | |||
| rt_kernel_info.module_addr = (void *)100; | |||
| rt_kernel_info.module_size = 100; | |||
| rtStream_t stream; | |||
| rtStream_t stream = nullptr; | |||
| callback(stream, &rt_kernel_info); | |||
| return RT_ERROR_NONE; | |||
| } | |||
| @@ -193,7 +200,8 @@ rtError_t rtModelCreate(rtModel_t *model, uint32_t flag) { | |||
| } | |||
| rtError_t rtModelDestroy(rtModel_t model) { | |||
| delete model; | |||
| uint32_t *stub = static_cast<uint32_t *>(model); | |||
| delete stub; | |||
| return RT_ERROR_NONE; | |||
| } | |||
| @@ -18,23 +18,23 @@ project(ut_ge) | |||
| set(CMAKE_CXX_STANDARD 11) | |||
| set(PROTO_LIST | |||
| "${GE_CODE_DIR}/metadef/proto/om.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/ge_ir.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/ge_api.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/insert_op.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/dump_task.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/fwk_adapter.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/op_mapping_info.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/optimizer_priority.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/ge_api.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/attr_value.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/tensor.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/resource_handle.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/tensor_shape.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/types.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/node_def.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/proto_inner/ge_onnx.proto" | |||
| ) | |||
| "${GE_CODE_DIR}/metadef/proto/om.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/ge_ir.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/ge_api.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/insert_op.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/dump_task.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/fwk_adapter.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/op_mapping_info.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/optimizer_priority.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/ge_api.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/attr_value.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/tensor.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/resource_handle.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/tensor_shape.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/types.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/tensorflow/node_def.proto" | |||
| "${GE_CODE_DIR}/metadef/proto/proto_inner/ge_onnx.proto" | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| @@ -135,6 +135,7 @@ set(COMMON_SRC_FILES | |||
| "${GE_CODE_DIR}/ge/common/types.cc" | |||
| "${GE_CODE_DIR}/ge/common/fmk_error_codes.cc" | |||
| "${GE_CODE_DIR}/ge/common/op/ge_op_utils.cc" | |||
| "${GE_CODE_DIR}/ge/common/context/ctx.cc" | |||
| "${GE_CODE_DIR}/ge/graph/manager/util/variable_accelerate_ctrl.cc" | |||
| "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" | |||
| "${GE_CODE_DIR}/ge/generator/ge_generator.cc" | |||
| @@ -163,7 +164,7 @@ set(COMMON_SRC_FILES | |||
| "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" | |||
| "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | |||
| "${GE_CODE_DIR}/ge/model/ge_root_model.cc" | |||
| "${GE_CODE_DIR}/ge/common/model_parser/base.cc" | |||
| "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" | |||
| "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" | |||
| "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" | |||
| @@ -266,8 +267,8 @@ set(COMMON_SRC_FILES | |||
| "${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | |||
| "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | |||
| "${GE_CODE_DIR}/ge/model/ge_model.cc" | |||
| "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | |||
| @@ -393,14 +394,13 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES | |||
| "${GE_CODE_DIR}/ge/graph/manager/util/debug.cc" | |||
| "${GE_CODE_DIR}/ge/common/properties_manager.cc" | |||
| "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" | |||
| "${GE_CODE_DIR}/ge/common/model_parser/base.cc" | |||
| "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | |||
| "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" | |||
| "${GE_CODE_DIR}/ge/common/util.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | |||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" | |||
| @@ -458,7 +458,7 @@ set(GRAPH_BUILD_COMMON_SRC_FILES | |||
| "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | |||
| "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" | |||
| "${GE_CODE_DIR}/ge/common/thread_pool.cc" | |||
| "${GE_CODE_DIR}/ge/common/model_parser/base.cc" | |||
| "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | |||
| "${GE_CODE_DIR}/ge/graph/build/run_context.cc" | |||
| "${GE_CODE_DIR}/ge/graph/common/local_context.cc" | |||
| ) | |||
| @@ -627,7 +627,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES | |||
| #"graph/load/new_model_manager_davinci_model_unittest.cc" | |||
| "graph/load/model_manager_unittest.cc" | |||
| #"graph/load/new_model_manager_task_build_unittest.cc" | |||
| "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" | |||
| "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" | |||
| "graph/load/end_graph_task_unittest.cc" | |||
| "graph/load/new_model_manager_event_manager_unittest.cc" | |||
| #"graph/load/output_net_output_unittest.cc" | |||
| @@ -638,7 +638,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES | |||
| "graph/load/kernel_task_info_unittest.cc" | |||
| "graph/load/memcpy_addr_async_task_info_unittest.cc" | |||
| "graph/load/memcpy_async_task_info_unittest.cc" | |||
| "graph/load/cpu_queue_schedule_unittest.cc" | |||
| "graph/load/cpu_queue_schedule_unittest.cc" | |||
| #"graph/graph_load_unittest.cc" | |||
| "graph/ge_executor_unittest.cc" | |||
| "graph/load/model_helper_unittest.cc" | |||
| @@ -671,7 +671,7 @@ set(PASS_TEST_FILES | |||
| "graph/passes/trans_op_depth_fusion_pass_unittest.cc" | |||
| "graph/passes/transop_nearby_allreduce_fusion_pass_unittest.cc" | |||
| "graph/passes/constant_folding_pass_unittest.cc" | |||
| "graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc" | |||
| "graph/passes/fuse_data_nodes_with_common_input_pass_unittest.cc" | |||
| "graph/passes/stop_gradient_pass_unittest.cc" | |||
| "graph/passes/prevent_gradient_pass_unittest.cc" | |||
| "graph/passes/identity_pass_unittest.cc" | |||
| @@ -752,25 +752,38 @@ set(MULTI_PARTS_TEST_FILES | |||
| "graph/build/mem_assigner_unittest.cc" | |||
| "graph/preprocess/graph_preprocess_unittest.cc" | |||
| "graph/manager/hcom_util_unittest.cc" | |||
| "graph/manager/graph_caching_allocator_unittest.cc" | |||
| "session/omg_omg_unittest.cc" | |||
| ) | |||
| set(GENERATOR_TEST_FILES | |||
| "generator/ge_generator_unittest.cc" | |||
| ) | |||
| set(EXECUTOR_TEST_FILES | |||
| "executor/ge_executor_unittest.cc" | |||
| ) | |||
| set(SINGLE_OP_TEST_FILES | |||
| #"single_op/single_op_model_unittest.cc" | |||
| "single_op/single_op_model_unittest.cc" | |||
| "single_op/single_op_manager_unittest.cc" | |||
| "single_op/stream_resource_unittest.cc" | |||
| "single_op/single_op_task_unittest.cc" | |||
| ) | |||
| set(PROFILING_MNG_TEST_FILES | |||
| "profiling/ge_profiling_manager_unittest.cc" | |||
| ) | |||
| set(HYBRID_TEST_FILES | |||
| "hybrid/ge_hybrid_unittest.cc" | |||
| ) | |||
| set(OTHERS_TEST_FILES | |||
| "plugin_manager/ge_util_unittest.cc" | |||
| ) | |||
| list(APPEND COMMON_SHARED_LIBRARIES | |||
| omg_stub | |||
| c_sec | |||
| slog_stub | |||
| cce_ge_stub | |||
| @@ -1055,10 +1068,13 @@ target_link_libraries(ut_libge_kernel_utest | |||
| # libge_distinct_load_utest | |||
| add_executable(ut_libge_distinct_load_utest | |||
| ${COMMON_TEST_FILES} | |||
| ${GENERATOR_TEST_FILES} | |||
| ${EXECUTOR_TEST_FILES} | |||
| ${DISTINCT_GRAPH_LOAD_TEST_FILES} | |||
| ${DISTINCT_GRAPH_LOAD_SRC_FILES} | |||
| ${SINGLE_OP_TEST_FILES} | |||
| ${PROFILING_MNG_TEST_FILES} | |||
| ${HYBRID_TEST_FILES} | |||
| ) | |||
| target_compile_options(ut_libge_distinct_load_utest PRIVATE | |||
| @@ -0,0 +1,42 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #define private public | |||
| #define protected public | |||
| #include "executor/ge_executor.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| using namespace std; | |||
| namespace ge { | |||
// Fixture for GeExecutor unit tests; no shared state or setup is required.
class UtestGeExecutor : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};
| TEST_F(UtestGeExecutor, test_single_op_exec) { | |||
| GeExecutor exeutor; | |||
| ModelData model_data; | |||
| string model_name = "1234"; | |||
| EXPECT_EQ(exeutor.LoadSingleOp(model_name, model_data, nullptr, nullptr), ACL_ERROR_GE_INTERNAL_ERROR); | |||
| EXPECT_EQ(exeutor.LoadDynamicSingleOp(model_name, model_data, nullptr, nullptr), PARAM_INVALID); | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,78 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #define private public | |||
| #define protected public | |||
| #include "generator/ge_generator.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| using namespace std; | |||
| namespace ge { | |||
// Fixture for GeGenerator unit tests; no shared state or setup is required.
class UtestGeGenerator : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};
// Builds an "Add" single-op model offline. Before Initialize() the generator
// impl is null so the build must reject the call; after Initialize({}) the
// build proceeds but fails at the graph-manager stage in this stubbed env.
TEST_F(UtestGeGenerator, test_build_single_op_offline) {
  GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
  TensorUtils::SetSize(tensor_desc, 512);
  shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add");
  EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
  EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS);
  EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS);
  GeTensor tensor(tensor_desc);
  const vector<GeTensor> inputs = { tensor, tensor };
  const vector<GeTensor> outputs = { tensor };
  // not Initialize, impl is null.
  GeGenerator generator;
  EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), PARAM_INVALID);
  // const map<string, string> &options
  generator.Initialize({});
  EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED);
}
| /* | |||
| TEST_F(UtestGeGenerator, test_build_single_op_online) { | |||
| GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||
| TensorUtils::SetSize(tensor_desc, 512); | |||
| shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add"); | |||
| EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); | |||
| EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); | |||
| EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS); | |||
| GeTensor tensor(tensor_desc); | |||
| const vector<GeTensor> inputs = { tensor, tensor }; | |||
| const vector<GeTensor> outputs = { tensor }; | |||
| // not Initialize, impl is null. | |||
| GeGenerator generator; | |||
| generator.Initialize({}); | |||
| ModelBufferData model_buffer; | |||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_SYS, model_buffer), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); | |||
| } | |||
| */ | |||
| } // namespace ge | |||
| @@ -25,10 +25,12 @@ | |||
| #include "graph/utils/op_desc_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "omg/omg_inner_types.h" | |||
| #include "../passes/graph_builder_utils.h" | |||
| #define protected public | |||
| #define private public | |||
| #include "graph/build/memory/binary_block_mem_assigner.h" | |||
| #include "graph/build/memory/graph_mem_assigner.h" | |||
| #include "graph/build/memory/hybrid_mem_assigner.h" | |||
| #include "graph/build/memory/max_block_mem_assigner.h" | |||
| #undef protected | |||
| @@ -41,7 +43,7 @@ using domi::GetContext; | |||
| class UtestMemoryAssignerTest : public testing::Test { | |||
| public: | |||
| ge::OpDescPtr createOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { | |||
| ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { | |||
| ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type); | |||
| auto desc_temp_ptr = make_shared<ge::GeTensorDesc>(); | |||
| auto desc_temp = *desc_temp_ptr; | |||
| @@ -55,26 +57,46 @@ class UtestMemoryAssignerTest : public testing::Test { | |||
| op_def->SetWorkspaceBytes(workspace_bytes); | |||
| return op_def; | |||
| } | |||
| void make_graph(ge::ComputeGraphPtr graph) { | |||
| ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); | |||
// Builds an op desc whose single output reuses input 0 (a "ref" op):
// 1024-byte input, 6500-byte output marked ReuseInput(index 0), and one
// workspace of `wsByte` bytes.
ge::OpDescPtr CreateRefOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
  ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
  auto desc_temp_ptr = make_shared<ge::GeTensorDesc>();
  auto desc_temp = *desc_temp_ptr;
  TensorUtils::SetSize(desc_temp, 1024);
  op_def->AddInputDesc(desc_temp);
  auto desc_output_ptr = make_shared<ge::GeTensorDesc>();
  auto desc_output = *desc_output_ptr;
  TensorUtils::SetSize(desc_output, 6500);
  // Mark the output as reusing input 0 — this is what makes the op a ref op.
  ge::TensorUtils::SetReuseInput(desc_output, true);
  ge::TensorUtils::SetReuseInputIndex(desc_output, 0);
  op_def->AddOutputDesc(desc_output);
  std::vector<int64_t> workspace_bytes;
  workspace_bytes.push_back(wsByte);
  op_def->SetWorkspaceBytes(workspace_bytes);
  return op_def;
}
| void MakeGraph(ge::ComputeGraphPtr &graph) { | |||
| ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); | |||
| op_def_a->SetStreamId(0); | |||
| ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000); | |||
| ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000); | |||
| op_def_b->SetStreamId(0); | |||
| ge::OpDescPtr op_def_c = createOpWithWsSize("C", 16000); | |||
| ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 16000); | |||
| op_def_c->SetStreamId(1); | |||
| ge::OpDescPtr op_def_d = createOpWithWsSize("D", 24000); | |||
| ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 24000); | |||
| op_def_d->SetStreamId(2); | |||
| ge::OpDescPtr op_def_e = createOpWithWsSize("E", 24000); | |||
| ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 24000); | |||
| op_def_e->SetStreamId(3); | |||
| ge::OpDescPtr op_def_f = createOpWithWsSize("F", 30000); | |||
| ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 30000); | |||
| op_def_f->SetStreamId(2); | |||
| ge::OpDescPtr op_def_g = createOpWithWsSize("G", 32000); | |||
| ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 32000); | |||
| op_def_g->SetStreamId(3); | |||
| ge::OpDescPtr op_def_h = createOpWithWsSize("H", 48000); | |||
| ge::OpDescPtr op_def_h = CreateOpWithWsSize("H", 48000); | |||
| op_def_h->SetStreamId(2); | |||
| ge::OpDescPtr op_def_i = createOpWithWsSize("I", 60000); | |||
| ge::OpDescPtr op_def_i = CreateOpWithWsSize("I", 60000); | |||
| op_def_i->SetStreamId(2); | |||
| ge::OpDescPtr op_def_j = createOpWithWsSize("J", 256000, NETOUTPUT); | |||
| ge::OpDescPtr op_def_j = CreateOpWithWsSize("J", 256000, NETOUTPUT); | |||
| op_def_j->SetStreamId(3); | |||
| // add node | |||
| @@ -108,24 +130,10 @@ class UtestMemoryAssignerTest : public testing::Test { | |||
| graph->TopologicalSorting(); | |||
| } | |||
| void make_reuse_graph(ge::ComputeGraphPtr graph) { | |||
| ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); | |||
| ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000); | |||
| ge::OpDescPtr op_def_c = make_shared<ge::OpDesc>("C", "Some"); | |||
| auto desc_input_ptr = make_shared<ge::GeTensorDesc>(); | |||
| auto desc_input = *desc_input_ptr; | |||
| TensorUtils::SetSize(desc_input, 1024); | |||
| op_def_c->AddInputDesc(desc_input); | |||
| auto desc_output_ptr = make_shared<ge::GeTensorDesc>(); | |||
| auto desc_output = *desc_output_ptr; | |||
| TensorUtils::SetSize(desc_output, 6500); | |||
| ge::TensorUtils::SetReuseInput(desc_output, true); | |||
| ge::TensorUtils::SetReuseInputIndex(desc_output, 0); | |||
| op_def_c->AddOutputDesc(desc_output); | |||
| void MakeReuseGraph(ge::ComputeGraphPtr graph) { | |||
| ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); | |||
| ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000); | |||
| ge::OpDescPtr op_def_c = CreateRefOpWithWsSize("C", 120000); | |||
| ge::OpDescPtr op_def_d = make_shared<ge::OpDesc>("D", "CONSTANT"); | |||
| ge::NodePtr node_a = graph->AddNode(op_def_a); | |||
| @@ -141,6 +149,47 @@ class UtestMemoryAssignerTest : public testing::Test { | |||
| graph->TopologicalSorting(); | |||
| } | |||
// Builds a graph with two cascaded no-padding continuous-input Concat nodes:
//   data -> addn1 \
//   data -> addn2 -> concat1 -> concat2 -> (addn3 feeds concat2:1)
// Both concats require continuous, reuse-input allocation, so the memory
// assigner must re-lay-out the upstream AddN output offsets.
ComputeGraphPtr MakeCascadeContinuousMemoryGraph() {
  ge::ut::GraphBuilder builder("graph");
  auto data = builder.AddNode("data", "Data", 1, 1);
  auto addn1 = builder.AddNode("addn1", "AddN", 1, 1);
  auto addn2 = builder.AddNode("addn2", "AddN", 1, 1);
  auto addn3 = builder.AddNode("addn3", "AddN", 1, 1);
  auto concat1 = builder.AddNode("concat1", "Concat", 2, 1);
  auto concat2 = builder.AddNode("concat2", "Concat", 2, 1);
  auto netoutput = builder.AddNode("netoutput", "NetOutput", 2, 0);
  // Both concats demand continuous, no-padding inputs that reuse their
  // producers' memory — the condition exercised by the continuous-memory pass.
  ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true);
  ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
  ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true);
  ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true);
  ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
  ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true);
  // Pre-assigned offsets that the assigner is expected to rewrite.
  addn1->GetOpDesc()->SetOutputOffset({100});
  addn2->GetOpDesc()->SetOutputOffset({200});
  concat1->GetOpDesc()->SetOutputOffset({100});
  addn3->GetOpDesc()->SetOutputOffset({700});
  concat2->GetOpDesc()->SetOutputOffset({500});
  ge::AttrUtils::SetListInt(addn1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100});
  ge::AttrUtils::SetListInt(addn2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100});
  ge::AttrUtils::SetListInt(addn3->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100});
  ge::AttrUtils::SetListInt(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {200});
  ge::AttrUtils::SetListInt(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {300});
  builder.AddDataEdge(data, 0, addn1, 0);
  builder.AddDataEdge(data, 0, addn2, 0);
  builder.AddDataEdge(addn1, 0, concat1, 0);
  builder.AddDataEdge(addn2, 0, concat1, 1);
  builder.AddDataEdge(concat1, 0, concat2, 0);
  builder.AddDataEdge(addn3, 0, concat2, 1);
  return builder.GetGraph();
}
| protected: | |||
| void SetUp() {} | |||
| @@ -150,7 +199,7 @@ class UtestMemoryAssignerTest : public testing::Test { | |||
| /* | |||
| TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { | |||
| ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||
| ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); | |||
| ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); | |||
| ge::NodePtr node_a = graph->AddNode(op_def_a); | |||
| MemoryBlock* memory_block = new MemoryBlock(0); | |||
| memory_block->Init(1, kOutput, node_a, 0, 1); | |||
| @@ -178,7 +227,7 @@ class MockBlockMemAssigner : public BlockMemAssigner { | |||
| // when check GetMemoryRanges return fail, Assign return fail | |||
| TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) { | |||
| ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||
| make_graph(graph); | |||
| MakeGraph(graph); | |||
| std::map<std::string, std::string> anchor_to_symbol; | |||
| std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors; | |||
| EXPECT_EQ(GraphUtils::GetRefMapping(graph, symbol_to_anchors, anchor_to_symbol), GRAPH_SUCCESS); | |||
| @@ -186,3 +235,17 @@ TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) { | |||
| MockBlockMemAssigner mock_assigner(graph, anchor_to_symbol, symbol_to_anchors); | |||
| EXPECT_EQ(mock_assigner.Assign(), FAILED); | |||
| } | |||
| TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { | |||
| ge::ComputeGraphPtr graph = MakeCascadeContinuousMemoryGraph(); | |||
| auto addn1 = graph->FindNode("addn1"); | |||
| auto addn2 = graph->FindNode("addn2"); | |||
| EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 100); | |||
| EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 200); | |||
| GraphMemoryAssigner memoryAssigner(graph); | |||
| MemoryOffset memory_offset(RT_MEMORY_HBM, 0); | |||
| memoryAssigner.memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); | |||
| EXPECT_EQ(memoryAssigner.ReAssignContinuousMemory(false), GRAPH_SUCCESS); | |||
| EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 500); | |||
| EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); | |||
| } | |||
| @@ -34,7 +34,6 @@ | |||
| #include "common/types.h" | |||
| #include "graph/load/graph_loader.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "graph/load/model_manager/task_info/kernel_task_info.h" | |||
| #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" | |||
| @@ -109,6 +108,26 @@ static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { | |||
| ge::AttrUtils::SetInt(op_desc, ge::ATTR_NAME_STREAM_SWITCH_COND, 0); | |||
| return op_desc; | |||
| } | |||
| TEST_F(UtestGeExecutor, load_data_from_file) { | |||
| GeExecutor ge_executor; | |||
| ge_executor.isInit_ = true; | |||
| string test_smap = "/tmp/" + std::to_string(getpid()) + "_maps"; | |||
| string self_smap = "/proc/" + std::to_string(getpid()) + "/maps"; | |||
| string copy_smap = "cp " + self_smap + " " + test_smap; | |||
| EXPECT_EQ(system(copy_smap.c_str()), 0); | |||
| ModelData model_data; | |||
| EXPECT_EQ(ge_executor.LoadDataFromFile(test_smap, model_data), SUCCESS); | |||
| EXPECT_NE(model_data.model_data, nullptr); | |||
| delete[] static_cast<char *>(model_data.model_data); | |||
| model_data.model_data = nullptr; | |||
| ge_executor.isInit_ = false; | |||
| } | |||
| /* | |||
| TEST_F(UtestGeExecutor, fail_UnloadModel_model_manager_stop_unload_error) { | |||
| uint32_t model_id = 1; | |||
| @@ -24,7 +24,6 @@ | |||
| #include "common/helper/model_helper.h" | |||
| #include "common/op/ge_op_utils.h" | |||
| #include "common/types.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/op_desc.h" | |||
| #include "graph/types.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| @@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { | |||
| model.SinkModelProfile(); | |||
| } | |||
| TEST_F(UtestDavinciModel, Sink_time_profile) { | |||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
| DavinciModel model(0, nullptr); | |||
| InputData current_data; | |||
| model.SinkTimeProfile(current_data); | |||
| } | |||
| } // namespace ge | |||
| @@ -25,7 +25,6 @@ | |||
| #include "common/op/ge_op_utils.h" | |||
| #include "graph/load/graph_loader.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| using namespace std; | |||
| using namespace testing; | |||
| @@ -21,7 +21,7 @@ | |||
| #include "common/debug/log.h" | |||
| #include "common/l2_cache_optimize.h" | |||
| #include "common/model_parser/base.h" | |||
| #include "common/model_parser/model_parser.h" | |||
| #include "common/properties_manager.h" | |||
| #include "common/types.h" | |||
| @@ -31,7 +31,6 @@ | |||
| #include "common/op/ge_op_utils.h" | |||
| #include "graph/load/graph_loader.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/davinci_model_parser.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| //#include "new_op_test_utils.h" | |||
| #undef private | |||
| @@ -0,0 +1,87 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #include <memory> | |||
| #include "graph/anchor.h" | |||
| #include "graph/attr_value.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/utils/op_desc_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "omg/omg_inner_types.h" | |||
| #define protected public | |||
| #define private public | |||
| #include "graph/manager/graph_caching_allocator.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #undef protected | |||
| #undef private | |||
| using namespace std; | |||
| using namespace testing; | |||
| using namespace ge; | |||
| using domi::GetContext; | |||
| class UtestGraphCachingAllocatorTest : public testing::Test { | |||
| protected: | |||
| void SetUp() {} | |||
| void TearDown() { GetContext().out_nodes_map.clear(); } | |||
| }; | |||
| TEST_F(UtestGraphCachingAllocatorTest, initialize_success) { | |||
| std::vector<rtMemType_t> mem_type; | |||
| mem_type.push_back(RT_MEMORY_HBM); | |||
| EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||
| MemManager::Instance().Finalize(); | |||
| } | |||
| TEST_F(UtestGraphCachingAllocatorTest, malloc_success) { | |||
| std::vector<rtMemType_t> mem_type; | |||
| mem_type.push_back(RT_MEMORY_HBM); | |||
| EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||
| uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); | |||
| EXPECT_NE(nullptr, ptr); | |||
| MemManager::Instance().Finalize(); | |||
| } | |||
| TEST_F(UtestGraphCachingAllocatorTest, extend_malloc_success) { | |||
| std::vector<rtMemType_t> mem_type; | |||
| mem_type.push_back(RT_MEMORY_HBM); | |||
| EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||
| uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); | |||
| EXPECT_NE(nullptr, ptr); | |||
| ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kBinSizeUnit32*kMByteSize); | |||
| EXPECT_NE(nullptr, ptr); | |||
| MemManager::Instance().Finalize(); | |||
| } | |||
| TEST_F(UtestGraphCachingAllocatorTest, malloc_statics) { | |||
| std::vector<rtMemType_t> mem_type; | |||
| mem_type.push_back(RT_MEMORY_HBM); | |||
| EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||
| uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); | |||
| EXPECT_NE(nullptr, ptr); | |||
| uint8_t *ptr1 = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kKByteSize); | |||
| EXPECT_NE(nullptr, ptr); | |||
| EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr), SUCCESS); | |||
| EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr1), SUCCESS); | |||
| MemManager::Instance().CachingInstance(RT_MEMORY_HBM).FreeCachedBlocks(); | |||
| MemManager::Instance().Finalize(); | |||
| } | |||
| @@ -0,0 +1,113 @@ | |||
| /** | |||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #include <vector> | |||
| #include "runtime/rt.h" | |||
| #define protected public | |||
| #define private public | |||
| #include "hybrid/model/hybrid_model_builder.h" | |||
| #include "hybrid/model/hybrid_model.h" | |||
| #include "model/ge_model.h" | |||
| #include "model/ge_root_model.h" | |||
| #include "hybrid/node_executor/aicore/aicore_op_task.h" | |||
| #include "framework/common/taskdown_common.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "graph/ge_context.h" | |||
| #include "hybrid/executor/hybrid_execution_context.h" | |||
| #include "hybrid/node_executor/aicore/aicore_task_builder.h" | |||
| #include "graph/load/model_manager/tbe_handle_store.h" | |||
| #include "graph/types.h" | |||
| #undef private | |||
| #undef protected | |||
| using namespace std; | |||
| using namespace testing; | |||
| using namespace ge; | |||
| class UtestGeHybrid : public testing::Test { | |||
| protected: | |||
| void SetUp() {} | |||
| void TearDown() {} | |||
| }; | |||
| static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { | |||
| auto op_desc = std::make_shared<ge::OpDesc>(name, type); | |||
| op_desc->SetStreamId(0); | |||
| op_desc->SetId(0); | |||
| op_desc->SetWorkspace({}); | |||
| ; | |||
| op_desc->SetWorkspaceBytes({}); | |||
| op_desc->SetInputOffset({}); | |||
| op_desc->SetOutputOffset({}); | |||
| ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC"); | |||
| bool support_dynamic = true; | |||
| ge::AttrUtils::GetBool(op_desc, "support_dynamicshape", support_dynamic); | |||
| return op_desc; | |||
| } | |||
| TEST_F(UtestGeHybrid, aicore_op_task_init_success) { | |||
| // build aicore task | |||
| auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask()); | |||
| domi::TaskDef task_def; | |||
| task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); | |||
| domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); | |||
| kernel_with_handle->set_original_kernel_key(""); | |||
| kernel_with_handle->set_node_info(""); | |||
| kernel_with_handle->set_block_dim(32); | |||
| kernel_with_handle->set_args_size(64); | |||
| string args(64, '1'); | |||
| kernel_with_handle->set_args(args.data(), 64); | |||
| domi::KernelContext *context = kernel_with_handle->mutable_context(); | |||
| context->set_op_index(1); | |||
| context->set_kernel_type(2); // ccKernelType::TE | |||
| uint16_t args_offset[9] = {0}; | |||
| context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); | |||
| OpDescPtr op_desc = CreateOpDesc("Add", "Add"); | |||
| std::vector<char> kernelBin; | |||
| TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin)); | |||
| op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | |||
| std::string kernel_name("kernel/Add"); | |||
| AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | |||
| ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); | |||
| rtStream_t stream = nullptr; | |||
| rtStreamCreate(&stream, 0); | |||
| ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | |||
| char *handle = ""; | |||
| aicore_task->handle_ = handle; | |||
| aicore_task->tiling_key_ = 1; | |||
| ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | |||
| } | |||
| TEST_F(UtestGeHybrid, task_update_tiling_info) { | |||
| auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask()); | |||
| aicore_task->is_single_op_ = true; | |||
| auto graph = make_shared<ComputeGraph>("graph"); | |||
| OpDescPtr op_desc = CreateOpDesc("Add", "Add"); | |||
| ge::AttrUtils::SetStr(op_desc, "compile_info_key", "key"); | |||
| ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json"); | |||
| auto node = graph->AddNode(op_desc); | |||
| optiling::OpRunInfo tiling_info; | |||
| ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS); | |||
| } | |||
| @@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test { | |||
| void TearDown() {} | |||
| }; | |||
| //rt api stub | |||
| rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) { | |||
| return RT_ERROR_NONE; | |||
| } | |||
| /* | |||
| TEST_F(UtestSingleOpModel, test_init_model) { | |||
| string model_data_str = "123456789"; | |||
| @@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) { | |||
| std::mutex stream_mu_; | |||
| rtStream_t stream_ = nullptr; | |||
| SingleOp single_op(&stream_mu_, stream_); | |||
| ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||
| // SingleOp single_op(&stream_mu_, stream_); | |||
| // | |||
| // ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||
| } | |||
| /* | |||
| TEST_F(UtestSingleOpModel, test_build_kernel_task) { | |||
| @@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) { | |||
| ASSERT_EQ(op_model.Init(), FAILED); | |||
| } | |||
| */ | |||
| /* | |||
| TEST_F(UtestSingleOpModel, test_parse_arg_table) { | |||
| string model_data_str = "123456789"; | |||
| SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); | |||
| @@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) { | |||
| ASSERT_EQ(op.arg_table_[1].size(), 1); | |||
| ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); | |||
| } | |||
| */ | |||
| TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) { | |||
| string name = "relu"; | |||
| string type = "relu"; | |||
| auto op_desc = std::make_shared<ge::OpDesc>(name, type); | |||
| op_desc->SetStreamId(0); | |||
| op_desc->SetId(0); | |||
| TbeOpTask task; | |||
| task.op_desc_ = op_desc; | |||
| task.model_name_ = "resnet_50"; | |||
| task.model_id_ = 1; | |||
| TaskDescInfo task_desc_info; | |||
| uint32_t model_id; | |||
| task.GetProfilingArgs(task_desc_info, model_id); | |||
| ASSERT_EQ(task_desc_info.model_name, "resnet_50"); | |||
| ASSERT_EQ(model_id, 1); | |||
| } | |||
| @@ -0,0 +1,117 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #include <vector> | |||
| #include "graph/load/model_manager/model_utils.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "runtime/rt.h" | |||
| #define protected public | |||
| #define private public | |||
| #include "single_op/single_op_model.h" | |||
| #include "single_op/task/tbe_task_builder.h" | |||
| #include "single_op/task/op_task.h" | |||
| #include "single_op/task/tbe_task_builder.h" | |||
| #include "external/register/op_tiling_registry.h" | |||
| #undef private | |||
| #undef protected | |||
| using namespace std; | |||
| using namespace testing; | |||
| using namespace ge; | |||
| using namespace optiling; | |||
| class UtestSingleOpTask : public testing::Test { | |||
| protected: | |||
| void SetUp() {} | |||
| void TearDown() {} | |||
| }; | |||
| TEST_F(UtestSingleOpTask, test_build_kernel_task) { | |||
| string model_data_str = "123456789"; | |||
| SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); | |||
| model.input_offset_list_.push_back(0); | |||
| model.input_sizes_.push_back(16); | |||
| model.output_offset_list_.push_back(0); | |||
| model.output_sizes_.push_back(16); | |||
| auto graph = make_shared<ComputeGraph>("graph"); | |||
| auto op_desc = make_shared<OpDesc>("Add", "Add"); | |||
| std::vector<char> kernelBin; | |||
| TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin)); | |||
| op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | |||
| std::string kernel_name("kernel/Add"); | |||
| AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | |||
| vector<int64_t> shape{16, 16}; | |||
| GeShape ge_shape(shape); | |||
| GeTensorDesc desc(ge_shape); | |||
| op_desc->AddInputDesc(desc); | |||
| op_desc->AddOutputDesc(desc); | |||
| auto node = graph->AddNode(op_desc); | |||
| std::mutex stream_mu_; | |||
| rtStream_t stream_ = nullptr; | |||
| StreamResource stream_resource(0); | |||
| SingleOp single_op(&stream_resource, &stream_mu_, stream_); | |||
| domi::TaskDef task_def; | |||
| task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); | |||
| domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); | |||
| kernel_with_handle->set_original_kernel_key(""); | |||
| kernel_with_handle->set_node_info(""); | |||
| kernel_with_handle->set_block_dim(32); | |||
| kernel_with_handle->set_args_size(64); | |||
| string args(64, '1'); | |||
| kernel_with_handle->set_args(args.data(), 64); | |||
| domi::KernelContext *context = kernel_with_handle->mutable_context(); | |||
| context->set_op_index(1); | |||
| context->set_kernel_type(2); // ccKernelType::TE | |||
| uint16_t args_offset[9] = {0}; | |||
| context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); | |||
| model.op_list_[1] = node; | |||
| TbeOpTask task_tmp; | |||
| TbeOpTask *task = &task_tmp; | |||
| ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS); | |||
| vector<GeTensorDesc> input_desc; | |||
| vector<DataBuffer> input_buffers; | |||
| vector<GeTensorDesc> output_desc; | |||
| vector<DataBuffer> output_buffers; | |||
| task->node_ = node; | |||
| OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool {return true;}; | |||
| OpTilingRegistryInterf("Add", op_tiling_func); | |||
| ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key"); | |||
| ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json"); | |||
| char c = '0'; | |||
| char* buffer = &c; | |||
| task->tiling_buffer_ = buffer; | |||
| task->max_tiling_size_ = 64; | |||
| task->tiling_data_ = "tiling_data"; | |||
| task->arg_size_ = 64; | |||
| uint8_t task_args{0}; | |||
| task->args_.reset(&task_args); | |||
| ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); | |||
| char handle_tmp = '0'; | |||
| char *handle = &handle_tmp; | |||
| task->SetHandle(handle); | |||
| ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); | |||
| } | |||
| @@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData); | |||
| #define RT_FUSION_KERNEL_DUMPFLAG (0x04) | |||
| #define RT_KERNEL_CUSTOM_AICPU (0x08) | |||
| /** | |||
| * @ingroup rt_kernel | |||
| * @brief kernel mode | |||
| */ | |||
| #define RT_DEFAULT_KERNEL_MODE (0x00) | |||
| #define RT_NORMAL_KERNEL_MODE (0x01) | |||
| #define RT_ALL_KERNEL_MODE (0x02) | |||
| /** | |||
| * @ingroup rt_kernel | |||
| * @brief kernel L1 Fusion Dump bit flags | |||
| @@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData); | |||
| */ | |||
| RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); | |||
| /** | |||
| * @ingroup rt_kernel | |||
| * @brief register device binary | |||
| * @param [in] bin device binary description | |||
| * @param [out] handle device binary handle | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); | |||
| /** | |||
| * @ingroup rt_kernel | |||
| * @brief register fast memeory device binary | |||
| @@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u | |||
| RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||
| rtSmDesc_t *smDesc, rtStream_t stream); | |||
| /** | |||
| * @ingroup rt_kernel | |||
| * @brief launch kernel with handle to device | |||
| * @param [in] handle program | |||
| * @param [in] devFunc device function description | |||
| * @param [in] blockDim block dimentions | |||
| * @param [in] args argments address for kernel function | |||
| * @param [in] argsSize argements size | |||
| * @param [in] smDesc shared memory description | |||
| * @param [in] stream associated stream | |||
| * @param [in] kernelInfo kernel info | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||
| rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo); | |||
| /** | |||
| * @ingroup rt_kernel | |||
| * @brief launch kernel to device | |||
| @@ -50,6 +50,7 @@ typedef enum tagModelTaskType { | |||
| RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, | |||
| RT_MODEL_TASK_STREAM_LABEL_GOTO, | |||
| RT_MODEL_TASK_MODEL_EXIT, | |||
| RT_MODEL_TASK_ALL_KERNEL, | |||
| } rtModelTaskType_t; | |||
| typedef enum tagModelStreamType { | |||
| @@ -127,6 +128,17 @@ typedef struct tagKernelTaskInfo { | |||
| uint16_t *argsOffset; | |||
| } rtKernelTaskInfo_t; | |||
| typedef struct tagAllKernelTaskInfo { | |||
| uint16_t blockDim; | |||
| uint16_t argsCount; | |||
| uint16_t argsSize; | |||
| uint16_t reserved; | |||
| const void *dev_func; | |||
| void *handle; | |||
| uint8_t *smDesc; | |||
| uint8_t *args; | |||
| uint16_t *argsOffset; | |||
| } rtAllKernelTaskInfo_t; | |||
| typedef struct tagKernelTaskInfoEx { | |||
| uint32_t flags; | |||
| uint32_t argsSize; | |||
| @@ -251,6 +263,7 @@ typedef struct tagTaskInfo { | |||
| union { | |||
| rtKernelTaskInfoEx_t kernelTaskEx; | |||
| rtKernelTaskInfo_t kernelTask; | |||
| rtAllKernelTaskInfo_t allkernelTask; | |||
| rtEventTaskInfo_t eventTask; | |||
| rtStreamSwitchTaskInfo_t streamSwitchTask; | |||
| rtStreamActiveTaskInfo_t streamActiveTask; | |||