| @@ -229,7 +229,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
| rm -rf ${BASEPATH}/cov | |||
| mkdir ${BASEPATH}/cov | |||
| lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||
| lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||
| lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' '/usr/include/*' '*/metadef/*' '*/parser/*' -o cov/coverage.info | |||
| cd ${BASEPATH}/cov | |||
| genhtml coverage.info | |||
| fi | |||
| @@ -27,7 +27,7 @@ ExternalProject_Add(gtest_build | |||
| URL ${REQ_URL} | |||
| TLS_VERIFY OFF | |||
| CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR> | |||
| -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON | |||
| -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE | |||
| BUILD_COMMAND $(MAKE) | |||
| INSTALL_COMMAND $(MAKE) install | |||
| EXCLUDE_FROM_ALL TRUE | |||
| @@ -52,10 +52,27 @@ set_target_properties(gtest_main PROPERTIES | |||
| target_include_directories(gtest INTERFACE ${GTEST_PKG_DIR}/include) | |||
| target_include_directories(gtest_main INTERFACE ${GTEST_PKG_DIR}/include) | |||
| add_library(gmock SHARED IMPORTED) | |||
| set_target_properties(gmock PROPERTIES | |||
| IMPORTED_LOCATION ${GTEST_PKG_DIR}/lib/libgmock.so | |||
| ) | |||
| add_library(gmock_main SHARED IMPORTED) | |||
| set_target_properties(gmock_main PROPERTIES | |||
| IMPORTED_LOCATION ${GTEST_PKG_DIR}/lib/libgmock_main.so | |||
| ) | |||
| target_include_directories(gmock INTERFACE ${GTEST_PKG_DIR}/include) | |||
| target_include_directories(gmock_main INTERFACE ${GTEST_PKG_DIR}/include) | |||
| set(INSTALL_BASE_DIR "") | |||
| set(INSTALL_LIBRARY_DIR lib) | |||
| install(FILES ${GTEST_PKG_DIR}/lib/libgtest.so ${GTEST_PKG_DIR}/lib/libgtest_main.so OPTIONAL | |||
| install(FILES ${GTEST_PKG_DIR}/lib/libgtest.so ${GTEST_PKG_DIR}/lib/libgtest_main.so ${GTEST_PKG_DIR}/lib/libgmock.so ${GTEST_PKG_DIR}/lib/libgmock_main.so OPTIONAL | |||
| DESTINATION ${INSTALL_LIBRARY_DIR}) | |||
| add_dependencies(gtest gtest_build) | |||
| @@ -31,6 +31,7 @@ set(PROTO_HEADER_LIST | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST}) | |||
| protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST}) | |||
| protobuf_generate(ge_client PROTO_CLIENT_HEADER_SRCS PROTO_CLIENT_HEADER_HDRS ${PROTO_HEADER_LIST}) | |||
| if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | |||
| ############ libge_proto_common.a ############ | |||
| @@ -56,7 +57,7 @@ target_link_libraries(ge_proto_common PRIVATE | |||
| ############ libge_proto_client.a ############ | |||
| add_library(ge_proto_client STATIC | |||
| ${PROTO_HEADER_HDRS} | |||
| ${PROTO_CLIENT_HEADER_HDRS} | |||
| ${PROTO_CLIENT_SRCS} | |||
| ) | |||
| @@ -65,6 +66,11 @@ target_compile_definitions(ge_proto_client PRIVATE | |||
| google=ascend_private | |||
| ) | |||
| target_include_directories(ge_proto_client PRIVATE | |||
| ${CMAKE_BINARY_DIR}/proto/ge_client | |||
| ${CMAKE_BINARY_DIR}/proto/ge_client/proto | |||
| ) | |||
| target_compile_options(ge_proto_client PRIVATE | |||
| -O2 | |||
| -fno-common | |||
| @@ -103,6 +109,7 @@ set(TRAIN_SRC_LIST | |||
| "common/profiling/profiling_manager.cc" | |||
| "common/dump/dump_manager.cc" | |||
| "common/dump/dump_properties.cc" | |||
| "common/dump/opdebug_register.cc" | |||
| "common/dump/dump_op.cc" | |||
| "common/profiling/ge_profiling.cc" | |||
| "common/profiling/ge_runner_profiling.cc" | |||
| @@ -319,6 +326,7 @@ set(TRAIN_SRC_LIST | |||
| "graph/passes/variable_ref_useless_control_out_delete_pass.cc" | |||
| "graph/passes/end_of_sequence_add_control_pass.cc" | |||
| "graph/passes/memcpy_addr_async_pass.cc" | |||
| "graph/passes/parallel_group_pass.cc" | |||
| "graph/passes/set_input_output_offset_pass.cc" | |||
| "graph/preprocess/graph_preprocess.cc" | |||
| "graph/preprocess/insert_op/ge_aipp_op.cc" | |||
| @@ -427,6 +435,7 @@ set(INFER_SRC_LIST | |||
| "common/dump/dump_properties.cc" | |||
| "common/dump/dump_manager.cc" | |||
| "common/dump/dump_op.cc" | |||
| "common/dump/opdebug_register.cc" | |||
| "common/dump/dump_server.cc" | |||
| "common/helper/model_cache_helper.cc" | |||
| "ge_local_engine/engine/host_cpu_engine.cc" | |||
| @@ -605,6 +614,7 @@ set(INFER_SRC_LIST | |||
| "graph/passes/hccl_group_pass.cc" | |||
| "graph/passes/memcpy_addr_async_pass.cc" | |||
| "graph/passes/set_input_output_offset_pass.cc" | |||
| "graph/passes/parallel_group_pass.cc" | |||
| "graph/manager/model_manager/event_manager.cc" | |||
| "graph/manager/util/rt_context_util.cc" | |||
| "graph/manager/util/variable_accelerate_ctrl.cc" | |||
| @@ -935,6 +945,10 @@ add_library(atc_stub_ge_compiler SHARED | |||
| add_dependencies(atc_stub_ge_compiler ge_stub) | |||
| target_compile_options(atc_stub_ge_compiler PRIVATE | |||
| -fno-common | |||
| ) | |||
| target_link_libraries(atc_stub_ge_compiler PRIVATE | |||
| $<BUILD_INTERFACE:intf_pub> | |||
| ) | |||
| @@ -971,6 +985,10 @@ add_library(fwk_stub_ge_runner SHARED | |||
| add_dependencies(fwk_stub_ge_runner ge_stub) | |||
| target_compile_options(fwk_stub_ge_runner PRIVATE | |||
| -fno-common | |||
| ) | |||
| target_link_libraries(fwk_stub_ge_runner PRIVATE | |||
| $<BUILD_INTERFACE:intf_pub> | |||
| ) | |||
| @@ -103,7 +103,7 @@ ge::Status Analyzer::Initialize() { | |||
| // Initialize file | |||
| string real_path = RealPath(kFilePath.c_str()); | |||
| if (real_path.empty()) { | |||
| GELOGE(FAILED, "File path is invalid."); | |||
| GELOGE(FAILED, "[Check][AnalyzeFilePath]File path is empty, Path invalid."); | |||
| return FAILED; | |||
| } | |||
| json_file_name_ = real_path + "/" + kAnalyzeFile; | |||
| @@ -155,12 +155,12 @@ std::shared_ptr<GraphInfo> Analyzer::GetJsonObject(uint64_t session_id, uint64_t | |||
| std::lock_guard<std::recursive_mutex> lg(mutex_); | |||
| auto iter = graph_infos_.find(session_id); | |||
| if (iter == graph_infos_.end()) { | |||
| GELOGE(PARAM_INVALID, "session_id:%lu does not exist!", session_id); | |||
| GELOGE(PARAM_INVALID, "[Check][SessionId]session_id:%lu does not exist! graph_id:%lu", session_id, graph_id); | |||
| return nullptr; | |||
| } else { | |||
| auto iter1 = (iter->second).find(graph_id); | |||
| if (iter1 == (iter->second).end()) { | |||
| GELOGE(PARAM_INVALID, "graph_id:%lu does not exist!", graph_id); | |||
| GELOGE(PARAM_INVALID, "[Check][GraphId]graph_id:%lu does not exist! session_id:%lu.", graph_id, session_id); | |||
| return nullptr; | |||
| } | |||
| GELOGI("GetJsonObject Success!session_id:%lu graph_id:%lu", session_id, graph_id); | |||
| @@ -186,11 +186,11 @@ ge::Status Analyzer::CreateAnalyzerFile() { | |||
| std::lock_guard<std::mutex> lg(file_mutex_); | |||
| int fd = open(json_file_name_.c_str(), O_WRONLY | O_CREAT | O_TRUNC, kFileAuthority); | |||
| if (fd < 0) { | |||
| GELOGE(INTERNAL_ERROR, "Fail to open the file: %s.", json_file_name_.c_str()); | |||
| GELOGE(INTERNAL_ERROR, "[FileOpen][AnalyzeFile]Fail to open the analyze file: %s.", json_file_name_.c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| if (close(fd) != 0) { | |||
| GELOGE(INTERNAL_ERROR, "Fail to close the file: %s.", json_file_name_.c_str()); | |||
| GELOGE(INTERNAL_ERROR, "[FileClose][AnalyzeFile]Fail to close the analyze file: %s.", json_file_name_.c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| is_json_file_create_ = true; | |||
| @@ -200,7 +200,7 @@ ge::Status Analyzer::CreateAnalyzerFile() { | |||
| } | |||
| ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_id) { | |||
| GELOGD("start to save analyze file!"); | |||
| GELOGD("start to save analyze file"); | |||
| auto graph_info = GetJsonObject(session_id, graph_id); | |||
| GE_CHECK_NOTNULL(graph_info); | |||
| @@ -211,7 +211,7 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_ | |||
| std::lock_guard<std::mutex> lg(file_mutex_); | |||
| json_file_.open(json_file_name_, std::ios::app); | |||
| if (!json_file_.is_open()) { | |||
| GELOGE(FAILED, "analyzer file does not exist[%s]", json_file_name_.c_str()); | |||
| GELOGE(FAILED, "[Check][AnalyzeFile]analyze file does not exist[%s]", json_file_name_.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| @@ -221,7 +221,10 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_ | |||
| try { | |||
| json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; | |||
| } catch (nlohmann::detail::type_error &e) { | |||
| GELOGE(FAILED, "analyzer file [%s] failed because [%s]", json_file_name_.c_str(), e.what()); | |||
| GELOGE(FAILED, | |||
| "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s]," | |||
| "session_id:%lu, graph_id:%lu", | |||
| json_file_name_.c_str(), e.what(), session_id, graph_id); | |||
| ret_failed = true; | |||
| } | |||
| json_file_.close(); | |||
| @@ -229,7 +232,7 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_ | |||
| } | |||
| ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { | |||
| GELOGD("start to do analyzer!"); | |||
| GELOGD("start to do analyzer process"); | |||
| auto pnode = data_info.node_ptr; | |||
| GE_CHECK_NOTNULL(pnode); | |||
| @@ -241,7 +244,9 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { | |||
| GE_CHECK_NOTNULL(graph_info); | |||
| auto status = SaveOpInfo(desc, data_info, graph_info); | |||
| if (status != SUCCESS) { | |||
| GELOGE(status, "save op info failed!"); | |||
| GELOGE(status, | |||
| "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!", | |||
| desc->GetName().c_str(), desc->GetType().c_str()); | |||
| return FAILED; | |||
| } | |||
| // create json file | |||
| @@ -69,7 +69,10 @@ Status CheckOptionsValid(const std::map<string, string> &options) { | |||
| auto job_id_iter = options.find(OPTION_EXEC_JOB_ID); | |||
| if (job_id_iter != options.end()) { | |||
| if (job_id_iter->second.length() > kMaxStrLen) { | |||
| GELOGE(PARAM_INVALID, "CheckOptionsValid job_id failed, string len > %d", kMaxStrLen); | |||
| GELOGE(PARAM_INVALID,"[Check][JobId]Failed," | |||
| "the job_id [%s] string length > max string length: %d", | |||
| job_id_iter->second.c_str(), kMaxStrLen); | |||
| REPORT_INPUT_ERROR("E10051", std::vector<std::string>({"id","length"}), std::vector<std::string>({job_id_iter->second, std::to_string(kMaxStrLen)})); | |||
| return FAILED; | |||
| } | |||
| } | |||
| @@ -84,7 +87,8 @@ Status GEInitializeImpl(const std::map<string, string> &options) { | |||
| std::string path_base = ge::GELib::GetPath(); | |||
| auto ret = ErrorManager::GetInstance().Init(path_base); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(GE_CLI_INIT_FAILED, "ErrorManager init fail"); | |||
| GELOGE(GE_CLI_INIT_FAILED, | |||
| "[Init][PathBase]Init failed when pass param path_base:%s", path_base.c_str()); | |||
| return ret; | |||
| } | |||
| @@ -104,7 +108,9 @@ Status GEInitializeImpl(const std::map<string, string> &options) { | |||
| bool is_proto_init = manager->Initialize(option_tmp); | |||
| GE_TIMESTAMP_END(GEInitialize, "GEInitialize::ManagerInitialize"); | |||
| if (!is_proto_init) { | |||
| GELOGE(GE_CLI_INIT_FAILED, "geInitialize failed, ops proto path is invalid."); | |||
| GELOGE(GE_CLI_INIT_FAILED, | |||
| "[Init][OpsProtoPath]Loading OpsProto lib plugin failed, OpsProtoPath:%s invalid.", | |||
| opsproto_path.c_str()); | |||
| return FAILED; | |||
| } | |||
| @@ -127,7 +133,7 @@ Status GEInitializeImpl(const std::map<string, string> &options) { | |||
| ret = ge::GELib::Initialize(options); | |||
| GE_TIMESTAMP_END(GELibInitialize, "GEInitialize::GELibInitialize"); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(GE_CLI_INIT_FAILED, "geInitialize failed, error code = %u", ret); | |||
| GELOGE(GE_CLI_INIT_FAILED, "[Init][GELib]Failed, error code = %u", ret); | |||
| return FAILED; | |||
| } | |||
| @@ -155,7 +161,9 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) { | |||
| std::map<std::string, std::string> str_options; | |||
| for (auto &option : options) { | |||
| if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) { | |||
| GELOGE(FAILED, "GEInitialize options is nullptr."); | |||
| GELOGE(FAILED, "[Check][Param]Options invalid, first or second option is nullptr."); | |||
| REPORT_INNER_ERROR("E19999", "Check parameter's options invalid," | |||
| "the first or second option is nullptr."); | |||
| return FAILED; | |||
| } | |||
| std::string key = option.first.GetString(); | |||
| @@ -171,17 +179,17 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) { | |||
| // GE finalize, releasing all resources | |||
| Status GEFinalize() { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize); | |||
| GELOGT(TRACE_INIT, "GEFinalize start"); | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| std::lock_guard<std::mutex> lock(g_ge_release_mutex); | |||
| // check init status | |||
| if (!g_ge_initialized) { | |||
| GELOGW("GEFinalize is called before GEInitialize"); | |||
| GELOGW("[FINAL][FINAL]GEFinalize is called before GEInitialize"); | |||
| return SUCCESS; | |||
| } | |||
| std::lock_guard<std::mutex> lock(g_ge_release_mutex); | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize); | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| GELOGT(TRACE_INIT, "GEFinalize start"); | |||
| // call Finalize | |||
| Status ret = SUCCESS; | |||
| Status middle_ret; | |||
| @@ -237,13 +245,17 @@ Session::Session(const std::map<string, string> &options) { | |||
| // check init status | |||
| sessionId_ = 0; | |||
| if (!g_ge_initialized) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized."); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Construct][Session]Failed because lack GEInitialize call before."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "Creating session failed because lack GEInitialize call before."); | |||
| return; | |||
| } | |||
| // call Initialize | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session Constructor failed"); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Construct][Session]Failed, GELib instance is nullptr or it is not InitFlag"); | |||
| return; | |||
| } | |||
| @@ -256,7 +268,7 @@ Session::Session(const std::map<string, string> &options) { | |||
| if (ret == SUCCESS) { | |||
| sessionId_ = session_id; | |||
| } else { | |||
| GELOGE(ret, "Session constructor failed, session Id not initialized"); | |||
| GELOGE(ret, "[Construct][Session]Failed, error code:%u.", ret); | |||
| return; | |||
| } | |||
| GELOGT(TRACE_STOP, "Session Constructor finished"); | |||
| @@ -270,13 +282,17 @@ Session::Session(const std::map<AscendString, AscendString> &options) { | |||
| // check init status | |||
| sessionId_ = 0; | |||
| if (!g_ge_initialized) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized."); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Construct][Session]Failed because lack GEInitialize call before."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "Creating session failed because lack GEInitialize call before."); | |||
| return; | |||
| } | |||
| // call Initialize | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session Constructor failed"); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Construct][Session]Failed, the GELib instance is nullptr or is not InitFlag"); | |||
| return; | |||
| } | |||
| @@ -284,7 +300,9 @@ Session::Session(const std::map<AscendString, AscendString> &options) { | |||
| std::map<std::string, std::string> str_options; | |||
| for (auto &option : options) { | |||
| if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) { | |||
| GELOGE(FAILED, "Session options is nullptr."); | |||
| GELOGE(FAILED, "[Construct][Session]Failed, the first or second option is nullptr."); | |||
| REPORT_INNER_ERROR("E19999", "Creating session's options invalid," | |||
| "the first or second option is nullptr."); | |||
| return; | |||
| } | |||
| std::string key = option.first.GetString(); | |||
| @@ -299,7 +317,7 @@ Session::Session(const std::map<AscendString, AscendString> &options) { | |||
| if (ret == SUCCESS) { | |||
| sessionId_ = session_id; | |||
| } else { | |||
| GELOGE(ret, "Session constructor failed, session Id not initialized"); | |||
| GELOGE(ret, "[Construct][Session]Failed, error code:%u.", ret); | |||
| return; | |||
| } | |||
| GELOGT(TRACE_STOP, "Session Constructor finished"); | |||
| @@ -331,17 +349,18 @@ Session::~Session() { | |||
| ret = instance_ptr->SessionManagerObj().DestroySession(session_id); | |||
| } catch (google::protobuf::FatalException &e) { | |||
| GELOGE(GE_CLI_SESS_DESTROY_FAILED, "SessionDestructor throws FatalException"); | |||
| GELOGE(GE_CLI_SESS_DESTROY_FAILED, "[Destruct][Session]Failed because get fatalException."); | |||
| } | |||
| // check return status, return, update session id if success | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Session Destructor failed"); | |||
| GELOGE(ret, "[Destruct][Session]Failed, error code:%u.", ret); | |||
| } | |||
| GELOGT(TRACE_STOP, "Session Destructor finished"); | |||
| } | |||
| // Add Graph | |||
| Status Session::AddGraph(uint32_t graph_id, const Graph &graph) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| std::map<std::string, std::string> options; | |||
| @@ -349,25 +368,32 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph) { | |||
| return AddGraph(graph_id, graph, options); | |||
| } | |||
| // Add Graph | |||
| Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Add][Graph]Failed because GELib instance is nullptr or it is not InitFlag."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "AddGraph Failed, GELib instance is nullptr or it is not InitFlag."); | |||
| return FAILED; | |||
| } | |||
| GELOGD("Adding graph to session"); | |||
| Status ret = instance_ptr->SessionManagerObj().AddGraph(sessionId_, graph_id, graph, options); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "AddGraph failed in Session."); | |||
| GELOGE(ret, | |||
| "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", | |||
| ret, sessionId_, graph_id); | |||
| return FAILED; | |||
| } | |||
| GELOGD("AddGraph finished in Session."); | |||
| return ret; | |||
| } | |||
| //Add Graph | |||
| Status Session::AddGraph(uint32_t graph_id, const Graph &graph, | |||
| const std::map<AscendString, AscendString> &options) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| @@ -375,14 +401,19 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Add][Graph]Failed, the GELib instance is nullptr or is not InitFlag."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "AddGraph Failed, GELib instance is nullptr or it is not InitFlag."); | |||
| return FAILED; | |||
| } | |||
| GELOGD("Adding graph to session"); | |||
| std::map<std::string, std::string> str_options; | |||
| for (auto &option : options) { | |||
| if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) { | |||
| GELOGE(FAILED, "AddGraph options is nullptr."); | |||
| GELOGE(FAILED, "[Add][Graph]Failed, the first or second option is nullptr."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "Add Graph Failed, the first or second option is nullptr."); | |||
| return FAILED; | |||
| } | |||
| std::string key = option.first.GetString(); | |||
| @@ -391,7 +422,9 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, | |||
| } | |||
| Status ret = instance_ptr->SessionManagerObj().AddGraph(sessionId_, graph_id, graph, str_options); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "AddGraph failed in Session."); | |||
| GELOGE(ret, | |||
| "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", | |||
| ret, sessionId_, graph_id); | |||
| return FAILED; | |||
| } | |||
| GELOGD("AddGraph finished in Session."); | |||
| @@ -405,6 +438,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) { | |||
| return AddGraphWithCopy(graph_id, graph, options); | |||
| } | |||
| // Add Graph With Copy | |||
| Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, | |||
| const std::map<AscendString, AscendString> &options) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| @@ -412,7 +446,10 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Add][Graph]Failed, the GELib instance is nullptr or is not InitFlag."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "AddGraph Failed, GELib instance is nullptr or is not InitFlag."); | |||
| return FAILED; | |||
| } | |||
| std::map<std::string, std::string> str_options; | |||
| @@ -422,13 +459,16 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, | |||
| GELOGD("Adding graph to session"); | |||
| Status ret = instance_ptr->SessionManagerObj().AddGraphWithCopy(sessionId_, graph_id, graph, str_options); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "AddGraph failed in Session."); | |||
| GELOGE(ret, | |||
| "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", | |||
| ret, sessionId_, graph_id); | |||
| return FAILED; | |||
| } | |||
| GELOGD("AddGraph finished in Session."); | |||
| return ret; | |||
| } | |||
| // Remove Graph | |||
| Status Session::RemoveGraph(uint32_t graph_id) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| GELOGT(TRACE_INIT, "Session RemoveGraph start"); | |||
| @@ -437,7 +477,10 @@ Status Session::RemoveGraph(uint32_t graph_id) { | |||
| // call RemoveGraph | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (!instance_ptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session RemoveGraph failed"); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Remove][Graph]Failed, GELib instance is nullptr or is not InitFlag "); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "RemoveGraph Failed, GELib instance is nullptr or is not InitFlag."); | |||
| return FAILED; | |||
| } | |||
| @@ -445,13 +488,16 @@ Status Session::RemoveGraph(uint32_t graph_id) { | |||
| Status ret = instance_ptr->SessionManagerObj().RemoveGraph(sessionId_, graph_id); | |||
| // check return status, return | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "session RemoveGraph failed"); | |||
| GELOGE(ret, | |||
| "[Remove][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", | |||
| ret, sessionId_, graph_id); | |||
| return FAILED; | |||
| } | |||
| GELOGT(TRACE_STOP, "Session RemoveGraph finished"); | |||
| return ret; | |||
| } | |||
| // Print Output Result | |||
| void PrintOutputResult(std::vector<Tensor> &outputs) { | |||
| if (outputs.empty() || outputs[0].GetData() == nullptr) { | |||
| GELOGW("outputs is empty or data is nullptr."); | |||
| @@ -499,6 +545,7 @@ void PrintOutputResult(std::vector<Tensor> &outputs) { | |||
| } | |||
| } | |||
| // Run Graph | |||
| Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| GELOGT(TRACE_INIT, "Session RunGraph start"); | |||
| @@ -508,14 +555,19 @@ Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, s | |||
| // call RunGraph | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session RunGraph failed"); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Run][Graph]Failed, GELib instance is nullptr or is not InitFlag."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "RunGraph Failed, GELib instance is nullptr or is not InitFlag."); | |||
| return FAILED; | |||
| } | |||
| GELOGT(TRACE_RUNNING, "Running Graph"); | |||
| Status ret = instance_ptr->SessionManagerObj().RunGraph(sessionId_, graph_id, graph_inputs, outputs); | |||
| // check return status | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Session RunGraph failed"); | |||
| GELOGE(ret, | |||
| "[Run][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", | |||
| ret, sessionId_, graph_id); | |||
| return FAILED; | |||
| } | |||
| @@ -537,7 +589,7 @@ Status Session::RunGraphWithStreamAsync(uint32_t graph_id, const std::vector<Ten | |||
| std::vector<Tensor> graph_inputs = inputs; | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session RunGraph failed"); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge instace init failed"); | |||
| return FAILED; | |||
| } | |||
| GELOGT(TRACE_RUNNING, "Running Graph"); | |||
| @@ -573,30 +625,40 @@ Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFu | |||
| return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, str_key, callback); | |||
| } | |||
| // Build Graph | |||
| Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Build][Graph]Failed, the GELib instance is nullptr or is not InitFlag."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "Build graph failed, the GELib instance is nullptr or is not InitFlag."); | |||
| return FAILED; | |||
| } | |||
| GELOGT(TRACE_RUNNING, "Building Graph"); | |||
| Status ret = instance_ptr->SessionManagerObj().BuildGraph(sessionId_, graph_id, inputs); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Session BuildGraph failed"); | |||
| GELOGE(ret, | |||
| "[Build][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", | |||
| ret, sessionId_, graph_id); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Run Graph Asynchronously | |||
| Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs, | |||
| RunAsyncCallback callback) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelExecute, ErrorMessage::kModelExecute); | |||
| ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Run][Graph]RunGraphAsyncFailed, the GELib instance is nullptr or is not InitFlag."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "RunGraphAsync Failed, the GELib instance is nullptr or is not InitFlag."); | |||
| return FAILED; | |||
| } | |||
| GELOGT(TRACE_RUNNING, "Run Graph Asynchronously"); | |||
| @@ -605,49 +667,59 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorIn | |||
| Status ret = ge::GELib::GetInstance()->SessionManagerObj().RunGraphAsync(sessionId_, graph_id, inputs, callback); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "SessionManager RunGraphAsync failed"); | |||
| GELOGE(ret, "[Run][Graph]RunGraphAsync Failed, error code:%u, session_id:%lu, graph_id:%u.", | |||
| ret, sessionId_, graph_id); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Get Variables | |||
| Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelExecute, ErrorMessage::kModelExecute); | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| auto instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Get][Variables]Failed, the GELib instance is nullptr or is not InitFlag."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "GetVariables failed, the GELib instance is nullptr or is not InitFlag."); | |||
| return FAILED; | |||
| } | |||
| GELOGT(TRACE_RUNNING, "Get Variables"); | |||
| Status ret = ge::GELib::GetInstance()->SessionManagerObj().GetVariables(sessionId_, var_names, var_values); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "SessionManager RunGraphAsync failed"); | |||
| GELOGE(ret, "[Get][Variables]Failed, error code:%u, session_id:%lu.", ret, sessionId_); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Get Variables | |||
| Status Session::GetVariables(const std::vector<AscendString> &var_names, std::vector<Tensor> &var_values) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelExecute, ErrorMessage::kModelExecute); | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| auto instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, | |||
| "[Get][Variables]Failed, the GELib instance is nullptr or is not InitFlag."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "GetVariables failed, the GELib instance is nullptr or is not InitFlag."); | |||
| return FAILED; | |||
| } | |||
| GELOGT(TRACE_RUNNING, "Get Variables"); | |||
| std::vector<ge::string> str_var_names; | |||
| for (auto &var_name : var_names) { | |||
| if (var_name.GetString() == nullptr) { | |||
| GELOGE(FAILED, "GetVariables name is nullptr."); | |||
| GELOGE(FAILED, "[Get][Variable]Failed, variables' names are nullptr."); | |||
| REPORT_INNER_ERROR("E19999", "GetVariables failed, variables' names are nullptr."); | |||
| return FAILED; | |||
| } | |||
| str_var_names.emplace_back(var_name.GetString()); | |||
| } | |||
| Status ret = ge::GELib::GetInstance()->SessionManagerObj().GetVariables(sessionId_, str_var_names, var_values); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "SessionManager RunGraphAsync failed"); | |||
| GELOGE(ret, "[Get][Variables]Failed, error code:%u, session_id:%lu.", ret, sessionId_); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| @@ -16,6 +16,7 @@ set(PROTO_LIST | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST}) | |||
| set(SRC_LIST | |||
| "context/ctx.cc" | |||
| @@ -127,7 +128,7 @@ target_link_libraries(ge_common PRIVATE | |||
| ) | |||
| ############ libge_common.a ############ | |||
| add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS}) | |||
| target_compile_definitions(ge_common_static PRIVATE | |||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
| HOST_VISIBILITY | |||
| @@ -158,7 +159,7 @@ target_include_directories(ge_common_static PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_static | |||
| #### yellow zone #### | |||
| ${GE_DEPEND_DIR}/inc | |||
| ${GE_DEPEND_DIR}/inc/cce | |||
| @@ -25,7 +25,7 @@ void CustAICPUKernelStore::AddCustAICPUKernel(const CustAICPUKernelPtr &kernel) | |||
| } | |||
| void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc> &op_desc) const { | |||
| GELOGD("LoadCustAICPUKernelBinToOpDesc in"); | |||
| GELOGD("LoadCustAICPUKernelBinToOpDesc in."); | |||
| if (op_desc != nullptr) { | |||
| auto kernel_bin = FindKernel(op_desc->GetName()); | |||
| if (kernel_bin != nullptr) { | |||
| @@ -34,6 +34,6 @@ void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr< | |||
| GELOGI("Load cust aicpu kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | |||
| } | |||
| } | |||
| GELOGD("LoadCustAICPUKernelBinToOpDesc success"); | |||
| GELOGD("LoadCustAICPUKernelBinToOpDesc success."); | |||
| } | |||
| } // namespace ge | |||
| @@ -104,8 +104,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties( | |||
| uint64_t session_id) { | |||
| std::lock_guard<std::mutex> lock(mutex_); | |||
| // If session_id is not found in dump_properties_map_, operator[] will insert one. | |||
| return dump_properties_map_[session_id]; | |||
| auto iter = dump_properties_map_.find(session_id); | |||
| if (iter != dump_properties_map_.end()) { | |||
| return iter->second; | |||
| } | |||
| static DumpProperties default_properties; | |||
| return default_properties; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::AddDumpProperties( | |||
| @@ -219,9 +219,9 @@ Status DumpOp::LaunchDumpOp() { | |||
| op_mapping_info.set_dump_path(dump_path); | |||
| op_mapping_info.set_flag(kAicpuLoadFlag); | |||
| op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | |||
| if (!dynamic_model_name_.empty()) { | |||
| op_mapping_info.set_model_id(dynamic_model_id_); | |||
| if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) { | |||
| op_mapping_info.set_model_name(dynamic_model_name_); | |||
| op_mapping_info.set_model_id(dynamic_model_id_); | |||
| } | |||
| SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | |||
| GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | |||
| @@ -253,7 +253,7 @@ Status DumpOp::LaunchDumpOp() { | |||
| } | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| if (dump_properties_.GetDumpMode() == kDumpAll) { | |||
| if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) { | |||
| auto ret = DumpOutput(task); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Dump output failed when in dumping all"); | |||
| @@ -81,11 +81,11 @@ class DumpProperties { | |||
| const std::string &GetEnableDumpDebug() const {return enable_dump_debug_;} | |||
| private: | |||
| void CopyFrom(const DumpProperties &other); | |||
| void SetDumpDebugOptions(); | |||
| std::string enable_dump_; | |||
| std::string enable_dump_debug_; | |||
| @@ -0,0 +1,148 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "opdebug_register.h" | |||
| namespace { | |||
| // Size passed to rtMalloc for the RT_MEMORY_DDR op-debug buffer (op_debug_addr_). | |||
| const size_t kOpDebugMemorySize = 2048UL; | |||
| // Size passed to rtMalloc for the RT_MEMORY_HBM buffer (p2p_debug_addr_) that receives the | |||
| // op-debug buffer's address as a single uint64_t. | |||
| const size_t kDebugP2pSize = 8UL; | |||
| } // namespace | |||
| namespace ge { | |||
| // Destruction releases nothing by itself: the debug buffers are freed only by | |||
| // UnregisterDebugForModel() / UnregisterDebugForStream(). | |||
| OpdebugRegister::~OpdebugRegister() = default; | |||
| // Registers overflow debugging for a whole model. | |||
| // Steps: allocate the debug buffers via MallocMemForOpdebug(), register op_debug_addr_ for | |||
| // model_handle with rtDebugRegister, then pass the returned task/stream ids together with | |||
| // p2p_debug_addr_ to data_dumper.SaveOpDebugId(). | |||
| // Returns SUCCESS, the status of MallocMemForOpdebug(), or the converted runtime error. | |||
| // On an rtDebugRegister failure the buffers allocated by this call are released again. | |||
| Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper) { | |||
|   GELOGD("Start to register debug for model in overflow"); | |||
|   auto ret = MallocMemForOpdebug(); | |||
|   if (ret != SUCCESS) { | |||
|     GELOGE(ret, "Malloc memory for opdebug in model overflow failed ,ret:0x%X", ret); | |||
|     return ret; | |||
|   } | |||
|   uint32_t debug_stream_id = 0; | |||
|   uint32_t debug_task_id = 0; | |||
|   auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id); | |||
|   if (rt_ret != RT_ERROR_NONE) { | |||
|     GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret); | |||
|     // Both buffers were allocated just above and the registration did not succeed, so free | |||
|     // them here; otherwise a caller that only unregisters after a successful register leaks | |||
|     // them. The Unregister* methods skip null pointers, so a later call stays safe. | |||
|     void **const buffers[] = {&op_debug_addr_, &p2p_debug_addr_}; | |||
|     for (void **buffer : buffers) { | |||
|       if (*buffer == nullptr) { | |||
|         continue; | |||
|       } | |||
|       const rtError_t free_ret = rtFree(*buffer); | |||
|       if (free_ret != RT_ERROR_NONE) { | |||
|         GELOGW("rtFree failed, ret: 0x%X", free_ret); | |||
|       } | |||
|       *buffer = nullptr; | |||
|     } | |||
|     return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
|   } | |||
|   GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id); | |||
|   data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true); | |||
|   return SUCCESS; | |||
| } | |||
| // Counterpart of RegisterDebugForModel(): unregisters model_handle when it is non-null and | |||
| // frees both debug buffers. Runtime failures are only logged as warnings, so the remaining | |||
| // cleanup steps still run. | |||
| void OpdebugRegister::UnregisterDebugForModel(rtModel_t model_handle) { | |||
|   if (model_handle != nullptr) { | |||
|     GELOGD("start to call rtDebugUnRegister in model overflow."); | |||
|     const rtError_t unregister_ret = rtDebugUnRegister(model_handle); | |||
|     if (unregister_ret != RT_ERROR_NONE) { | |||
|       GELOGW("rtDebugUnRegister failed, ret: 0x%X", unregister_ret); | |||
|     } | |||
|   } | |||
|   // Identical release sequence for both buffers; the op-debug buffer goes first. | |||
|   void **const owned_buffers[] = {&op_debug_addr_, &p2p_debug_addr_}; | |||
|   for (void **slot : owned_buffers) { | |||
|     if (*slot == nullptr) { | |||
|       continue; | |||
|     } | |||
|     const rtError_t free_ret = rtFree(*slot); | |||
|     if (free_ret != RT_ERROR_NONE) { | |||
|       GELOGW("rtFree failed, ret: 0x%X", free_ret); | |||
|     } | |||
|     // Reset even when rtFree reported an error, so the address is never freed twice. | |||
|     *slot = nullptr; | |||
|   } | |||
| } | |||
| // Registers overflow debugging for a single stream. | |||
| // Steps: allocate the debug buffers via MallocMemForOpdebug(), register op_debug_addr_ for | |||
| // stream with rtDebugRegisterForStream (compiled only when ONLY_COMPILE_OPEN_SRC is defined), | |||
| // then pass the task/stream ids together with p2p_debug_addr_ to data_dumper.SaveOpDebugId(). | |||
| // Returns SUCCESS, the status of MallocMemForOpdebug(), or the converted runtime error. | |||
| // On an rtDebugRegisterForStream failure the buffers allocated by this call are released again. | |||
| Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper) { | |||
|   GELOGD("Start to register debug for stream in stream overflow"); | |||
|   auto ret = MallocMemForOpdebug(); | |||
|   if (ret != SUCCESS) { | |||
|     // Message now says "failed", matching the model variant of this log line. | |||
|     GELOGE(ret, "Malloc memory for opdebug in stream overflow failed ,ret:0x%X", ret); | |||
|     return ret; | |||
|   } | |||
|   // Without ONLY_COMPILE_OPEN_SRC no runtime call is made and both ids stay 0. | |||
|   uint32_t debug_stream_id = 0; | |||
|   uint32_t debug_task_id = 0; | |||
| #ifdef ONLY_COMPILE_OPEN_SRC | |||
|   auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id); | |||
|   if (rt_ret != RT_ERROR_NONE) { | |||
|     GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret); | |||
|     // Both buffers were allocated just above and the registration did not succeed, so free | |||
|     // them here; otherwise a caller that only unregisters after a successful register leaks | |||
|     // them. The Unregister* methods skip null pointers, so a later call stays safe. | |||
|     void **const buffers[] = {&op_debug_addr_, &p2p_debug_addr_}; | |||
|     for (void **buffer : buffers) { | |||
|       if (*buffer == nullptr) { | |||
|         continue; | |||
|       } | |||
|       const rtError_t free_ret = rtFree(*buffer); | |||
|       if (free_ret != RT_ERROR_NONE) { | |||
|         GELOGW("rtFree failed, ret: 0x%X", free_ret); | |||
|       } | |||
|       *buffer = nullptr; | |||
|     } | |||
|     return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
|   } | |||
| #endif | |||
|   GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id); | |||
|   data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true); | |||
|   return SUCCESS; | |||
| } | |||
| // Counterpart of RegisterDebugForStream(): unregisters stream when it is non-null (only when | |||
| // ONLY_COMPILE_OPEN_SRC is defined) and frees both debug buffers. Runtime failures are only | |||
| // logged as warnings, so the remaining cleanup steps still run. | |||
| void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) { | |||
| #ifdef ONLY_COMPILE_OPEN_SRC | |||
|   if (stream != nullptr) { | |||
|     GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow."); | |||
|     const rtError_t unregister_ret = rtDebugUnRegisterForStream(stream); | |||
|     if (unregister_ret != RT_ERROR_NONE) { | |||
|       GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", unregister_ret); | |||
|     } | |||
|   } | |||
| #endif | |||
|   // Identical release sequence for both buffers; the op-debug buffer goes first. | |||
|   void **const owned_buffers[] = {&op_debug_addr_, &p2p_debug_addr_}; | |||
|   for (void **slot : owned_buffers) { | |||
|     if (*slot == nullptr) { | |||
|       continue; | |||
|     } | |||
|     const rtError_t free_ret = rtFree(*slot); | |||
|     if (free_ret != RT_ERROR_NONE) { | |||
|       GELOGW("rtFree failed, ret: 0x%X", free_ret); | |||
|     } | |||
|     // Reset even when rtFree reported an error, so the address is never freed twice. | |||
|     *slot = nullptr; | |||
|   } | |||
| } | |||
| // Allocates the two buffers used for overflow debugging: | |||
| //   op_debug_addr_  - kOpDebugMemorySize, RT_MEMORY_DDR; | |||
| //   p2p_debug_addr_ - kDebugP2pSize, RT_MEMORY_HBM, filled with the address of op_debug_addr_. | |||
| // Returns SUCCESS or the converted runtime error. When a later step fails, every buffer that | |||
| // this call already allocated is freed and its member reset to nullptr, so a failed call does | |||
| // not leave a half-initialised object behind. | |||
| Status OpdebugRegister::MallocMemForOpdebug() { | |||
|   rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR); | |||
|   if (rt_ret != RT_ERROR_NONE) { | |||
|     GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||
|     return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
|   } | |||
|   // For data dump, aicpu needs a pointer to a pointer that saves the real debug address. | |||
|   rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM); | |||
|   if (rt_ret != RT_ERROR_NONE) { | |||
|     GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||
|     // Only the first buffer exists at this point; give it back before reporting the error. | |||
|     const rtError_t free_ret = rtFree(op_debug_addr_); | |||
|     if (free_ret != RT_ERROR_NONE) { | |||
|       GELOGW("rtFree failed, ret: 0x%X", free_ret); | |||
|     } | |||
|     op_debug_addr_ = nullptr; | |||
|     return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
|   } | |||
|   // Host-side copy of the op-debug buffer address, written into the p2p buffer below. | |||
|   const uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_)); | |||
|   rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
|   if (rt_ret != RT_ERROR_NONE) { | |||
|     GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret); | |||
|     // Both buffers were allocated by this call; release them so the failure leaks nothing. | |||
|     void **const buffers[] = {&op_debug_addr_, &p2p_debug_addr_}; | |||
|     for (void **buffer : buffers) { | |||
|       const rtError_t free_ret = rtFree(*buffer); | |||
|       if (free_ret != RT_ERROR_NONE) { | |||
|         GELOGW("rtFree failed, ret: 0x%X", free_ret); | |||
|       } | |||
|       *buffer = nullptr; | |||
|     } | |||
|     return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
|   } | |||
|   return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,44 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_COMMON_DUMP_OPDEBUG_REGISTER_H_ | |||
| #define GE_COMMON_DUMP_OPDEBUG_REGISTER_H_ | |||
| #include <map> | |||
| #include "common/debug/ge_log.h" | |||
| #include "common/debug/log.h" | |||
| #include "graph/load/model_manager/data_dumper.h" | |||
| namespace ge { | |||
| // Registers and unregisters overflow ("op debug") reporting with the runtime, either for a | |||
| // whole model or for a single stream, and holds the two runtime buffers needed for it. | |||
| // The buffers are released by the Unregister* methods. | |||
| // NOTE(review): the class is implicitly copyable; a copy would hold the same raw addresses and | |||
| // both objects would rtFree them on unregister - confirm that instances are never copied. | |||
| class OpdebugRegister { | |||
| public: | |||
| OpdebugRegister() = default; | |||
| ~OpdebugRegister(); | |||
| // Allocates the debug buffers, registers them for model_handle with op_debug_mode and reports | |||
| // the resulting task/stream ids to data_dumper. Returns SUCCESS or an error status. | |||
| Status RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper); | |||
| // Unregisters model_handle (skipped when it is nullptr) and frees the debug buffers. | |||
| void UnregisterDebugForModel(rtModel_t model_handle); | |||
| // Stream counterpart of RegisterDebugForModel(). | |||
| Status RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper); | |||
| // Stream counterpart of UnregisterDebugForModel(). | |||
| void UnregisterDebugForStream(rtStream_t stream); | |||
| private: | |||
| // Allocates op_debug_addr_ and p2p_debug_addr_ and stores the former's address in the latter. | |||
| Status MallocMemForOpdebug(); | |||
| // Buffer handed to the runtime debug-register calls; nullptr while nothing is allocated. | |||
| void *op_debug_addr_ = nullptr; | |||
| // Buffer holding the address of op_debug_addr_; this is the pointer passed on to the DataDumper. | |||
| void *p2p_debug_addr_ = nullptr; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_COMMON_DUMP_OPDEBUG_REGISTER_H_ | |||
| @@ -111,7 +111,7 @@ Status CastKernel(const CastArgs &args, uint8_t *dst, const size_t data_size, co | |||
| }; | |||
| auto it = transfer_handle.find(trans_mode); | |||
| if (it == transfer_handle.end()) { | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } else { | |||
| return (it->second)(args, dst, data_size); | |||
| } | |||
| @@ -127,8 +127,8 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result | |||
| std::string error = "Failed to trans data from datatype " + | |||
| FmtToStr(TypeUtils::DataTypeToSerialString(args.src_data_type)) + " to " + | |||
| FmtToStr(TypeUtils::DataTypeToSerialString(args.dst_data_type)) + " , it is not supported."; | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_DATATYPE_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| auto trans_mode = iter->second; | |||
| @@ -136,14 +136,14 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result | |||
| if (size <= 0) { | |||
| std::string error = "Failed to calc size from data type" + | |||
| FmtToStr(TypeUtils::DataTypeToSerialString(args.dst_data_type)) + ", it is not supported."; | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| return PARAM_INVALID; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_DATATYPE_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (args.src_data_size > static_cast<size_t>(SIZE_MAX / size)) { | |||
| std::string error = "args.src_data_size" + FmtToStr(args.src_data_size) + | |||
| " or data type size" + FmtToStr(size) + " is too big"; | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| return PARAM_INVALID; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_PARAM_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| size_t total_size = static_cast<size_t>(args.src_data_size * size); | |||
| result.length = total_size; | |||
| @@ -154,8 +154,9 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size); | |||
| return OUT_OF_MEMORY; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| if (CastKernel(args, dst.get(), args.src_data_size, trans_mode) != SUCCESS) { | |||
| @@ -163,8 +164,8 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result | |||
| FmtToStr(TypeUtils::DataTypeToSerialString(args.src_data_type)) + " to " + | |||
| FmtToStr(TypeUtils::DataTypeToSerialString(args.dst_data_type)) + ", data size is " + | |||
| FmtToStr(std::to_string(args.src_data_size)); | |||
| GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error.c_str()); | |||
| return INTERNAL_ERROR; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_INTERNAL_ERROR, error.c_str()); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| result.data = dst; | |||
| return SUCCESS; | |||
| @@ -39,22 +39,22 @@ Status CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) { | |||
| std::string error = "Dose not support trans format from " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| if (!CheckDataTypeSupported(args.src_data_type)) { | |||
| std::string error = "Failed to trans shape from NC1HWNCoC0 to HWCN, invalid data type" + | |||
| FmtToStr(TypeUtils::DataTypeToSerialString(args.src_data_type)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_DATATYPE_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(src_shape, kC1hwncoc0DimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(dst_shape, kHwcnDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s.", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| auto cube_size = GetCubeSizeByDataType(args.src_data_type); | |||
| if (src_shape.at(kC1hwncoc0C1) != (dst_shape.at(kHwcnC) - 1) / cube_size + 1 || | |||
| @@ -63,8 +63,8 @@ Status CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) { | |||
| src_shape.at(kC1hwncoc0C0) != cube_size) { | |||
| std::string error = "Failed to check relationship between src and dst shape, src shape" + | |||
| FmtToStr(ShapeToString(src_shape)) + ", dst shape" + FmtToStr(ShapeToString(dst_shape)); | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| return PARAM_INVALID; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| @@ -73,10 +73,11 @@ Status CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto h = args.src_shape.at(kC1hwncoc0H); | |||
| @@ -114,12 +115,12 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to copy data from C1HWNCoC0[%ld, %ld, %ld, %ld, %ld, %ld] offset %ld to " | |||
| "HWCN[%ld, %ld, %ld, %ld] offset %ld, err-code %d", | |||
| c1_idx, h_idx, w_idx, n_idx, co_idx, c0_idx, src_offset, h_idx, w_idx, c_idx, n_idx, dst_offset, | |||
| ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -132,8 +133,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size | |||
| } // namespace | |||
| Status FormatTransferC1hwncoc0Hwcn::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (CheckArgsForC1hwncoc0ToHwcn(args) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckArgsForC1hwncoc0ToHwcn(args); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| int64_t total_size = GetItemNumByShape(args.dst_shape) * size; | |||
| @@ -143,26 +145,27 @@ Status FormatTransferC1hwncoc0Hwcn::TransFormat(const TransArgs &args, TransResu | |||
| result.length = static_cast<size_t>(total_size); | |||
| return SUCCESS; | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Get %ld total size from dst shape %s, src shape %s.", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from C1HWNCoC0 to HWCN, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| GELOGD("Begin to trans format from C1HWNCoC0 to HWCN, src shape %s, data type %s, dst shape %s, memory size %ld.", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ret = GetDstDataAfterTrans(args, result, size, total_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status FormatTransferC1hwncoc0Hwcn::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from C1HWNCoC0 to HWCN is not unique. Trans shape in this direction is not supported"); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| GELOGD("The shape derivation from C1HWNCoC0 to HWCN is not unique. Trans shape in this direction is not supported."); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferC1hwncoc0Hwcn, FORMAT_C1HWNCoC0, FORMAT_HWCN) | |||
| @@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||
| std::vector<int64_t> &dst_shape) { | |||
| auto c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 < 0) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| auto c1 = Ceil(c, c0); | |||
| @@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||
| Status TransShapeDhwckToFz3D(const std::vector<int64_t> &src_shape, DataType data_type, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kDhwcnDimsNum)) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| auto d = src_shape.at(kDhwcnD); | |||
| auto h = src_shape.at(kDhwcnH); | |||
| @@ -62,7 +62,7 @@ Status TransShapeDhwckToFz3D(const std::vector<int64_t> &src_shape, DataType dat | |||
| } | |||
| Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) { | |||
| if (!CheckShapeValid(args.src_shape, kDhwcnDimsNum)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| int64_t d = args.src_shape[kDhwcnD]; | |||
| int64_t h = args.src_shape[kDhwcnH]; | |||
| @@ -94,10 +94,11 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| for (int64_t di = 0; di < d; di++) { | |||
| @@ -122,9 +123,10 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) { | |||
| args.data + src_idx * data_size, static_cast<size_t>(data_size)); | |||
| } | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d", | |||
| dst_offset, ret, pad_zero); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -149,28 +151,28 @@ Status FormatTransferDhwcnFractalZ3D::TransFormat(const TransArgs &args, TransRe | |||
| return ret; | |||
| } | |||
| if (!IsTransShapeDstCorrect(args, expect_shape)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (args.src_format == FORMAT_DHWCN && args.dst_format == FORMAT_FRACTAL_Z_3D) { | |||
| return TransFormatDhwckToFz3D(args, result); | |||
| } | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| Status FormatTransferDhwcnFractalZ3D::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | |||
| DataType data_type, Format dst_format, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (CheckDataTypeSupport(data_type) != SUCCESS) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (src_format == FORMAT_DHWCN && dst_format == FORMAT_FRACTAL_Z_3D) { | |||
| return TransShapeDhwckToFz3D(src_shape, data_type, dst_shape); | |||
| } | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferDhwcnFractalZ3D, FORMAT_DHWCN, FORMAT_FRACTAL_Z_3D) | |||
| @@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||
| std::vector<int64_t> &dst_shape) { | |||
| auto c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 < 0) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| auto c1 = Ceil(c, c0); | |||
| @@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||
| Status TransShapeDhwncToFz3DTranspose(const std::vector<int64_t> &src_shape, DataType data_type, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kDhwncDimsNum)) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| auto d = src_shape.at(kDhwncD); | |||
| auto h = src_shape.at(kDhwncH); | |||
| @@ -62,7 +62,7 @@ Status TransShapeDhwncToFz3DTranspose(const std::vector<int64_t> &src_shape, Dat | |||
| } | |||
| Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &result) { | |||
| if (!CheckShapeValid(args.src_shape, kDhwncDimsNum)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| int64_t d = args.src_shape[kDhwncD]; | |||
| int64_t h = args.src_shape[kDhwncH]; | |||
| @@ -95,10 +95,11 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| for (int64_t di = 0; di < d; di++) { | |||
| @@ -123,9 +124,10 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul | |||
| args.data + src_idx * data_size, static_cast<size_t>(data_size)); | |||
| } | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d", | |||
| dst_offset, ret, pad_zero); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -150,28 +152,28 @@ Status FormatTransferDhwncFractalZ3DTranspose::TransFormat(const TransArgs &args | |||
| return ret; | |||
| } | |||
| if (!IsTransShapeDstCorrect(args, expect_shape)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (args.src_format == ge::FORMAT_DHWNC && args.dst_format == ge::FORMAT_FRACTAL_Z_3D_TRANSPOSE) { | |||
| return TransFormatDhwncToFz3DTranspose(args, result); | |||
| } | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| Status FormatTransferDhwncFractalZ3DTranspose::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | |||
| DataType data_type, Format dst_format, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (CheckDataTypeSupport(data_type) != SUCCESS) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (src_format == FORMAT_DHWNC && dst_format == FORMAT_FRACTAL_Z_3D_TRANSPOSE) { | |||
| return TransShapeDhwncToFz3DTranspose(src_shape, data_type, dst_shape); | |||
| } | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferDhwncFractalZ3DTranspose, FORMAT_DHWNC, FORMAT_FRACTAL_Z_3D_TRANSPOSE) | |||
| @@ -87,8 +87,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap | |||
| hw_shape.push_back(DIM_DEFAULT_VALUE); | |||
| hw_shape.push_back(src_shape[kNdDimIndexN]); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| default: | |||
| @@ -106,8 +106,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap | |||
| hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); | |||
| hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -117,14 +117,14 @@ Status CheckShapeRelation(const TransArgs &args, ShapeVector &hw_shape) { | |||
| ShapeVector expect_src_shape; | |||
| auto ret = TransShapeToFracNz(args.dst_shape, args.src_data_type, expect_src_shape, hw_shape); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Trans shape from %s to %s, shape %s to %s, data type %s failed", | |||
| GELOGE(ret, "Trans shape from %s to %s, shape %s to %s, data type %s failed", | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), ShapeToString(args.dst_shape).c_str(), | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| if (!IsTransShapeSrcCorrect(args, expect_src_shape)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -139,10 +139,11 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| // src&dst_shape can be written as times*H*W & times*W1*H1*H0*W0, respectively. dst_shape_size >= kDimNum4D | |||
| @@ -175,8 +176,9 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size * w0)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| auto w1_head = num_w1 * w0; | |||
| @@ -189,8 +191,9 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -210,10 +213,11 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto times = dst_hw_shape.at(kNdDimIndexN); | |||
| @@ -246,8 +250,9 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con | |||
| ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size * w0)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| auto w1_head = num_w1 * w0; | |||
| @@ -260,8 +265,9 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con | |||
| ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -273,13 +279,21 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con | |||
| } // namespace | |||
| Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (!IsDataTypeSupport(args.src_data_type) || !CheckShape(args.src_format, args.src_shape) || | |||
| !IsShapeValid(args.dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported", | |||
| if (!IsDataTypeSupport(args.src_data_type)) { | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, | |||
| "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| @@ -292,7 +306,7 @@ Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult & | |||
| return ret; | |||
| } | |||
| if (!IsTransShapeDstCorrect(args, expect_shape)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return TransFormatFromNdToFracNz(args, result, hw_shape); | |||
| } | |||
| @@ -300,31 +314,40 @@ Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult & | |||
| Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type, | |||
| Format dst_format, ShapeVector &dst_shape) { | |||
| if (!IsDataTypeSupport(data_type)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, | |||
| "Trans format from %s to %s, src shape %s, data type %s is not supported", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||
| ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShape(src_format, src_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Trans format from %s to %s, src shape %s, data type %s is not supported", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||
| ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| ShapeVector hw_shape; | |||
| return TransShapeToFracNz(src_shape, data_type, dst_shape, hw_shape); | |||
| } | |||
| Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (!IsDataTypeSupport(args.src_data_type) || !IsShapeValid(args.src_shape) || | |||
| !CheckShape(args.dst_format, args.dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported", | |||
| if (!IsDataTypeSupport(args.src_data_type)) { | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, | |||
| "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| @@ -332,8 +355,9 @@ Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| ShapeVector hw_shape; | |||
| if (CheckShapeRelation(args, hw_shape) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckShapeRelation(args, hw_shape); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| return TransFormatFromFracNzToNd(args, result, hw_shape); | |||
| } | |||
| @@ -342,7 +366,7 @@ Status FormatTransferFractalNzND::TransShape(Format src_format, const ShapeVecto | |||
| Format dst_format, ShapeVector &dst_shape) { | |||
| GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFractalNz, FORMAT_ND, FORMAT_FRACTAL_NZ) | |||
| @@ -42,7 +42,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_ | |||
| Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| auto c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 < 0) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| auto c1 = Ceil(c, c0); | |||
| @@ -54,16 +54,16 @@ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_ | |||
| dst_shape.push_back(kNiSize); | |||
| dst_shape.push_back(c0); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status TransShapeNchwToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| auto n = src_shape.at(kNchwN); | |||
| @@ -75,7 +75,7 @@ Status TransShapeNchwToFz(const std::vector<int64_t> &src_shape, DataType data_t | |||
| Status TransShapeHwcnToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| auto h = src_shape.at(kHwcnH); | |||
| @@ -88,7 +88,7 @@ Status TransShapeHwcnToFz(const std::vector<int64_t> &src_shape, DataType data_t | |||
| Status TransShapeNhwcToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| auto n = src_shape.at(kNhwcN); | |||
| @@ -127,10 +127,11 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| dst == nullptr, | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY;); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION;); | |||
| for (int64_t vfi = 0; vfi < vf_cnt; vfi++) { | |||
| // vertical fractal matrix base index | |||
| @@ -163,8 +164,8 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) { | |||
| if (protected_size < size) { | |||
| std::string error = "Failed to operate the dst memory, protected_size is " + | |||
| FmtToStr(protected_size) + " and size is " + FmtToStr(size); | |||
| GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error.c_str()); | |||
| return INTERNAL_ERROR; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_PARAM_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| char *dst_data = reinterpret_cast<char *>(dst.get() + offset); | |||
| const char *src_data = reinterpret_cast<const char *>(args.data + src_offset * size); | |||
| @@ -173,9 +174,10 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) { | |||
| } | |||
| } | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d", offset, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d", offset, | |||
| ret, need_pad_zero); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -213,10 +215,11 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| dst == nullptr, | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY;); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION;); | |||
| for (int64_t c1i = 0; c1i < c1; c1i++) { | |||
| for (int64_t hi = 0; hi < h; hi++) { | |||
| @@ -235,9 +238,10 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) { | |||
| static_cast<size_t>(data_size)); | |||
| } else { | |||
| if (protected_size < data_size) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory, protected_size is %ld and size is %ld", | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||
| "Failed to operate the dst memory, protected_size is %ld and size is %ld", | |||
| protected_size, data_size); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| int64_t src_idx = hi * wcn + wi * cn + (c1i * c0 + c0i) * n + n1n0i; | |||
| char *dst_data = reinterpret_cast<char *>(dst.get() + dst_offset); | |||
| @@ -247,9 +251,10 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) { | |||
| } | |||
| } | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d", | |||
| dst_offset, ret, pad_zero); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -288,10 +293,11 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| dst == nullptr, | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY;); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION;); | |||
| for (int64_t c1i = 0; c1i < c1; c1i++) { | |||
| for (int64_t hi = 0; hi < h; hi++) { | |||
| @@ -310,9 +316,10 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) { | |||
| static_cast<size_t>(data_size)); | |||
| } else { | |||
| if (protected_size < data_size) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory, protected_size is %ld and size is %ld", | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||
| "Failed to operate the dst memory, protected_size is %ld and size is %ld", | |||
| protected_size, data_size); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| int64_t src_idx = n1n0i * hwc + hi * wc + wi * c + (c1i * c0 + c0i); | |||
| char *dst_data = reinterpret_cast<char *>(dst.get() + dst_offset); | |||
| @@ -322,9 +329,10 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) { | |||
| } | |||
| } | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d", | |||
| dst_offset, ret, pad_zero); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -349,7 +357,7 @@ Status FormatTransferFractalZ::TransFormat(const TransArgs &args, TransResult &r | |||
| return ret; | |||
| } | |||
| if (!IsTransShapeDstCorrect(args, expect_shape)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (args.src_format == FORMAT_NHWC && args.dst_format == FORMAT_FRACTAL_Z) { | |||
| @@ -364,13 +372,13 @@ Status FormatTransferFractalZ::TransFormat(const TransArgs &args, TransResult &r | |||
| return TransFormatFromNchwToFz(args, result); | |||
| } | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | |||
| Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| if (CheckDataTypeSupport(data_type) != SUCCESS) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (src_format == FORMAT_NHWC && dst_format == FORMAT_FRACTAL_Z) { | |||
| @@ -383,7 +391,7 @@ Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector<i | |||
| return TransShapeNchwToFz(src_shape, data_type, dst_shape); | |||
| } | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFractalZ, FORMAT_NCHW, FORMAT_FRACTAL_Z) | |||
| @@ -86,9 +86,9 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap | |||
| hw_shape.push_back(DIM_DEFAULT_VALUE); | |||
| hw_shape.push_back(src_shape[kNdDimIndexN]); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| default: | |||
| @@ -106,9 +106,9 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap | |||
| hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); | |||
| hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -118,14 +118,14 @@ Status CheckShapeRelation(const TransArgs &args, ShapeVector &hw_shape) { | |||
| ShapeVector expect_src_shape; | |||
| auto ret = TransShapeToFracZz(args.dst_shape, args.src_data_type, expect_src_shape, hw_shape); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Trans shape from %s to %s, shape %s to %s, data type %s failed", | |||
| GELOGE(ret, "Trans shape from %s to %s, shape %s to %s, data type %s failed", | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), ShapeToString(args.dst_shape).c_str(), | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| if (!IsTransShapeSrcCorrect(args, expect_src_shape)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -140,10 +140,11 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| // The src&dst_shape can be written as times*H*W & times*H1*W1*H0*W0, respectively. dst_shape_size >= kDimNum4D | |||
| auto times = hw_shape.at(kNdDimIndexN); | |||
| @@ -179,8 +180,9 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size * w0)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| auto w1_head = num_w1 * w0; | |||
| @@ -195,8 +197,9 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -217,10 +220,11 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| // The src&dst_shape can be written as times*H*W & times*H1*W1*H0*W0, respectively. dst_shape_size >= kDimNum4D | |||
| @@ -257,8 +261,9 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size * w0)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| auto w1_head = num_w1 * w0; | |||
| @@ -273,8 +278,9 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -287,13 +293,21 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con | |||
| } // namespace | |||
| Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (!IsDataTypeSupport(args.src_data_type) || !CheckShape(args.src_format, args.src_shape) || | |||
| !IsShapeValid(args.dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| if (!IsDataTypeSupport(args.src_data_type)) { | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, | |||
| "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| @@ -306,7 +320,7 @@ Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult & | |||
| return ret; | |||
| } | |||
| if (!IsTransShapeDstCorrect(args, expect_shape)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return TransFormatFromNdToFracZz(args, result, hw_shape); | |||
| } | |||
| @@ -314,31 +328,40 @@ Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult & | |||
| Status FormatTransferFractalZz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type, | |||
| Format dst_format, ShapeVector &dst_shape) { | |||
| if (!IsDataTypeSupport(data_type)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, | |||
| "Not support trans format from %s to %s, src shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||
| ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShape(src_format, src_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Not support trans format from %s to %s, src shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||
| ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| ShapeVector hw_shape; | |||
| return TransShapeToFracZz(src_shape, data_type, dst_shape, hw_shape); | |||
| } | |||
| Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (!IsDataTypeSupport(args.src_data_type) || !IsShapeValid(args.src_shape) || | |||
| !CheckShape(args.dst_format, args.dst_shape)) { | |||
| GELOGE(PARAM_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| if (!IsDataTypeSupport(args.src_data_type)) { | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, | |||
| "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from %s to %s, src shape %s, dst shape %s, data type %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| @@ -346,8 +369,9 @@ Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult | |||
| ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| ShapeVector hw_shape; | |||
| if (CheckShapeRelation(args, hw_shape) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckShapeRelation(args, hw_shape); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| return TransFormatFromFracZzToNd(args, result, hw_shape); | |||
| } | |||
| @@ -356,7 +380,7 @@ Status FormatTransferFractalZzND::TransShape(Format src_format, const ShapeVecto | |||
| Format dst_format, ShapeVector &dst_shape) { | |||
| GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported", | |||
| TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFractalZz, FORMAT_ND, FORMAT_FRACTAL_ZZ) | |||
| @@ -37,25 +37,25 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) { | |||
| std::string error = "Dose not support trans format from " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| if (!CheckDataTypeSupported(args.src_data_type)) { | |||
| GELOGE(UNSUPPORTED, "Failed to trans shape from FORMAT_FRACTAL_Z to HWCN, invalid data type %s", | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to trans shape from FORMAT_FRACTAL_Z to HWCN, invalid data type %s", | |||
| TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(src_shape, kFracZDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(dst_shape, kHwcnDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| int64_t c0 = GetCubeSizeByDataType(args.src_data_type); | |||
| if (c0 < 0) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| int64_t c1 = Ceil(dst_shape.at(kHwcnC), c0); | |||
| int64_t n0 = Ceil(dst_shape.at(kHwcnN), static_cast<int64_t>(kNiSize)); | |||
| @@ -64,8 +64,8 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) { | |||
| std::string error = "Failed to check relationship between src shape" + | |||
| FmtToStr(ShapeToString(src_shape)) + " and dst shape" + | |||
| FmtToStr(ShapeToString(dst_shape)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return PARAM_INVALID; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| @@ -74,10 +74,11 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto n0 = args.src_shape.at(kFracZN0); | |||
| @@ -113,11 +114,11 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to copy data from FracZ offset %ld to HWCN[%ld, %ld, %ld, %ld] " | |||
| "offset %ld, err-code %d", | |||
| src_offset, h_idx, w_idx, c_idx, n_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -130,8 +131,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| } // namespace | |||
| Status FormatTransferFracZHwcn::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (CheckArgsForFracZToHwcn(args) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckArgsForFracZToHwcn(args); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| auto total_size = GetItemNumByShape(args.dst_shape) * size; | |||
| @@ -142,18 +144,19 @@ Status FormatTransferFracZHwcn::TransFormat(const TransArgs &args, TransResult & | |||
| return SUCCESS; | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from FracZ to HWCN, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ret = GetDstDataAfterTrans(args, result, size, total_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -161,7 +164,7 @@ Status FormatTransferFracZHwcn::TransFormat(const TransArgs &args, TransResult & | |||
| Status FormatTransferFracZHwcn::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | |||
| Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from FracZ to HWCN is not unique. Trans shape in this direction is not supported"); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFracZHwcn, FORMAT_FRACTAL_Z, FORMAT_HWCN) | |||
| @@ -37,33 +37,34 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) { | |||
| std::string error = "Dose not support trans format from " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| if (!CheckDataTypeSupported(args.src_data_type)) { | |||
| GELOGE(UNSUPPORTED, "Failed to trans shape from FORMAT_FRACTAL_Z to NCHW, invalid data type %s", | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to trans shape from FORMAT_FRACTAL_Z to NCHW, invalid data type %s", | |||
| TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(src_shape, kFracZDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(dst_shape, kNchwDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| int64_t c0 = GetCubeSizeByDataType(args.src_data_type); | |||
| if (c0 < 0) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| int64_t c1 = Ceil(dst_shape.at(kNchwC), c0); | |||
| int64_t n0 = Ceil(dst_shape.at(kNchwN), static_cast<int64_t>(kNiSize)); | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 || src_shape.at(kFracZC0) != c0 || | |||
| src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 || | |||
| src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| @@ -72,10 +73,11 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto n0 = args.src_shape.at(kFracZN0); | |||
| @@ -111,11 +113,11 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to copy data from FracZ offset %ld to NCHW[%ld, %ld, %ld, %ld] offset %ld, " | |||
| "err-code %d", | |||
| src_offset, n_idx, c_idx, h_idx, w_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -128,8 +130,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| } // namespace | |||
| Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (CheckArgsForFracZToNchw(args) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckArgsForFracZToNchw(args); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| auto total_size = GetItemNumByShape(args.dst_shape) * size; | |||
| @@ -140,19 +143,20 @@ Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult & | |||
| return SUCCESS; | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from FracZ to NCHW, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ret = GetDstDataAfterTrans(args, result, size, total_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -160,7 +164,7 @@ Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult & | |||
| Status FormatTransferFracZNchw::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | |||
| Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from FracZ to NCHW is not unique. Trans shape in this direction is not supported"); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFracZNchw, FORMAT_FRACTAL_Z, FORMAT_NCHW) | |||
| @@ -37,33 +37,34 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) { | |||
| std::string error = "Dose not support trans format from " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| if (!CheckDataTypeSupported(args.src_data_type)) { | |||
| GELOGE(UNSUPPORTED, "Failed to trans shape from FORMAT_FRACTAL_Z to NHWC, invalid data type %s", | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to trans shape from FORMAT_FRACTAL_Z to NHWC, invalid data type %s", | |||
| TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(src_shape, kFracZDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(dst_shape, kNhwcDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| int64_t c0 = GetCubeSizeByDataType(args.src_data_type); | |||
| if (c0 < 0) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| int64_t c1 = Ceil(dst_shape.at(kNhwcC), c0); | |||
| int64_t n0 = Ceil(dst_shape.at(kNhwcN), static_cast<int64_t>(kNiSize)); | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 || src_shape.at(kFracZC0) != c0 || | |||
| src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 || | |||
| src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| @@ -72,10 +73,11 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto n0 = args.src_shape.at(kFracZN0); | |||
| @@ -111,10 +113,10 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to copy data from FracZ offset %ld to HHWC[%ld, %ld, %ld, %ld] offset %ld, err-code %d", | |||
| src_offset, n_idx, h_idx, w_idx, c_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -127,8 +129,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size | |||
| } // namespace | |||
| Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (CheckArgsForFracZToNhwc(args) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckArgsForFracZToNhwc(args); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| auto total_size = GetItemNumByShape(args.dst_shape) * size; | |||
| @@ -139,18 +142,19 @@ Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult & | |||
| return SUCCESS; | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from FracZ to NHWC, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ret = GetDstDataAfterTrans(args, result, size, total_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -158,7 +162,7 @@ Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult & | |||
| Status FormatTransferFracZNhwc::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | |||
| Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from FracZ to NHWC is not unique. Trans shape in this direction is not supported"); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferFracZNhwc, FORMAT_FRACTAL_Z, FORMAT_NHWC) | |||
| @@ -43,9 +43,9 @@ Status TransShapeHwcnToC1hwncoc0(const DataType &data_type, const std::vector<in | |||
| dst_shape.push_back(cube_size); | |||
| dst_shape.push_back(cube_size); | |||
| if (!CheckShapeValid(dst_shape, kC1hwncoc0DimsNum)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -55,21 +55,21 @@ Status CheckArgsForHwcnToC1hwncoc0(const TransArgs &args) { | |||
| std::string error = "Dose not support trans format from " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| if (!CheckDataTypeSupported(args.src_data_type)) { | |||
| GELOGE(UNSUPPORTED, "Failed to trans shape from HWCN to C1HWNCoC0, invalid data type %s", | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to trans shape from HWCN to C1HWNCoC0, invalid data type %s", | |||
| TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(args.src_shape, kHwcnDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", ShapeToString(args.src_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(args.dst_shape, kC1hwncoc0DimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(args.dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| std::vector<int64_t> expect_dst_shape; | |||
| auto ret = TransShapeHwcnToC1hwncoc0(args.src_data_type, args.src_shape, expect_dst_shape); | |||
| @@ -77,12 +77,12 @@ Status CheckArgsForHwcnToC1hwncoc0(const TransArgs &args) { | |||
| return ret; | |||
| } | |||
| if (args.dst_shape != expect_dst_shape) { | |||
| GELOGE(PARAM_INVALID, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to trans format, src and dst shape are not compatible. src shape %s, dst shape %s, " | |||
| "expect dst shape %s", | |||
| ShapeToString(args.src_shape).c_str(), ShapeToString(args.dst_shape).c_str(), | |||
| ShapeToString(expect_dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| @@ -91,10 +91,11 @@ Status CheckArgsForHwcnToC1hwncoc0(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto h = args.src_shape.at(kHwcnH); | |||
| @@ -135,22 +136,22 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to copy data from HWCN[%ld, %ld, %ld, %ld] offset %ld to " | |||
| "C1HWNCoC0[%ld, %ld, %ld, %ld, %ld, %ld] offset %ld, err-code %d", | |||
| h_idx, w_idx, c_idx, n_idx, src_offset, c1_idx, h_idx, w_idx, n_idx, co_idx, c0_idx, | |||
| dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } else { | |||
| auto ret = | |||
| memset_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), 0, static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to set to 0 to C1HWNCoC0[%ld, %ld, %ld, %ld, %ld, %ld] offset %ld, " | |||
| "err-code %d", | |||
| c1_idx, h_idx, w_idx, n_idx, co_idx, c0_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -166,8 +167,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| } // namespace | |||
| Status FormatTransferHwcnC1hwncoc0::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (CheckArgsForHwcnToC1hwncoc0(args) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckArgsForHwcnToC1hwncoc0(args); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| auto total_size = GetItemNumByShape(args.dst_shape) * size; | |||
| @@ -178,18 +180,20 @@ Status FormatTransferHwcnC1hwncoc0::TransFormat(const TransArgs &args, TransResu | |||
| return SUCCESS; | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from HWCN to C1HWNCoC0, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ret = GetDstDataAfterTrans(args, result, size, total_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -198,15 +202,15 @@ Status FormatTransferHwcnC1hwncoc0::TransShape(Format src_format, const std::vec | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| if (src_format == FORMAT_HWCN && CheckDataTypeSupported(data_type)) { | |||
| if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", | |||
| ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return TransShapeHwcnToC1hwncoc0(data_type, src_shape, dst_shape); | |||
| } else if (src_format != FORMAT_HWCN) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } else { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| } | |||
| @@ -37,33 +37,34 @@ Status CheckArgsForNc1hwc0ToNchw(const TransArgs &args) { | |||
| std::string error = "Dose not support trans format from " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| if (!CheckDataTypeSupported(args.src_data_type)) { | |||
| GELOGE(UNSUPPORTED, "Failed to trans shape from NC1HWC0 to NCHW, invalid data type %s", | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to trans shape from NC1HWC0 to NCHW, invalid data type %s", | |||
| TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(args.src_shape, kNc1hwc0DimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", ShapeToString(args.src_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(args.dst_shape, kNchwDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(args.dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| int64_t c0 = GetCubeSizeByDataType(args.src_data_type); | |||
| if (c0 <= 0) { | |||
| GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (src_shape.at(kNc1hwc0H) != dst_shape.at(kNchwH) || src_shape.at(kNc1hwc0W) != dst_shape.at(kNchwW) || | |||
| src_shape.at(kNc1hwc0N) != dst_shape.at(kNchwN) || src_shape.at(kNc1hwc0C0) != c0 || | |||
| src_shape.at(kNc1hwc0C1) != (Ceil(dst_shape.at(kNchwC), c0))) { | |||
| GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| @@ -72,10 +73,11 @@ Status CheckArgsForNc1hwc0ToNchw(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto h = args.src_shape.at(kNc1hwc0H); | |||
| @@ -109,11 +111,11 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to copy data from NC1HWC0[%ld, %ld, %ld, %ld, %ld] offset %ld to NCHW[%ld, %ld, %ld, %ld]" | |||
| " offset %ld, err-code %d", | |||
| n_idx, c1_idx, h_idx, w_idx, c0_idx, src_offset, n_idx, c_idx, h_idx, w_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -126,8 +128,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| } // namespace | |||
| Status FormatTransferNc1hwc0Nchw::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (CheckArgsForNc1hwc0ToNchw(args) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckArgsForNc1hwc0ToNchw(args); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| auto total_size = GetItemNumByShape(args.dst_shape) * size; | |||
| @@ -138,18 +141,19 @@ Status FormatTransferNc1hwc0Nchw::TransFormat(const TransArgs &args, TransResult | |||
| return SUCCESS; | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from NC1HWC0 to NCHW, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ret = GetDstDataAfterTrans(args, result, size, total_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -157,7 +161,7 @@ Status FormatTransferNc1hwc0Nchw::TransFormat(const TransArgs &args, TransResult | |||
| Status FormatTransferNc1hwc0Nchw::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from NC1HWC0 to NCHW is not unique. Trans shape in this direction is not supported"); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferNc1hwc0Nchw, FORMAT_NC1HWC0, FORMAT_NCHW) | |||
| @@ -37,33 +37,34 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) { | |||
| std::string error = "Dose not support trans format from " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| if (!CheckDataTypeSupported(args.src_data_type)) { | |||
| GELOGE(UNSUPPORTED, "Failed to trans shape from NC1HWC0 to NHWC, invalid data type %s", | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to trans shape from NC1HWC0 to NHWC, invalid data type %s", | |||
| TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(args.src_shape, kNc1hwc0DimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", ShapeToString(args.src_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(args.dst_shape, kNhwcDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(args.dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| int64_t c0 = GetCubeSizeByDataType(args.src_data_type); | |||
| if (c0 <= 0) { | |||
| GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (src_shape.at(kNc1hwc0H) != dst_shape.at(kNhwcH) || src_shape.at(kNc1hwc0W) != dst_shape.at(kNhwcW) || | |||
| src_shape.at(kNc1hwc0N) != dst_shape.at(kNhwcN) || src_shape.at(kNc1hwc0C0) != c0 || | |||
| src_shape.at(kNc1hwc0C1) != (Ceil(dst_shape.at(kNhwcC), c0))) { | |||
| GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| @@ -72,10 +73,11 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto h = args.src_shape.at(kNc1hwc0H); | |||
| @@ -109,11 +111,11 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to copy data from NC1HWC0[%ld, %ld, %ld, %ld, %ld] offset %ld to NHWC[%ld, %ld, %ld, %ld]" | |||
| " offset %ld, err-code %d", | |||
| n_idx, c1_idx, h_idx, w_idx, c0_idx, src_offset, n_idx, c_idx, h_idx, w_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -126,8 +128,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| } // namespace | |||
| Status FormatTransferNc1hwc0Nhwc::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (CheckArgsForNc1hwc0ToNhwc(args) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckArgsForNc1hwc0ToNhwc(args); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| auto total_size = GetItemNumByShape(args.dst_shape) * size; | |||
| @@ -138,18 +141,20 @@ Status FormatTransferNc1hwc0Nhwc::TransFormat(const TransArgs &args, TransResult | |||
| return SUCCESS; | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from NC1HWC0 to NCHW, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ret = GetDstDataAfterTrans(args, result, size, total_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -157,7 +162,7 @@ Status FormatTransferNc1hwc0Nhwc::TransFormat(const TransArgs &args, TransResult | |||
| Status FormatTransferNc1hwc0Nhwc::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| GELOGD("The shape derivation from NC1HWC0 to NHWC is not unique. Trans shape in this direction is not supported"); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferNc1hwc0Nhwc, FORMAT_NC1HWC0, FORMAT_NHWC) | |||
| @@ -45,7 +45,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_ | |||
| Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape) { | |||
| auto c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 < 0) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| auto chw = c * h * w; | |||
| @@ -59,9 +59,9 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type | |||
| dst_shape.push_back(c0); | |||
| if (!IsShapeValid(dst_shape)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -69,7 +69,7 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type | |||
| Status TransShapeNchwToFzC04(const std::vector<int64_t> &src_shape, DataType data_type, | |||
| std::vector<int64_t> &dst_shape) { | |||
| if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| auto n = src_shape.at(kNchwN); | |||
| @@ -94,8 +94,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| std::vector<int64_t> expect_shape = {n, h, w, c}; | |||
| auto ret = ge::formats::Transpose(data, args.src_shape, args.src_data_type, perm_arg_1, trans_result_1); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to Transpose from NCHW to HWCN"); | |||
| return NOT_CHANGED; | |||
| GELOGE(ret, "Failed to Transpose from NCHW to HWCN"); | |||
| return ret; | |||
| } | |||
| TransArgs args_tmp = args; | |||
| @@ -104,8 +104,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| // check size it should be same with original | |||
| size_t expect_size = n * c * h * w * size; // before has do check about mul | |||
| if (trans_result_1.length != expect_size) { | |||
| GELOGE(INTERNAL_ERROR, "size is not match after transpose!"); | |||
| return NOT_CHANGED; | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "size is not match after transpose!"); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| // prepare for padding in chw | |||
| @@ -118,20 +118,21 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| // data overflow check totally | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o), | |||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o); | |||
| return INTERNAL_ERROR); | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o), | |||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o); | |||
| return INTERNAL_ERROR); | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| auto t1 = h_o * w_o; | |||
| auto t2 = n_o * c_o; | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| return INTERNAL_ERROR); | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size), | |||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size); | |||
| return INTERNAL_ERROR); | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| int64_t dst_size = total_ele_cnt * size; | |||
| if (dst_size == 0) { | |||
| result.length = 0; | |||
| @@ -140,15 +141,16 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto retMem = memset_s(dst.get(), dst_size, 0, dst_size); | |||
| if (retMem != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "memst failed!"); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memst failed!"); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| // copy data | |||
| auto block = c * h * w * size; | |||
| @@ -159,8 +161,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| for (auto k = 0; k < n; k++) { | |||
| ret = memcpy_s(p_d + k * stride, protectSize, p_s + k * block, block); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "memcpy_s failed!"); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy_s failed!"); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| protectSize = protectSize - block; | |||
| } | |||
| @@ -169,8 +171,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| std::vector<int64_t> perm_arg_2 = {2, 0, 1, 3}; | |||
| ret = ge::formats::Transpose(dst.get(), shape_o, args.src_data_type, perm_arg_2, result); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to Transpose from NCHW to HWCN"); | |||
| return NOT_CHANGED; | |||
| GELOGE(ret, "Failed to Transpose from NCHW to HWCN"); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| @@ -180,7 +182,7 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
| args_tmp = args; | |||
| auto src_shape = args_tmp.src_shape; | |||
| if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| int64_t c0 = GetCubeSizeByDataType(args.src_data_type); | |||
| @@ -190,8 +192,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
| auto w = src_shape.at(kNchwW); | |||
| if (c > kMaxDimsNumC) { | |||
| GELOGE(PARAM_INVALID, "Invalie dim c num[%lu].It should be in (0,4]", c); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Invalie dim c num[%lu].It should be in (0,4]", c); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| auto n_o = Ceil(n, c0) * c0; | |||
| @@ -205,21 +207,22 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
| // data overflow check | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o), | |||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o); | |||
| return INTERNAL_ERROR); | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o), | |||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o); | |||
| return INTERNAL_ERROR); | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| auto t1 = h_o * w_o; | |||
| auto t2 = n_o * c_o; | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| return INTERNAL_ERROR); | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size), | |||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size); | |||
| return INTERNAL_ERROR); | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| int64_t dst_size = total_ele_cnt * size; | |||
| if (dst_size == 0) { | |||
| @@ -228,15 +231,16 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
| dst.reset(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto ret = memset_s(dst.get(), dst_size, 0, dst_size); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "memst failed!"); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memst failed!"); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| auto p_s = args.data; | |||
| @@ -249,8 +253,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
| ret = memcpy_s(p_d + (i * c_o * h_o * w_o + j * h_o * w_o) * size, protectSize, | |||
| p_s + (i * c * h * w + j * h * w) * size, block); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, "memcpy_s failed!"); | |||
| return INTERNAL_ERROR; | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy_s failed!"); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| protectSize = protectSize - block; | |||
| } | |||
| @@ -270,37 +274,38 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult | |||
| std::shared_ptr<uint8_t> dst = nullptr; | |||
| auto ret = PaddingNC(args, args_tmp, dst); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Padding in NC axis failed!"); | |||
| GELOGE(ret, "Padding in NC axis failed!"); | |||
| return ret; | |||
| } | |||
| std::vector<int64_t> expect_shape; | |||
| ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type, args_tmp.dst_format, expect_shape); | |||
| ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type, | |||
| args_tmp.dst_format, expect_shape); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| if (!IsTransShapeDstCorrect(args_tmp, expect_shape)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (args_tmp.src_format == FORMAT_NCHW && args_tmp.dst_format == FORMAT_FRACTAL_Z_C04) { | |||
| return TransFormatFromNchwToFzC04(args_tmp, result); | |||
| } | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| Status FormatTransferNchwToFZC04::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| if (CheckDataTypeSupport(data_type) != SUCCESS) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (src_format == FORMAT_NCHW && dst_format == FORMAT_FRACTAL_Z_C04) { | |||
| return TransShapeNchwToFzC04(src_shape, data_type, dst_shape); | |||
| } | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| REGISTER_FORMAT_TRANSFER(FormatTransferNchwToFZC04, FORMAT_NCHW, FORMAT_FRACTAL_Z_C04) | |||
| @@ -32,13 +32,13 @@ Status TransShapeNchwToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||
| std::vector<int64_t> &dst_shape) { | |||
| int64_t c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 <= 0) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", | |||
| ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| dst_shape.clear(); | |||
| dst_shape.push_back(src_shape.at(kNchwN)); | |||
| @@ -47,9 +47,9 @@ Status TransShapeNchwToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||
| dst_shape.push_back(src_shape.at(kNchwW)); | |||
| dst_shape.push_back(c0); | |||
| if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -59,8 +59,8 @@ Status CheckArgsForNchwToNc1hwc0(const TransArgs &args) { | |||
| std::string error = "Dose not support trans format from " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| std::vector<int64_t> expect_5d_shape; | |||
| auto ret = TransShapeNchwToNc1hwc0(args.src_shape, args.src_data_type, expect_5d_shape); | |||
| @@ -68,12 +68,12 @@ Status CheckArgsForNchwToNc1hwc0(const TransArgs &args) { | |||
| return ret; | |||
| } | |||
| if (expect_5d_shape != args.dst_shape) { | |||
| GELOGE(PARAM_INVALID, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to trans format, the src and dst shape are not compatible. data" | |||
| " type %s, src shape %s, dst shape %s, expect dst shape %s", | |||
| TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), ShapeToString(args.src_shape).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(expect_5d_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| @@ -82,12 +82,12 @@ Status CheckArgsForNchwToNc1hwc0(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for" | |||
| " dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto n = args.src_shape.at(kNchwN); | |||
| @@ -97,8 +97,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| int64_t c0 = GetCubeSizeByDataType(args.src_data_type); | |||
| if (c0 <= 0) { | |||
| GELOGE(INTERNAL_ERROR, "The c0 is invalid %ld", c0); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "The c0 is invalid %ld", c0); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| int64_t c1 = (c - 1) / c0 + 1; | |||
| int64_t hw = h * w; | |||
| @@ -129,21 +129,21 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset, | |||
| static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to copy data from NCHW[%ld] offset %ld to " | |||
| "NC1HWC0[%ld, %ld, %ld, %ld, %ld] offset %ld, err-code %d", | |||
| srcIdx, src_offset, n_idx, c1_idx, h_idx, w_idx, c0_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } else { | |||
| auto ret = | |||
| memset_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), 0, static_cast<size_t>(size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to set to 0 to " | |||
| "NC1HWC0[%ld, %ld, %ld, %ld, %ld] offset %ld, err-code %d", | |||
| n_idx, c1_idx, h_idx, w_idx, c0_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -159,8 +159,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| } // namespace | |||
| Status FormatTransferNchwNc1hwc0::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (CheckArgsForNchwToNc1hwc0(args) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckArgsForNchwToNc1hwc0(args); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| // Guarantee the validity of parameters in check function | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| @@ -172,20 +173,21 @@ Status FormatTransferNchwNc1hwc0::TransFormat(const TransArgs &args, TransResult | |||
| return SUCCESS; | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| GELOGD( | |||
| "Begin to trans format from NCHW to NC1HWC0, src shape %s, data type " | |||
| "%s, dst shape %s memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ret = GetDstDataAfterTrans(args, result, size, total_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -195,7 +197,7 @@ Status FormatTransferNchwNc1hwc0::TransShape(Format src_format, const std::vecto | |||
| if (src_format == FORMAT_NCHW) { | |||
| return TransShapeNchwToNc1hwc0(src_shape, data_type, dst_shape); | |||
| } else { | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| } | |||
| @@ -34,8 +34,8 @@ Status TransShapeNhwcToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||
| std::vector<int64_t> &dst_shape) { | |||
| int64_t c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 <= 0) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| dst_shape.clear(); | |||
| dst_shape.push_back(src_shape.at(kNhwcN)); | |||
| @@ -44,9 +44,9 @@ Status TransShapeNhwcToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||
| dst_shape.push_back(src_shape.at(kNhwcW)); | |||
| dst_shape.push_back(c0); | |||
| if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", | |||
| ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -56,21 +56,21 @@ Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) { | |||
| std::string error = "Dose not support trans format from " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| if (!CheckDataTypeSupported(args.src_data_type)) { | |||
| GELOGE(UNSUPPORTED, "Failed to trans shape from NHWC to NC1HWC0, invalid data type %s", | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to trans shape from NHWC to NC1HWC0, invalid data type %s", | |||
| TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); | |||
| return UNSUPPORTED; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(args.src_shape, kNhwcDimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", ShapeToString(args.src_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| if (!CheckShapeValid(args.dst_shape, kNc1hwc0DimsNum)) { | |||
| GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(args.dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| std::vector<int64_t> expect_dst_shape; | |||
| auto ret = TransShapeNhwcToNc1hwc0(args.src_shape, args.src_data_type, expect_dst_shape); | |||
| @@ -78,12 +78,12 @@ Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) { | |||
| return ret; | |||
| } | |||
| if (args.dst_shape != expect_dst_shape) { | |||
| GELOGE(PARAM_INVALID, | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to trans format, the src and dst shape are not compatible. src shape %s, dst shape %s, " | |||
| "expect dst shape %s", | |||
| ShapeToString(args.src_shape).c_str(), ShapeToString(args.dst_shape).c_str(), | |||
| ShapeToString(expect_dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| @@ -92,10 +92,10 @@ Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| } | |||
| auto n = args.src_shape.at(kNhwcN); | |||
| @@ -131,19 +131,19 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| if (c_idx < c) { | |||
| auto ret = memcpy_s(dst.get() + dst_offset, protected_size, args.data + src_offset, size); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to copy data from NHWC[%ld, %ld, %ld, %ld] offset %ld to " | |||
| "NC1HWC0[%ld, %ld, %ld, %ld, %ld] offset %ld err-code %d", | |||
| n_idx, h_idx, w_idx, c_idx, src_offset, n_idx, c1_idx, h_idx, w_idx, c0_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } else { | |||
| auto ret = memset_s(dst.get() + dst_offset, protected_size, 0, size); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to set 0 to NC1HWC0[%ld, %ld, %ld, %ld, %ld] offset %ld base err-code %d", n_idx, c1_idx, | |||
| h_idx, w_idx, c0_idx, dst_offset, ret); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| } | |||
| } | |||
| @@ -158,8 +158,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in | |||
| } // namespace | |||
| Status FormatTransferNhwcNc1hwc0::TransFormat(const TransArgs &args, TransResult &result) { | |||
| if (CheckArgsForNhwcToNc1hwc0(args) != SUCCESS) { | |||
| return PARAM_INVALID; | |||
| Status ret = CheckArgsForNhwcToNc1hwc0(args); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| int size = GetSizeByDataType(args.src_data_type); | |||
| auto total_size = GetItemNumByShape(args.dst_shape) * size; | |||
| @@ -170,18 +171,20 @@ Status FormatTransferNhwcNc1hwc0::TransFormat(const TransArgs &args, TransResult | |||
| return SUCCESS; | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from NHWC to NC1HWC0, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ret = GetDstDataAfterTrans(args, result, size, total_size); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), | |||
| ShapeToString(args.dst_shape).c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -190,15 +193,15 @@ Status FormatTransferNhwcNc1hwc0::TransShape(Format src_format, const std::vecto | |||
| DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | |||
| if (src_format == FORMAT_NHWC && CheckDataTypeSupported(data_type)) { | |||
| if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { | |||
| GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check src shape %s", | |||
| ShapeToString(src_shape).c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return TransShapeNhwcToNc1hwc0(src_shape, data_type, dst_shape); | |||
| } else if (src_format != FORMAT_NHWC) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } else { | |||
| return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| } | |||
| @@ -141,7 +141,7 @@ std::vector<int64_t> TransShapeByPerm(const std::vector<int64_t> &src_shape, con | |||
| Status Transpose(const uint8_t *src, const std::vector<int64_t> &src_shape, DataType src_data_type, | |||
| const std::vector<int64_t> &perm_arg, TransResult &result) { | |||
| if (!IsTransposeArgValid(src, src_shape, src_data_type, perm_arg)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| auto dst_shape = TransShapeByPerm(src_shape, perm_arg); | |||
| @@ -172,12 +172,12 @@ Status Transpose(const uint8_t *src, const std::vector<int64_t> &src_shape, Data | |||
| auto ret = memcpy_s(dst.get() + dst_offset_bytes, static_cast<size_t>(protected_size), src + src_offset, | |||
| static_cast<size_t>(data_size)); | |||
| if (ret != EOK) { | |||
| GELOGE(INTERNAL_ERROR, | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, | |||
| "Failed to transpose, src shape %s, perm arg %s, dst shape %s, " | |||
| "failed to write to dst offset %ld, current dim offset %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(perm_arg).c_str(), ShapeToString(dst_shape).c_str(), | |||
| dst_offset_bytes, ShapeToString(dst_indexes).c_str()); | |||
| return INTERNAL_ERROR; | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| AddOne(dst_shape, dst_indexes); | |||
| ++dst_index; | |||
| @@ -192,14 +192,14 @@ Status TransposeWithShapeCheck(const uint8_t *data, const std::vector<int64_t> & | |||
| const std::vector<int64_t> &dst_shape, DataType src_data_type, | |||
| const std::vector<int64_t> &perm_arg, TransResult &result) { | |||
| if (!IsTransposeArgValid(data, src_shape, src_data_type, perm_arg)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| auto expected_shape = TransShapeByPerm(src_shape, perm_arg); | |||
| if (dst_shape != expected_shape) { | |||
| std::string error = "Failed to trans axis for perm_arg" + | |||
| FmtToStr(ShapeToString(perm_arg)) + ", invalid dst shape" + | |||
| FmtToStr(ShapeToString(dst_shape)) + ", expect" + FmtToStr(ShapeToString(expected_shape)); | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str()); | |||
| } | |||
| return Transpose(data, src_shape, src_data_type, perm_arg, result); | |||
| @@ -211,16 +211,16 @@ Status GetPermByForamt(Format src_format, Format dst_format, std::vector<int64_t | |||
| std::string error = "Failed to trans shape, do not support transpose from format " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| auto iter = dst_iter->second.find(dst_format); | |||
| if (iter == dst_iter->second.end()) { | |||
| std::string error = "Failed to trans shape, do not support transpose from format " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| perm = iter->second; | |||
| return SUCCESS; | |||
| @@ -233,7 +233,7 @@ Status FormatTransferTranspose::TransFormat(const TransArgs &args, TransResult & | |||
| return ret; | |||
| } | |||
| if (!IsTransShapeDstCorrect(args, expected_shape)) { | |||
| return PARAM_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| return Transpose(args.data, args.src_shape, args.src_data_type, perm_args[args.src_format][args.dst_format], result); | |||
| @@ -244,7 +244,7 @@ Status FormatTransferTranspose::TransShape(Format src_format, const std::vector< | |||
| std::vector<int64_t> perm_arg; | |||
| GE_CHK_STATUS_RET_NOLOG(GetPermByForamt(src_format, dst_format, perm_arg)); | |||
| if (!IsShapeArgValid(src_shape, perm_arg)) { | |||
| return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| dst_shape = TransShapeByPerm(src_shape, perm_arg); | |||
| return SUCCESS; | |||
| @@ -38,14 +38,14 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransFormat(const TransArg | |||
| std::string error = "Failed to trans data from format " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| auto src_shape_size = GetItemNumByShape(args.src_shape); | |||
| if (args.data == nullptr && src_shape_size != 0) { | |||
| GELOGE(PARAM_INVALID, "Invalid input null data"); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Invalid input null data"); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| return transfer->TransFormat(args, result); | |||
| @@ -64,8 +64,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_form | |||
| std::string error = "Failed to trans data from format " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | |||
| FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_FORMAT_INVALID; | |||
| } | |||
| return transfer->TransShape(src_format, src_shape, data_type, dst_format, dst_shape); | |||
| @@ -77,13 +77,13 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransDataType(const CastAr | |||
| std::string error = "Failed to trans data from datatype " + | |||
| FmtToStr(TypeUtils::DataTypeToSerialString(args.src_data_type)) + " to " + | |||
| FmtToStr(TypeUtils::DataTypeToSerialString(args.dst_data_type)); | |||
| GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||
| return UNSUPPORTED; | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_DATATYPE_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| if (args.data == nullptr && args.src_data_size != 0) { | |||
| GELOGE(PARAM_INVALID, "Invalid input null data"); | |||
| return PARAM_INVALID; | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Invalid input null data"); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| return transfer->TransDataType(args, result); | |||
| @@ -87,12 +87,13 @@ Status ModelHelper::SaveSizeToModelDef(const GeModelPtr &ge_model) { | |||
| std::shared_ptr<ModelTaskDef> model_task_def = ge_model->GetModelTaskDefPtr(); | |||
| if (model_task_def == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed"); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| GELOGD("SaveSizeToModelDef task_info_size is 0."); | |||
| om_info.push_back(0); | |||
| } else { | |||
| size_t partition_task_size = model_task_def->ByteSizeLong(); | |||
| GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size); | |||
| om_info.push_back(partition_task_size); | |||
| } | |||
| size_t partition_task_size = model_task_def->ByteSizeLong(); | |||
| GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size); | |||
| om_info.push_back(partition_task_size); | |||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(*(ge_model.get()), "om_info_list", om_info), | |||
| GELOGE(FAILED, "SetListInt of om_info_list failed."); | |||
| @@ -90,6 +90,8 @@ REGISTER_OPTYPE_DEFINE(DEPCONVOLUTION, "ConvolutionDepthwise"); | |||
| REGISTER_OPTYPE_DEFINE(DROPOUT, "Dropout"); | |||
| REGISTER_OPTYPE_DEFINE(DROPOUTGENMASK, "DropOutGenMask"); | |||
| REGISTER_OPTYPE_DEFINE(DROPOUTDOMASK, "DropOutDoMask"); | |||
| REGISTER_OPTYPE_DEFINE(DROPOUTDOMASKV3, "DropOutDoMaskV3"); | |||
| REGISTER_OPTYPE_DEFINE(DROPOUTDOMASKV3D, "DropOutDoMaskV3D"); | |||
| REGISTER_OPTYPE_DEFINE(CONCAT, "Concat"); | |||
| REGISTER_OPTYPE_DEFINE(ROIPOOLING, "ROIPooling"); | |||
| REGISTER_OPTYPE_DEFINE(PROPOSAL, "Proposal"); | |||
| @@ -217,7 +217,7 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { | |||
| std::string unsupported_reason; | |||
| // It will be replaced by engine' checksupport | |||
| uint64_t start_time = GetCurrentTimestamp(); | |||
| if (kernel_info_store->second->CheckSupported(op_desc, unsupported_reason)) { | |||
| if (kernel_info_store->second->CheckSupported(node_ptr, unsupported_reason)) { | |||
| checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; | |||
| op_desc->SetOpEngineName(it.engine); | |||
| op_desc->SetOpKernelLibName(kernel_name); | |||
| @@ -8,6 +8,7 @@ set(PROTO_LIST | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST}) | |||
| set(SRC_LIST | |||
| "ge_executor.cc" | |||
| @@ -17,6 +18,7 @@ set(SRC_LIST | |||
| "../common/dump/dump_properties.cc" | |||
| "../common/dump/dump_manager.cc" | |||
| "../common/dump/dump_op.cc" | |||
| "../common/dump/opdebug_register.cc" | |||
| "../common/profiling/ge_profiling.cc" | |||
| "../graph/load/graph_loader.cc" | |||
| "../graph/execute/graph_execute.cc" | |||
| @@ -161,7 +163,7 @@ set(SRC_LIST | |||
| ) | |||
| ######## libge_executor.a ######## | |||
| add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS}) | |||
| target_compile_options(ge_executor PRIVATE | |||
| $<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> | |||
| @@ -190,7 +192,7 @@ target_include_directories(ge_executor SYSTEM PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_static | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| ${GE_CODE_DIR}/../inc/cce | |||
| @@ -211,6 +213,7 @@ target_link_libraries(ge_executor PRIVATE | |||
| add_library(ge_executor_shared SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
| target_compile_options(ge_executor_shared PRIVATE | |||
| -fno-common | |||
| -Werror | |||
| -O2 | |||
| -Wno-deprecated-declarations | |||
| @@ -30,6 +30,8 @@ | |||
| #include "single_op/single_op_manager.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | |||
| #include "graph/opsproto_manager.h" | |||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||
| using std::string; | |||
| using std::vector; | |||
| @@ -199,6 +201,33 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims, | |||
| namespace ge { | |||
| bool GeExecutor::isInit_ = false; | |||
| static void InitOpsProtoManager() { | |||
| string opsproto_path; | |||
| const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||
| if (path_env != nullptr) { | |||
| string path = path_env; | |||
| string file_path = RealPath(path.c_str()); | |||
| if (file_path.empty()) { | |||
| GELOGE(FAILED, "[Check][EnvPath]ASCEND_OPP_PATH path [%s] is invalid.", path.c_str()); | |||
| REPORT_INPUT_ERROR("E68016", {"ASCEND_OPP_PATH", path}); | |||
| return; | |||
| } | |||
| opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); | |||
| GELOGI("Get opsproto so path from env : %s", path.c_str()); | |||
| } else { | |||
| string path_base = PluginManager::GetPath(); | |||
| GELOGI("path_base is %s", path_base.c_str()); | |||
| path_base = path_base.substr(0, path_base.rfind('/')); | |||
| path_base = path_base.substr(0, path_base.rfind('/') + 1); | |||
| opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | |||
| } | |||
| GELOGI("Get opsproto path is %s", opsproto_path.c_str()); | |||
| OpsProtoManager *manager = OpsProtoManager::Instance(); | |||
| map<string, string> option_tmp; | |||
| option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path)); | |||
| (void)manager->Initialize(option_tmp); | |||
| } | |||
| GeExecutor::GeExecutor() {} | |||
| Status GeExecutor::Initialize() { | |||
| @@ -208,6 +237,16 @@ Status GeExecutor::Initialize() { | |||
| return ge::SUCCESS; | |||
| } | |||
| OpTilingManager::GetInstance().LoadSo(); | |||
| Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize(); | |||
| if (init_hostcpu_engine_status != SUCCESS) { | |||
| GELOGE(init_hostcpu_engine_status, "Failed to initialize HostCpuEngine"); | |||
| return init_hostcpu_engine_status; | |||
| } | |||
| InitOpsProtoManager(); | |||
| std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM); | |||
| mem_type.push_back(RT_MEMORY_P2P_DDR); | |||
| auto ret = MemManager::Instance().Initialize(mem_type); | |||
| @@ -20,6 +20,8 @@ set(OPS_KERNEL_SRC_LIST | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_ops_shared PROTO_OPS_SHARED_SRCS PROTO_OPS_SHARED_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_ops_static PROTO_OPS_STATIC_SRCS PROTO_OPS_STATIC_HDRS ${PROTO_LIST}) | |||
| ############ libge_local_engine.so ############ | |||
| add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
| @@ -119,7 +121,7 @@ set_target_properties(atc_ge_local_engine PROPERTIES | |||
| ) | |||
| ############ libge_local_opskernel_builder.so ############ | |||
| add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS}) | |||
| target_compile_options(ge_local_opskernel_builder PRIVATE | |||
| -Werror | |||
| @@ -143,7 +145,7 @@ target_include_directories(ge_local_opskernel_builder PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_ops_shared | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -166,7 +168,7 @@ target_link_libraries(ge_local_opskernel_builder PRIVATE | |||
| ) | |||
| ############ atclib/libge_local_opskernel_builder.so ############ | |||
| add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS}) | |||
| target_compile_options(atc_ge_local_opskernel_builder PRIVATE | |||
| -Werror | |||
| @@ -190,7 +192,7 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_ops_shared | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -218,7 +220,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES | |||
| ) | |||
| ############ libge_local_opskernel_builder.a ############ | |||
| add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_STATIC_HDRS}) | |||
| target_compile_options(ge_local_opskernel_builder_static PRIVATE | |||
| -Werror | |||
| @@ -243,7 +245,7 @@ target_include_directories(ge_local_opskernel_builder_static PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_ops_static | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -38,6 +38,7 @@ REGISTER_OP_CREATOR(ExpandDims, GeDeletedOp); | |||
| REGISTER_OP_CREATOR(Reshape, GeDeletedOp); | |||
| REGISTER_OP_CREATOR(ReFormat, GeDeletedOp); | |||
| REGISTER_OP_CREATOR(Squeeze, GeDeletedOp); | |||
| REGISTER_OP_CREATOR(Unsqueeze, GeDeletedOp); | |||
| REGISTER_OP_CREATOR(Size, GeDeletedOp); | |||
| REGISTER_OP_CREATOR(Shape, GeDeletedOp); | |||
| REGISTER_OP_CREATOR(ShapeN, GeDeletedOp); | |||
| @@ -16,14 +16,12 @@ | |||
| #include "ge_runtime/task/label_goto_task.h" | |||
| #include "ge_runtime/task/task_factory.h" | |||
| #include "framework/common/util.h" | |||
| namespace ge { | |||
| namespace model_runner { | |||
| LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info) | |||
| : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), | |||
| task_info_(task_info), | |||
| stream_(nullptr), | |||
| label_(nullptr) { | |||
| : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), task_info_(task_info) { | |||
| if (task_info_ == nullptr) { | |||
| GELOGW("task_info_ is null!"); | |||
| return; | |||
| @@ -42,29 +40,78 @@ LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::share | |||
| label_ = label_list[label_id]; | |||
| } | |||
| LabelGotoTask::~LabelGotoTask() {} | |||
| LabelGotoTask::~LabelGotoTask() { | |||
| GE_FREE_RT_LOG(label_info_); | |||
| GE_FREE_RT_LOG(index_value_); | |||
| } | |||
| bool LabelGotoTask::Distribute() { | |||
| GELOGI("LabelGotoTask Distribute start."); | |||
| if (!CheckParamValid()) { | |||
| return false; | |||
| } | |||
| const std::vector<void *> label_list = { label_ }; | |||
| rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| uint64_t branch_index = 0; | |||
| rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size(); | |||
| rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelSwitchByIndex(index_value_, label_list.size(), label_info_, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| GELOGI("DistributeTask end."); | |||
| return true; | |||
| } | |||
| bool LabelGotoTask::CheckParamValid() { | |||
| if (stream_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "stream is null!"); | |||
| return false; | |||
| } | |||
| if (label_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "label is null!"); | |||
| return false; | |||
| } | |||
| rtError_t rt_ret = rtLabelGotoEx(label_, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| if (label_info_ != nullptr) { | |||
| GELOGE(PARAM_INVALID, "label_info_ has dirty data."); | |||
| return false; | |||
| } | |||
| if (index_value_ != nullptr) { | |||
| GELOGE(PARAM_INVALID, "index_value_ has dirty data."); | |||
| return false; | |||
| } | |||
| GELOGI("DistributeTask end."); | |||
| return true; | |||
| } | |||
| REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo); | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -31,9 +31,13 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> { | |||
| bool Distribute() override; | |||
| private: | |||
| bool CheckParamValid(); | |||
| std::shared_ptr<LabelGotoTaskInfo> task_info_; | |||
| void *stream_; | |||
| void *label_; | |||
| void *stream_{nullptr}; | |||
| void *label_{nullptr}; | |||
| void *label_info_{nullptr}; | |||
| void *index_value_{nullptr}; | |||
| }; | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -50,9 +50,13 @@ const char *const kFileNameSuffix = "online"; | |||
| const char *const kAicpuAllshape = "_AllShape"; | |||
| constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | |||
| const int64_t kDynamicDimValue = -2; | |||
| const int kDefaultDeviceId = 0; | |||
| const int kDefaultJobId = 0; | |||
| std::map<ge::OpEngineType, std::string> engine_type_map{ | |||
| {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; | |||
| {ge::ENGINE_SYS, kEngineNameDefault}, | |||
| {ge::ENGINE_AICORE, kAIcoreEngine}, | |||
| {ge::ENGINE_VECTOR, kVectorEngine}}; | |||
| bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { | |||
| for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) { | |||
| @@ -66,7 +70,8 @@ bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { | |||
| } // namespace | |||
| namespace ge { | |||
| static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engine_type) { | |||
| static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_type) { | |||
| const OpDescPtr &op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | |||
| if (engine_type == ENGINE_SYS) { | |||
| GELOGI("CheckEngineType: use default engine."); | |||
| @@ -82,8 +87,9 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi | |||
| } else { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"}, | |||
| {op_desc->GetName(), op_desc->GetType(), "engine type", | |||
| "it only support kEngineNameDefault/kAIcoreEngine/kVectorEngine"}); | |||
| GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast<int>(engine_type)); | |||
| "it only support default/AIcoreEngine/VectorEngine"}); | |||
| GELOGE(FAILED, "[Check][EngineType]value:%d not support, " | |||
| "only support default/AIcoreEngine/VectorEngine now", static_cast<int>(engine_type)); | |||
| return FAILED; | |||
| } | |||
| @@ -123,7 +129,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi | |||
| auto kernel_info_store = kernel_map.find(kernel_name); | |||
| if (kernel_info_store != kernel_map.end()) { | |||
| std::string unsupported_reason; | |||
| if (kernel_info_store->second->CheckSupported(op_desc, unsupported_reason)) { | |||
| if (kernel_info_store->second->CheckSupported(node, unsupported_reason)) { | |||
| op_desc->SetOpEngineName(op_engine_name); | |||
| op_desc->SetOpKernelLibName(kernel_name); | |||
| GELOGI("CheckEngineType:Set OpKernelLibName %s and engine name %s into op_desc %s", kernel_name.c_str(), | |||
| @@ -187,17 +193,20 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const | |||
| (void)AttrUtils::SetBool(data_op, "_is_single_op", true); | |||
| GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); | |||
| GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); | |||
| GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||
| "[Add][InputDesc]fail for node:%s", data_op->GetName().c_str()); | |||
| GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||
| "[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str()); | |||
| if (attr) { | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, "Set index fail."); | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, | |||
| "[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); | |||
| } | |||
| ge::NodePtr arg_node = graph->AddNode(data_op); | |||
| GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail."); | |||
| GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail"); | |||
| GE_CHK_STATUS(GraphUtils::AddEdge(arg_node->GetOutDataAnchor(0), node->GetInDataAnchor(index)), | |||
| "Add edge[%s->%s] fail.", data_op->GetName().c_str(), node->GetName().c_str()); | |||
| "[Add][Edge]fail from node:%s to node:%s", data_op->GetName().c_str(), node->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| @@ -212,20 +221,23 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons | |||
| for (const auto &out_desc : outputs) { | |||
| GeTensorDesc tensor = out_desc.GetTensorDesc(); | |||
| TensorUtils::SetInputTensor(tensor, true); | |||
| GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail"); | |||
| GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||
| "[Add][InputDesc]fail for node:%s", op_desc->GetName().c_str()); | |||
| TensorUtils::SetInputTensor(tensor, false); | |||
| TensorUtils::SetOutputTensor(tensor, true); | |||
| GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail"); | |||
| GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||
| "[Add][OutputDesc]fail for node:%s", op_desc->GetName().c_str()); | |||
| count++; | |||
| } | |||
| GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | |||
| ge::NodePtr out_node = graph->AddNode(op_desc); | |||
| GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, "Insert Output node fail."); | |||
| GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, | |||
| "[Add][Node:%s]fail in graph:%u", op_desc->GetName().c_str(), graph->GetGraphID()); | |||
| GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | |||
| for (int32_t i = 0; i < count; ++i) { | |||
| GE_CHK_STATUS(GraphUtils::AddEdge(node->GetOutDataAnchor(i), out_node->GetInDataAnchor(i)), | |||
| "Add edge[%s->%s] fail.", node->GetName().c_str(), out_node->GetName().c_str()); | |||
| "[Add][Edge]fail from node:%s to node:%s", node->GetName().c_str(), out_node->GetName().c_str()); | |||
| } | |||
| return SUCCESS; | |||
| @@ -553,6 +565,44 @@ bool GeGenerator::Impl::SetOmSystemInfo(AttrHolder &obj) { | |||
| return true; | |||
| } | |||
| // Names the root GeModel of `ge_root_model` with the model name derived from the root graph name. | |||
| // When the root model is of unknown shape, a new GeModel wrapping the root graph is first | |||
| // registered under the root graph name so that the lookup at the end can find it. | |||
| // Returns SUCCESS on success, FAILED when the unknown-shape check fails, PARAM_INVALID when the | |||
| // model name cannot be derived, and whatever GE_CHECK_NOTNULL returns when a required pointer is null. | |||
| Status GeGenerator::SetModelNameForDump(const GeRootModelPtr &ge_root_model) { | |||
|   // Validate every pointer that is dereferenced below before its first use. | |||
|   GE_CHECK_NOTNULL(ge_root_model); | |||
|   const ComputeGraphPtr root_graph = ge_root_model->GetRootGraph(); | |||
|   GE_CHECK_NOTNULL(root_graph); | |||
|   const string root_graph_name = root_graph->GetName(); | |||
|  | |||
|   bool is_unknown_shape = false; | |||
|   Status ret = ge_root_model->CheckIsUnknownShape(is_unknown_shape); | |||
|   if (ret != SUCCESS) { | |||
|     // Both messages use %u so the log and the reported error format the model id identically. | |||
|     GELOGE(FAILED, "[Check][IsUnknownShape]Check root model is unknown shape failed, model id:%u", | |||
|            ge_root_model->GetModelId()); | |||
|     REPORT_CALL_ERROR("E19999", "Check root model is unknown shape failed, model id:%u", | |||
|                       ge_root_model->GetModelId()); | |||
|     return FAILED; | |||
|   } | |||
|  | |||
|   if (is_unknown_shape) { | |||
|     GeModelPtr model_root = MakeShared<GeModel>(); | |||
|     GE_CHECK_NOTNULL(model_root); | |||
|     model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(root_graph)); | |||
|     ge_root_model->SetSubgraphInstanceNameToModel(root_graph_name, model_root); | |||
|   } | |||
|  | |||
|   ModelHelper model_helper; | |||
|   string model_name; | |||
|   Status name_ret = model_helper.GetModelNameFromMergedGraphName(root_graph_name, model_name); | |||
|   if (name_ret != SUCCESS) { | |||
|     ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); | |||
|     GELOGE(FAILED, "[Check][GetModelNameStep]Get model_name failed. Param --output is invalid, root graph name: %s", | |||
|            root_graph_name.c_str()); | |||
|     // The two literals are concatenated into ONE format string; a comma between them would turn | |||
|     // the second literal into a vararg and drop the graph name from the message. | |||
|     REPORT_CALL_ERROR("E19999", "Get model_name failed. Param --output is invalid, " | |||
|                       "root graph name: %s", root_graph_name.c_str()); | |||
|     return PARAM_INVALID; | |||
|   } | |||
|  | |||
|   // Work on a copy of the map: operator[] may insert, and the models are shared pointers, so | |||
|   // renaming through the copy still renames the model owned by ge_root_model. | |||
|   map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
|   GeModelPtr &ge_model = name_to_ge_model[root_graph_name]; | |||
|   GE_CHECK_NOTNULL(ge_model); | |||
|   ge_model->SetName(model_name); | |||
|   return SUCCESS; | |||
| } | |||
| Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs, | |||
| ModelBufferData &model, bool is_offline) { | |||
| rtContext_t ctx = nullptr; | |||
| @@ -587,20 +637,10 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||
| } | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||
| ModelHelper model_helper; | |||
| string model_name = ""; | |||
| Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), | |||
| model_name); | |||
| if (name_ret != SUCCESS) { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); | |||
| GELOGE(FAILED, "Get model_name failed. Param --output is invalid."); | |||
| return PARAM_INVALID; | |||
| ret = SetModelNameForDump(ge_root_model); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
| GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null"); | |||
| ge_model->SetName(model_name); | |||
| ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Save model failed"); | |||
| @@ -653,6 +693,22 @@ namespace { | |||
| } | |||
| } | |||
| // Reports whether none of the direct nodes of `graph` is assigned to the AI Core engine. | |||
| // Null nodes and nodes without an op desc are ignored; the scan stops at the first node whose | |||
| // op engine name equals kAIcoreEngine. | |||
| bool GeGenerator::CheckNoAicore(const ComputeGraphPtr &graph) { | |||
|   for (const auto &direct_node : graph->GetDirectNode()) { | |||
|     if (direct_node == nullptr) { | |||
|       continue; | |||
|     } | |||
|     const auto desc = direct_node->GetOpDesc(); | |||
|     const bool runs_on_aicore = (desc != nullptr) && (desc->GetOpEngineName() == kAIcoreEngine); | |||
|     if (runs_on_aicore) { | |||
|       return false; | |||
|     } | |||
|   } | |||
|   return true; | |||
| } | |||
| Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| const vector<GeTensor> &outputs) { | |||
| GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | |||
| @@ -697,22 +753,23 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | |||
| GE_CHECK_NOTNULL(op_desc_tmp); | |||
| // 1. check engine type when compile online | |||
| // 1. Create ComputeGraph. | |||
| string name = ge::CurrentTimeInStr() + "_" + model_file_name; | |||
| Graph graph; | |||
| GE_CHK_STATUS(BuildSingleOpGraph(op_desc, inputs, outputs, name, graph), "make graph fail."); | |||
| // 2. check engine type when compile online | |||
| if (model_file_name == kFileNameSuffix) { | |||
| Status ret = CheckEngineTypeSupport(op_desc, engine_type); | |||
| auto comp_graph = GraphUtils::GetComputeGraph(graph); | |||
| GE_CHECK_NOTNULL(comp_graph); | |||
| auto node = comp_graph->FindNode(op_desc->GetName()); | |||
| Status ret = CheckEngineTypeSupport(node, engine_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "check engine type failed."); | |||
| GELOGE(ret, "[Check][EngineType]value:%d for node:%s not support", engine_type, node->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| } | |||
| // 2. Create ComputeGraph. | |||
| string name = ge::CurrentTimeInStr() + "_" + model_file_name; | |||
| Graph graph; | |||
| if (BuildSingleOpGraph(op_desc, inputs, outputs, name, graph) != ge::SUCCESS) { | |||
| GELOGE(GRAPH_FAILED, "make graph fail."); | |||
| return GRAPH_FAILED; | |||
| } | |||
| GELOGI("ATC parser success in single op build."); | |||
| GeRootModelPtr ge_root_model = nullptr; | |||
| @@ -732,7 +789,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| bool all_shape = false; | |||
| (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); | |||
| if (all_shape) { | |||
| if (all_shape && CheckNoAicore(root_graph)) { | |||
| GELOGD("Get aicpu all_shape kernel!"); | |||
| vector<GeTensor> inputs_dynamic; | |||
| vector<GeTensor> outputs_dynamic; | |||
| @@ -869,13 +926,12 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo | |||
| "ge root model has no sub model") | |||
| GeModelPtr model_root = nullptr; | |||
| if (is_unknown_shape) { | |||
| model_root = make_shared<GeModel>(); | |||
| model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph())); | |||
| ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root); | |||
| model_root->SetName(ge_root_model->GetRootGraph()->GetName()); | |||
| auto name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| model_root = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
| } else { | |||
| model_root = ge_root_model->GetSubgraphInstanceNameToModel().begin()->second; | |||
| } | |||
| GE_CHECK_NOTNULL(model_root); | |||
| // set atc version | |||
| if (!SetAtcVersionInfo(*(model_root.get()))) { | |||
| GELOGW("SetPackageVersionInfo of atc failed!"); | |||
| @@ -913,6 +969,13 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||
| static std::atomic<uint64_t> atomic_session_id(0); | |||
| auto session_id = atomic_session_id.fetch_add(1); | |||
| // This is a temporary add for graph with variable | |||
| auto version = static_cast<int32_t>(SessionVersion::ClOUD_VERSION); | |||
| ret = VarManager::Instance(session_id)->Init(version, session_id, kDefaultDeviceId, kDefaultJobId); | |||
| GELOGI("Start init var instance, session_id %lu", session_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("Failed init var instance, session_id %lu", session_id); | |||
| } | |||
| if (is_singleop_unregistered_) { | |||
| ret = graph_manager_.BuildGraphForUnregisteredOp(graph_id, inputs, ge_root_model, session_id); | |||
| } else { | |||
| @@ -387,7 +387,7 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor | |||
| GE_CHECK_NOTNULL(out_anchor); | |||
| NodePtr in_node = out_anchor->GetOwnerNode(); | |||
| GE_CHECK_NOTNULL(in_node); | |||
| OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC); | |||
| OpDescBuilder op_desc_builder(name, MEMCPYASYNC); | |||
| OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) | |||
| .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) | |||
| .Build(); | |||
| @@ -400,6 +400,10 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor | |||
| } | |||
| static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) { | |||
| if (graph->GetGraphUnknownFlag()) { | |||
| GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| for (auto &node : graph->GetDirectNode()) { | |||
| // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT | |||
| auto op_desc = node->GetOpDesc(); | |||
| @@ -33,13 +33,21 @@ using std::queue; | |||
| namespace ge { | |||
| LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {} | |||
| const string &LogicalStreamPass::GetName() const { return name_; } | |||
| const string &LogicalStreamPass::GetName() const { | |||
| return name_; | |||
| } | |||
| bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const { return subgraph.engine_conf.skip_assign_stream; } | |||
| bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const { | |||
| return subgraph.engine_conf.skip_assign_stream; | |||
| } | |||
| bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const { return subgraph.engine_conf.attach; } | |||
| bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const { | |||
| return subgraph.engine_conf.attach; | |||
| } | |||
| bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const { return subgraph.engine_conf.independent; } | |||
| bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const { | |||
| return subgraph.engine_conf.independent; | |||
| } | |||
| bool LogicalStreamPass::HasStreamLabel(const Subgraph &subgraph) const { | |||
| return !subgraph.subgraph_info.GetStreamLabel().empty(); | |||
| @@ -60,14 +68,14 @@ Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> & | |||
| // Subgraphs of the same stream_label are assigned to the same stream, | |||
| // and different stream_labels are assigned new streams. | |||
| auto iter = label_streams.find(stream_label); | |||
| if (iter != label_streams.end()) { | |||
| subgraph->stream_id = iter->second; | |||
| } else { | |||
| if (iter == label_streams.end()) { | |||
| subgraph->stream_id = next_stream; | |||
| GELOGI("Assign new stream %ld for label %s.", next_stream, stream_label.c_str()); | |||
| GELOGI("[Assign][NewStreamId] %ld for label %s.", next_stream, stream_label.c_str()); | |||
| label_streams.emplace(stream_label, next_stream); | |||
| ++next_stream; | |||
| next_stream++; | |||
| } else { | |||
| subgraph->stream_id = iter->second; | |||
| } | |||
| changed = true; | |||
| } | |||
| @@ -92,15 +100,15 @@ Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt | |||
| const string &stream_label = subgraph->subgraph_info.GetStreamLabel(); | |||
| auto &label_streams = engine_streams[engine]; | |||
| auto iter = label_streams.find(stream_label); | |||
| if (iter != label_streams.end()) { | |||
| subgraph->stream_id = iter->second; | |||
| } else { | |||
| if (iter == label_streams.end()) { | |||
| subgraph->stream_id = next_stream; | |||
| GELOGI("Assign new independent stream %ld for engine %s (label: %s).", next_stream, engine.c_str(), | |||
| GELOGI("[Assign][NewStreamId:independent] %ld for engine %s (label: %s).", next_stream, engine.c_str(), | |||
| stream_label.c_str()); | |||
| label_streams.emplace(stream_label, next_stream); | |||
| ++next_stream; | |||
| next_stream++; | |||
| } else { | |||
| subgraph->stream_id = iter->second; | |||
| } | |||
| changed = true; | |||
| } | |||
| @@ -121,14 +129,16 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP | |||
| } | |||
| SubgraphPtr reusable_subgraph = GetReusableSubgraph(subgraph, end_subgraph_map, pld_subgraph_map); | |||
| if (reusable_subgraph != nullptr) { | |||
| if (reusable_subgraph == nullptr) { | |||
| (void)AssignNewStream(subgraph); | |||
| } else { | |||
| if (HasAssignedStream(*reusable_subgraph)) { | |||
| subgraph->stream_id = reusable_subgraph->stream_id; | |||
| } else { | |||
| int64_t stream_id = AssignNewStream(reusable_subgraph); | |||
| subgraph->stream_id = stream_id; | |||
| GELOGI("Reusable subgraph %s has not been assigned a stream, now assign new stream %ld.", | |||
| reusable_subgraph->name.c_str(), stream_id); | |||
| GELOGI("[Assign][NewStreamId] %ld for Reusable subgraph %s cause has not been assigned before.", | |||
| stream_id, reusable_subgraph->name.c_str()); | |||
| } | |||
| if (reusable_subgraph->reused_subgraph != nullptr) { | |||
| @@ -137,11 +147,10 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP | |||
| subgraph->reused_subgraph = reusable_subgraph; | |||
| reused_subgraphs_.emplace_back(subgraph, reusable_subgraph); | |||
| GELOGI("Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.", subgraph->name.c_str(), | |||
| GELOGI("[Reuse][Stream]Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.", | |||
| subgraph->name.c_str(), | |||
| subgraph->engine_conf.id.c_str(), reusable_subgraph->name.c_str(), | |||
| reusable_subgraph->engine_conf.id.c_str()); | |||
| } else { | |||
| (void)AssignNewStream(subgraph); | |||
| } | |||
| changed = true; | |||
| } | |||
| @@ -191,13 +200,15 @@ bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const Subgr | |||
| auto iter = pld_subgraph_map.find(end_pld_pair.second); | |||
| if (iter != pld_subgraph_map.end()) { | |||
| const SubgraphPtr &pred_subgraph_succ = iter->second; | |||
| if (pred_subgraph_succ != subgraph && pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id) { | |||
| if ((pred_subgraph_succ != subgraph) && | |||
| (pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id)) { | |||
| return false; | |||
| } | |||
| } | |||
| } | |||
| if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) || IsEngineAttach(*subgraph)) { | |||
| if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) || | |||
| IsEngineAttach(*subgraph)) { | |||
| return true; | |||
| } | |||
| @@ -249,7 +260,7 @@ int64_t AssignByDependencyPass::AssignNewStream(SubgraphPtr subgraph) { | |||
| engine_stream_num_[engine_name] = stream_id + 1; | |||
| } | |||
| GELOGI("Subgraph %s assigns new temp stream %ld (engine: %s).", subgraph->name.c_str(), stream_id, | |||
| GELOGI("[Assign][NewStreamId:temp]id:%ld for Subgraph %s (engine: %s).", stream_id, subgraph->name.c_str(), | |||
| engine_name.c_str()); | |||
| return stream_id; | |||
| @@ -282,7 +293,7 @@ void AssignByDependencyPass::UpdateAssignedSubgraphs(Context &context) { | |||
| GELOGI("Subgraph %s of engine %s reuses default stream %ld.", subgraph->name.c_str(), | |||
| subgraph->engine_conf.id.c_str(), context.default_stream); | |||
| } else { | |||
| GELOGI("Stream of subgraph %s has been updated to %ld.", subgraph->name.c_str(), subgraph->stream_id); | |||
| GELOGI("[Update][StreamId]id:%ld for subgraph %s.", subgraph->stream_id, subgraph->name.c_str()); | |||
| } | |||
| } | |||
| } | |||
| @@ -293,7 +304,7 @@ void AssignByDependencyPass::UpdateReusedSubgraphs() { | |||
| auto &cur_subgraph = item.first; | |||
| auto &reused_graph = item.second; | |||
| cur_subgraph->stream_id = reused_graph->stream_id; | |||
| GELOGI("Stream of subgraph %s has been updated to %ld.", cur_subgraph->name.c_str(), cur_subgraph->stream_id); | |||
| GELOGI("[Update][StreamId]id:%ld for subgraph %s.", cur_subgraph->stream_id, cur_subgraph->name.c_str()); | |||
| } | |||
| } | |||
| @@ -330,7 +341,7 @@ Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr | |||
| engine_name.c_str()); | |||
| return INTERNAL_ERROR; | |||
| } else { | |||
| GELOGI("Subgraph %s is assigned stream %ld (engine: %s).", subgraph->name.c_str(), subgraph->stream_id, | |||
| GELOGI("[Assign][StreamId] %ld for Subgraph %s (engine: %s).", subgraph->stream_id, subgraph->name.c_str(), | |||
| engine_name.c_str()); | |||
| } | |||
| } | |||
| @@ -353,12 +364,12 @@ Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr | |||
| GELOGD("Node %s of type %s in subgraph %s is assigned parent stream %ld (engine: %s).", node->GetName().c_str(), | |||
| node->GetType().c_str(), subgraph->name.c_str(), context.default_stream, engine_name.c_str()); | |||
| } else if (IsEngineSkip(*subgraph) && node->GetInNodes().empty()) { | |||
| GELOGD("Node %s of type %s in subgraph %s doesn't need to assign a stream (engine: %s).", | |||
| GELOGD("[Skip][StreamIdAssign]Node %s of type %s in subgraph %s doesn't need (engine: %s).", | |||
| node->GetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), engine_name.c_str()); | |||
| } else { | |||
| node->GetOpDesc()->SetStreamId(stream_id); | |||
| GELOGD("Node %s of type %s in subgraph %s is assigned stream %ld (engine: %s).", node->GetName().c_str(), | |||
| node->GetType().c_str(), subgraph->name.c_str(), stream_id, engine_name.c_str()); | |||
| GELOGD("[Assign][StreamId]id:%ld for Node %s of type %s in subgraph %s (engine: %s).", stream_id, | |||
| node->GetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), engine_name.c_str()); | |||
| } | |||
| } | |||
| } | |||
| @@ -366,6 +377,48 @@ Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr | |||
| return SUCCESS; | |||
| } | |||
| // Moves nodes that carry ATTR_NAME_PARALLEL_GROUP onto streams allocated per parallel group. | |||
| // Nodes are first bucketed by their current stream id. Inside each bucket (the kInvalidStream | |||
| // bucket is skipped) every distinct group name gets one new stream taken from | |||
| // context.next_stream, and all nodes of that group in the bucket are reassigned to it. | |||
| // Returns FAILED when the group attribute cannot be read as a string, the status of | |||
| // GE_CHECK_NOTNULL when a node has no op desc, and SUCCESS otherwise. | |||
| Status UpdateForParallelGroupPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) { | |||
|   // Keyed by int64_t because stream ids are int64_t; an int key would silently narrow them. | |||
|   std::map<int64_t, vector<OpDescPtr>> stream_op_map; | |||
|   for (const SubgraphPtr &subgraph : subgraphs) { | |||
|     auto compute_graph = subgraph->subgraph_info.GetSubGraph(); | |||
|     for (const NodePtr &node : compute_graph->GetDirectNode()) { | |||
|       OpDescPtr op_desc = node->GetOpDesc(); | |||
|       GE_CHECK_NOTNULL(op_desc); | |||
|       if (op_desc->HasAttr(ATTR_NAME_PARALLEL_GROUP)) { | |||
|         stream_op_map[op_desc->GetStreamId()].push_back(op_desc); | |||
|       } | |||
|     } | |||
|   } | |||
|   for (const auto &stream_ops : stream_op_map) { | |||
|     if (stream_ops.first == kInvalidStream) { | |||
|       continue; | |||
|     } | |||
|     // Group name -> stream allocated for that group within the current original stream. | |||
|     std::map<std::string, int64_t> group_2_stream_id; | |||
|     for (const auto &op_desc : stream_ops.second) { | |||
|       std::string group_name; | |||
|       if (!AttrUtils::GetStr(op_desc, ATTR_NAME_PARALLEL_GROUP, group_name)) { | |||
|         GELOGE(FAILED, "[GetAttr][OpDesc]Get node %s ATTR_NAME_PARALLEL_GROUP failed.", op_desc->GetName().c_str()); | |||
|         REPORT_INNER_ERROR("E19999", "Get node %s ATTR_NAME_PARALLEL_GROUP failed.", op_desc->GetName().c_str()); | |||
|         return FAILED; | |||
|       } | |||
|       const int64_t old_stream_id = op_desc->GetStreamId(); | |||
|       int64_t new_stream_id = kInvalidStream; | |||
|       // Distinct name so the lookup iterator does not shadow the outer loop variable. | |||
|       const auto group_iter = group_2_stream_id.find(group_name); | |||
|       if (group_iter != group_2_stream_id.end()) { | |||
|         new_stream_id = group_iter->second; | |||
|       } else { | |||
|         new_stream_id = context.next_stream++; | |||
|         group_2_stream_id[group_name] = new_stream_id; | |||
|       } | |||
|       op_desc->SetStreamId(new_stream_id); | |||
|       GELOGD("Node %s assigned stream %ld from stream %ld.", | |||
|              op_desc->GetName().c_str(), new_stream_id, old_stream_id); | |||
|     } | |||
|   } | |||
|   return SUCCESS; | |||
| } | |||
| int64_t UpdateForSkippedEnginePass::GetSingleInoutStream(const NodePtr &node) const { | |||
| set<int64_t> stream_ids; | |||
| @@ -387,8 +440,8 @@ int64_t UpdateForSkippedEnginePass::GetSingleInoutStream(const NodePtr &node) co | |||
| if (stream_ids.size() == 1) { | |||
| int64_t stream_id = *(stream_ids.begin()); | |||
| GELOGI("The stream of all input and output nodes of node %s (type: %s) is %ld.", node->GetName().c_str(), | |||
| node->GetType().c_str(), stream_id); | |||
| GELOGI("[Get][SingleStreamId]The stream of all input and output nodes of node %s (type: %s) is %ld.", | |||
| node->GetName().c_str(), node->GetType().c_str(), stream_id); | |||
| return stream_id; | |||
| } | |||
| @@ -406,7 +459,7 @@ Status UpdateForSkippedEnginePass::Run(ComputeGraphPtr graph, const vector<Subgr | |||
| auto op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| auto stream_id = op_desc->GetStreamId(); | |||
| if (stream_id != kInvalidStream && !HasStreamLabel(*subgraph)) { | |||
| if ((stream_id != kInvalidStream) && !HasStreamLabel(*subgraph)) { | |||
| ops_without_label.emplace(op_desc); | |||
| } | |||
| } | |||
| @@ -427,8 +480,8 @@ Status UpdateForSkippedEnginePass::Run(ComputeGraphPtr graph, const vector<Subgr | |||
| int64_t inout_stream = GetSingleInoutStream(node); | |||
| if (inout_stream != kInvalidStream) { | |||
| op_desc->SetStreamId(inout_stream); | |||
| GELOGI("Node %s of type %s reassign to stream %ld from stream %ld.", node->GetName().c_str(), | |||
| node->GetType().c_str(), inout_stream, stream_id); | |||
| GELOGI("[Reassign][StreamId]%ld for Node %s of type %s from stream %ld.", | |||
| inout_stream, node->GetName().c_str(), node->GetType().c_str(), stream_id); | |||
| } | |||
| } | |||
| } | |||
| @@ -455,7 +508,7 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt | |||
| return NOT_CHANGED; | |||
| } | |||
| GELOGI("AllReduceParallelPass is enabled."); | |||
| GELOGI("[Run][AllReduceParallelPass] start"); | |||
| GE_DUMP(graph, "BeforeAllReduceParallel"); | |||
| // All successors of HcomAllReduce. | |||
| @@ -463,7 +516,7 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt | |||
| for (const NodePtr &node : graph->GetDirectNode()) { | |||
| if (!IsHcomNode(node->GetType()) || | |||
| node->GetInDataNodes().size() <= 1) { | |||
| (node->GetInDataNodes().size() <= 1)) { | |||
| continue; | |||
| } | |||
| @@ -565,7 +618,7 @@ Status LogicalStreamAllocator::Assign(const ComputeGraphPtr &root_graph, const G | |||
| RefreshContinuousStreams(root_graph); | |||
| stream_num = context_.next_stream; | |||
| GELOGI("Assigned logical stream num: %ld.", stream_num); | |||
| GELOGI("[Assign][LogicalStream] At last, stream num: %ld.", stream_num); | |||
| return SUCCESS; | |||
| } | |||
| @@ -575,7 +628,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap | |||
| GE_CHECK_NOTNULL(graph); | |||
| NodePtr parent_node = graph->GetParentNode(); | |||
| if (parent_node == nullptr || parent_node->GetOpDesc() == nullptr) { | |||
| if ((parent_node == nullptr) || (parent_node->GetOpDesc() == nullptr)) { | |||
| context_.default_stream = kInvalidStream; | |||
| } else { | |||
| context_.default_stream = parent_node->GetOpDesc()->GetStreamId(); | |||
| @@ -597,7 +650,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap | |||
| return status; | |||
| } | |||
| GELOGD("Subgraphs of graph %s:", graph->GetName().c_str()); | |||
| GELOGD("[Show][Subgraphs] in graph %s", graph->GetName().c_str()); | |||
| for (const auto &subgraph : subgraphs) { | |||
| if (subgraph != nullptr) { | |||
| GELOGD("subgraph: %s", subgraph->name.c_str()); | |||
| @@ -655,6 +708,7 @@ Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vec | |||
| passes.emplace_back(MakeShared<IndependentStreamPass>()); | |||
| passes.emplace_back(MakeShared<AssignByDependencyPass>()); | |||
| passes.emplace_back(MakeShared<NodeStreamUpdatePass>()); | |||
| passes.emplace_back(MakeShared<UpdateForParallelGroupPass>()); | |||
| passes.emplace_back(MakeShared<AllReduceParallelPass>()); | |||
| passes.emplace_back(MakeShared<UpdateForSkippedEnginePass>()); | |||
| } | |||
| @@ -664,9 +718,9 @@ Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vec | |||
| Status status = pass->Run(graph, subgraphs, context_); | |||
| if (status == SUCCESS) { | |||
| GELOGD("Stream pass %s return SUCCESS.", pass->GetName().c_str()); | |||
| GELOGD("[Show][Status]Stream pass %s return SUCCESS.", pass->GetName().c_str()); | |||
| } else if (status == NOT_CHANGED) { | |||
| GELOGD("Stream pass %s return NOT_CHANGED.", pass->GetName().c_str()); | |||
| GELOGD("[Show][Status]Stream pass %s return NOT_CHANGED.", pass->GetName().c_str()); | |||
| } else { | |||
| GELOGE(status, "Stream pass %s failed.", pass->GetName().c_str()); | |||
| return status; | |||
| @@ -686,7 +740,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc != nullptr) { | |||
| int64_t stream_id = op_desc->GetStreamId(); | |||
| if (stream_id != kInvalidStream && stream_id < stream_num) { | |||
| if ((stream_id != kInvalidStream) && (stream_id < stream_num)) { | |||
| stream_has_node[stream_id] = true; | |||
| } | |||
| } | |||
| @@ -695,10 +749,10 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||
| context_.next_stream = 0; | |||
| vector<int64_t> old_to_new_streams(stream_num, kInvalidStream); | |||
| for (size_t old_stream = 0; old_stream < stream_has_node.size(); ++old_stream) { | |||
| for (size_t old_stream = 0; old_stream < stream_has_node.size(); old_stream++) { | |||
| if (stream_has_node[old_stream]) { | |||
| old_to_new_streams[old_stream] = context_.next_stream; | |||
| ++context_.next_stream; | |||
| context_.next_stream++; | |||
| } | |||
| } | |||
| @@ -706,7 +760,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc != nullptr) { | |||
| int64_t stream_id = op_desc->GetStreamId(); | |||
| if (stream_id != kInvalidStream && stream_id < stream_num) { | |||
| if ((stream_id != kInvalidStream) && (stream_id < stream_num)) { | |||
| op_desc->SetStreamId(old_to_new_streams[stream_id]); | |||
| } | |||
| } | |||
| @@ -149,6 +149,13 @@ class NodeStreamUpdatePass : public LogicalStreamPass { | |||
| Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) override; | |||
| }; | |||
| // assign stream by parallel group | |||
| class UpdateForParallelGroupPass : public LogicalStreamPass { | |||
| public: | |||
| STREAM_PASS_DEFAULT_FUNC(UpdateForParallelGroupPass); | |||
| Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) override; | |||
| }; | |||
| // Update the stream of subgraphs to nodes. | |||
| class UpdateForSkippedEnginePass : public LogicalStreamPass { | |||
| public: | |||
| @@ -70,7 +70,10 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||
| return SUCCESS; | |||
| } | |||
| if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { | |||
| GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front()); | |||
| GELOGE(FAILED, "[Check][MemRangeStep]first mem_range_step:%ld less than 0,invalid," | |||
| "maybe has dynamic shape in graph", all_memory_size.front()); | |||
| REPORT_INNER_ERROR("E19999", "first mem_range_step:%ld less than 0,invalid," | |||
| "maybe has dynamic shape in graph", all_memory_size.front()); | |||
| return FAILED; | |||
| } | |||
| // Memory size is 512 aligned, so it is not necessary to take less than 512 | |||
| @@ -81,12 +84,18 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||
| GELOGD("Range number: %zu", range_number); | |||
| vector<vector<int64_t>> ranges(range_number); | |||
| GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0."); | |||
| GE_CHK_BOOL_EXEC((range_number != 0), | |||
| REPORT_INNER_ERROR("E19999", "inner data[range_number] is 0, judge invalid"); | |||
| return PARAM_INVALID, | |||
| "[Check][RangeNumber]inner data is 0, judge invalid."); | |||
| size_t range_number_limit = all_memory_size.size() / range_number; | |||
| int64_t range_ceil = min_memory_size; | |||
| for (size_t i = 1; i <= range_number; i++) { | |||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval), | |||
| GELOGE(FAILED, "Multiply result is out of range."); | |||
| GELOGE(FAILED, "[Check][MemRangeCeil]Multiply result is out of range," | |||
| "range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval); | |||
| REPORT_INNER_ERROR("E19999", "process mem_range_ceil,multiply result out of range," | |||
| "range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval); | |||
| return FAILED); | |||
| range_ceil *= kRangeCeilInterval; // The block size of each interval is doubled every time. | |||
| for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { | |||
| @@ -30,6 +30,7 @@ | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/utils/op_desc_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| @@ -457,7 +458,16 @@ Status GetNoAlignSize(const ge::OpDesc &desc, uint32_t index, size_t &size) { | |||
| DataType data_type = output_op_desc->GetDataType(); | |||
| graphStatus graph_status = TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); | |||
| if (graph_status != GRAPH_SUCCESS) { | |||
| GELOGE(graph_status, "CalcTensorMemSize failed!"); | |||
| GELOGE(graph_status, "[Calculate][TensorSize]shape:%s, format:%s, data_type:%s, op:%s, out_index:%u", | |||
| shape.ToString().c_str(), | |||
| TypeUtils::FormatToSerialString(format).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_type).c_str(), | |||
| desc.GetName().c_str(), index); | |||
| REPORT_CALL_ERROR("E19999", "CalcTensorMemSize fail, shape:%s, format:%s, data_type:%s, op:%s, out_index:%u", | |||
| shape.ToString().c_str(), | |||
| TypeUtils::FormatToSerialString(format).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_type).c_str(), | |||
| desc.GetName().c_str(), index); | |||
| return FAILED; | |||
| } | |||
| size = static_cast<size_t>(tensor_size); | |||
| @@ -498,7 +508,7 @@ BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map<stri | |||
| symbol_to_anchors_(symbol_to_anchors), anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} | |||
| BlockMemAssigner::~BlockMemAssigner() { | |||
| GELOGD("blocks_store_ size : %lu", blocks_store_.size()); | |||
| GELOGD("[Destruct][BlockMemAssigner]blocks_store_ size : %lu", blocks_store_.size()); | |||
| for (MemoryBlock *memory_block : blocks_store_) { | |||
| GE_DELETE_NEW_SINGLE(memory_block); | |||
| } | |||
| @@ -586,8 +596,13 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||
| GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); | |||
| int64_t size = 0; | |||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||
| GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", | |||
| node_op_desc->GetName().c_str(), size); | |||
| GE_IF_BOOL_EXEC(size < 0, | |||
| GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, " | |||
| "maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, " | |||
| "maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| return;); | |||
| batch_all_memory_size[batch_label].emplace_back(size); | |||
| if (batch_total_size.find(batch_label) == batch_total_size.end()) { | |||
| @@ -678,22 +693,34 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||
| if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | |||
| auto out_anchor = n->GetOutDataAnchor(out_index); | |||
| GE_IF_BOOL_EXEC(out_anchor == nullptr, | |||
| GELOGE(FAILED, "Node[%s] output[%u] anchor is null.", n->GetName().c_str(), out_index); | |||
| GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] anchor is null.", | |||
| n->GetName().c_str(), out_index); | |||
| REPORT_INNER_ERROR("E19999", "output anchor is null, node_name: %s output_index: %u.", | |||
| n->GetName().c_str(), out_index); | |||
| return false;); | |||
| for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||
| GE_IF_BOOL_EXEC(peer_in_anchor == nullptr, | |||
| GELOGE(FAILED, "Node[%s] output[%u] peer_in_anchor 0 is null.", n->GetName().c_str(), out_index); | |||
| GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] peer_in_anchor 0 is null.", | |||
| n->GetName().c_str(), out_index); | |||
| REPORT_INNER_ERROR("E19999", "output anchor peer is null, node_name: %s output_index: %u.", | |||
| n->GetName().c_str(), out_index); | |||
| return false;); | |||
| auto peer_node = peer_in_anchor->GetOwnerNode(); | |||
| GE_IF_BOOL_EXEC(peer_node == nullptr, | |||
| GELOGE(FAILED, "Node[%s] output[%u] node is null.", n->GetName().c_str(), out_index); | |||
| GELOGE(FAILED, "[Check][Node]Node[%s] output[%u] peer node is null.", | |||
| n->GetName().c_str(), out_index); | |||
| REPORT_INNER_ERROR("E19999", "output anchor peer node is null, node_name: %s output_index: %u.", | |||
| n->GetName().c_str(), out_index); | |||
| return false;); | |||
| // Get the continuous input type of the node, default is false | |||
| bool is_input_continuous = false; | |||
| auto peer_in_node_desc = peer_node->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, | |||
| GELOGE(FAILED, "Node[%s] output[%u] nodedesc is null.", n->GetName().c_str(), out_index); | |||
| GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] nodedesc is null.", | |||
| n->GetName().c_str(), out_index); | |||
| REPORT_INNER_ERROR("E19999", "output anchor peer op_desc is null, node_name:%s output_index:%u.", | |||
| n->GetName().c_str(), out_index); | |||
| return false;); | |||
| // If GetBool fail, is_input_continuous is false. | |||
| @@ -793,7 +820,10 @@ bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr & | |||
| if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || | |||
| (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || | |||
| (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { | |||
| GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); | |||
| GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] peer input node desc is null.", | |||
| n->GetName().c_str(), out_index); | |||
| REPORT_INNER_ERROR("E19999", "get output anchor peer op_desc fail, node_name: %s output_index: %u.", | |||
| n->GetName().c_str(), out_index); | |||
| return false; | |||
| } | |||
| auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); | |||
| @@ -1077,7 +1107,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
| OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, | |||
| const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | |||
| const bool continuous, int64_t memory_type) { | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "Input parameter n is null."); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| n == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); | |||
| return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null."); | |||
| auto node_op_desc = n->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); | |||
| std::string batch_label; | |||
| @@ -1129,7 +1162,12 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
| } | |||
| auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| block == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u", | |||
| n->GetName().c_str(), out_index); | |||
| return nullptr, | |||
| "[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index); | |||
| // Data and netoutput need zero copy block | |||
| block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | |||
| @@ -1188,9 +1226,15 @@ void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutpu | |||
| Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | |||
| const bool is_op_reuse_mem) { | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| n == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null"); | |||
| return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null."); | |||
| auto node_op_desc = n->GetOpDesc(); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| node_op_desc == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); | |||
| return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); | |||
| // continuous output support ref only when all output ref input | |||
| bool isAllOutputRef = true; | |||
| @@ -1204,7 +1248,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||
| } | |||
| if (!isAllOutputRef && isOutputHasRef) { | |||
| GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", | |||
| REPORT_INNER_ERROR("E19999", "continuous output node ref part input, not support now. node_name:%s", | |||
| n->GetName().c_str()); | |||
| GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s", | |||
| n->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| @@ -1215,7 +1261,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||
| for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | |||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | |||
| if (output_op_desc == nullptr) { | |||
| GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||
| REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", | |||
| n->GetName().c_str(), index); | |||
| GELOGE(INTERNAL_ERROR, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| @@ -1226,7 +1274,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||
| int64_t size = 0; | |||
| if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||
| REPORT_CALL_ERROR("E19999", "get tensor_size failed, node_name:%s, output_index:%u", | |||
| n->GetName().c_str(), index); | |||
| GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| size_t align_size = static_cast<size_t>(size); | |||
| @@ -1266,7 +1316,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||
| block->last_continuous_block_ = true; | |||
| ++(block->ref_count_); | |||
| } else { | |||
| GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "apply continuousMemory failed, node_name:%s, total_size:%ld", | |||
| n->GetName().c_str(), total_size); | |||
| GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%ld", n->GetName().c_str(), total_size); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| return SUCCESS; | |||
| @@ -1274,25 +1326,44 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||
| MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | |||
| const bool is_op_reuse_mem, const bool continuous) { | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| n == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null"); | |||
| return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null"); | |||
| auto node_op_desc = n->GetOpDesc(); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| node_op_desc == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); | |||
| return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); | |||
| MemoryBlock *block = nullptr; | |||
| NodeIndexIO node_index_io(n, index, kOut); | |||
| int64_t size = 0; | |||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | |||
| GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); | |||
| GE_IF_BOOL_EXEC( | |||
| output_op_desc == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", | |||
| n->GetName().c_str(), index); | |||
| GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||
| return nullptr); | |||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||
| size_t no_align_size = 0; | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | |||
| return nullptr, "Get no align size failed"); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | |||
| REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u", | |||
| n->GetName().c_str(), index); | |||
| return nullptr, | |||
| "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||
| std::string symbol; | |||
| bool reuse_input = false; | |||
| if (IsSymbolExist(node_index_io, symbol)) { | |||
| block = symbol_blocks_[symbol]; | |||
| GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); | |||
| return nullptr); | |||
| GE_IF_BOOL_EXEC(block == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "get ref block failed, node_name:%s, symbol:%s", | |||
| node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); | |||
| GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s", | |||
| node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); | |||
| return nullptr); | |||
| // reduce old size | |||
| size_t align_size = block->Size(); | |||
| AlignMemOffset(align_size); | |||
| @@ -1335,12 +1406,28 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||
| vector<bool> workspace_reuse_flag; | |||
| block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, | |||
| workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| block == nullptr, | |||
| REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u", | |||
| n->GetName().c_str(), block_size, index); | |||
| return nullptr, | |||
| "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u", | |||
| n->GetName().c_str(), block_size, index); | |||
| } | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); | |||
| int out_count = 0; | |||
| GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr); | |||
| GE_IF_BOOL_EXEC( | |||
| index >= n->GetAllOutDataAnchors().size(), | |||
| REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s", | |||
| index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s", | |||
| index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); | |||
| return nullptr); | |||
| auto out_data_anchor = n->GetOutDataAnchor(index); | |||
| GE_IF_BOOL_EXEC(out_data_anchor == nullptr, GELOGE(FAILED, "Out data anchor is nullptr."); return nullptr); | |||
| GE_IF_BOOL_EXEC( | |||
| out_data_anchor == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str()); | |||
| return nullptr); | |||
| for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||
| auto owner_node = in_anchor->GetOwnerNode(); | |||
| auto op_desc = owner_node->GetOpDesc(); | |||
| @@ -1546,8 +1633,14 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
| GELOGD("Assign memory node[%s], output size[%zu], output memory type size[%zu]", op_desc->GetName().c_str(), | |||
| op_desc->GetOutputsSize(), memorys_type.size()); | |||
| if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { | |||
| GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]", | |||
| op_desc->GetName().c_str(), op_desc->GetOutputsSize(), memorys_type.size()); | |||
| REPORT_INNER_ERROR("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s", | |||
| ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), | |||
| op_desc->GetOutputsSize(), op_desc->GetName().c_str()); | |||
| GELOGE( | |||
| INTERNAL_ERROR, | |||
| "[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s", | |||
| ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), | |||
| op_desc->GetOutputsSize(), op_desc->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| @@ -1673,8 +1766,12 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||
| temp.size(), tvm_workspace_memory_type.size()); | |||
| if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { | |||
| GELOGE(INTERNAL_ERROR, "fusion: node[%s], tvm workspace memory size error![v_temp:%zu, workspace:%zu]", | |||
| n->GetName().c_str(), temp.size(), tvm_workspace_memory_type.size()); | |||
| REPORT_INNER_ERROR("E19999", "Attr[%s]size:%zu is not equal to workspace size:%zu, node_name:%s", | |||
| TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), | |||
| temp.size(), n->GetName().c_str()); | |||
| GELOGE(INTERNAL_ERROR, "[Check][Attr]Attr %s size:%zu is not equal to workspace size:%zu, node_name:%s", | |||
| TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), | |||
| temp.size(), n->GetName().c_str()); | |||
| return; | |||
| } | |||
| for (size_t i = 0; i < temp.size(); i++) { | |||
| @@ -2059,7 +2156,7 @@ void BlockMemAssigner::SetOpMemOffset(bool is_zero_copy) { | |||
| Status BlockMemAssigner::Assign() { | |||
| vector<int64_t> ranges; | |||
| if (GetMemoryRanges(ranges) != SUCCESS) { | |||
| GELOGE(FAILED, "GetMemoryRanges Fail!"); | |||
| GELOGE(FAILED, "[Get][MemoryRanges] Fail!"); | |||
| return FAILED; | |||
| } | |||
| GE_IF_BOOL_EXEC(ranges.empty(), return SUCCESS); | |||
| @@ -2083,8 +2180,12 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, | |||
| bool has_workspace_mem_type_attr = | |||
| ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); | |||
| if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) { | |||
| GELOGE(INTERNAL_ERROR, "node[%s], workspace_memory size error![index:%zu, workspace:%zu]", | |||
| node->GetName().c_str(), index, workspace_memory_type.size()); | |||
| REPORT_INNER_ERROR("E19999", "get workspace mem_type failed, " | |||
| "index %zu invalid, bigger than attr %s size:%zu, node_name:%s", | |||
| index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), | |||
| workspace_memory_type.size(), node->GetName().c_str()); | |||
| GELOGE(INTERNAL_ERROR, "[Get][WorkspaceMemType]index %zu invalid, bigger than attr %s size:%zu, node_name:%s", | |||
| index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str()); | |||
| return false; | |||
| } | |||
| memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; | |||
| @@ -99,7 +99,8 @@ Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() { | |||
| Status GraphMemoryAssigner::AssignMemory() { | |||
| ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_)); | |||
| if (mem_assigner->Assign() != ge::SUCCESS) { | |||
| GELOGE(ge::FAILED, "Memory assigner failed"); | |||
| GELOGE(ge::FAILED, "[Assign][GraphMem]graph_id:%u, graph_name:%s", | |||
| compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return ge::FAILED; | |||
| } | |||
| MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); | |||
| @@ -115,7 +116,10 @@ Status GraphMemoryAssigner::AssignMemory() { | |||
| auto variable_assigner = | |||
| std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | |||
| if (variable_assigner == nullptr) { | |||
| GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); | |||
| GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s", | |||
| compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return ge::FAILED; | |||
| } | |||
| @@ -134,7 +138,10 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { | |||
| auto variable_assigner = | |||
| std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | |||
| if (variable_assigner == nullptr) { | |||
| GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); | |||
| GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s", | |||
| compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return ge::FAILED; | |||
| } | |||
| if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) { | |||
| @@ -147,8 +154,10 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { | |||
| auto variable_assigner = | |||
| std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | |||
| if (variable_assigner == nullptr) { | |||
| GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); | |||
| return ge::FAILED; | |||
| GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s", | |||
| compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| } | |||
| if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) { | |||
| return ge::FAILED; | |||
| @@ -161,17 +170,18 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out | |||
| int64_t &batch_dim_num, int64_t &out_size) { | |||
| graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); | |||
| if (graph_status != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Opdesc GetSize failed!"); | |||
| GELOGE(FAILED, "[Get][TensorSize]"); | |||
| REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory"); | |||
| return FAILED; | |||
| } | |||
| GeShape output_shape = output_desc->GetShape(); | |||
| std::vector<int64_t> output_dims = output_shape.GetDims(); | |||
| if (dim_index >= static_cast<int64_t>(output_dims.size())) { | |||
| std::string error = "Invaild value" + FmtToStr(dim_index) + | |||
| " of attr _reuse_input_on_dim_index, which is out of data range [0," | |||
| + std::to_string(output_dims.size()) + ")"; | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "Inner param dim_index value:%ld invalid, bigger than dim size:%lu in shape:%s", | |||
| dim_index, output_dims.size(), output_shape.ToString().c_str()); | |||
| GELOGE(FAILED, "[Check][Param:dim_index]value:%ld invalid, bigger than dim size:%lu in shape:%s", | |||
| dim_index, output_dims.size(), output_shape.ToString().c_str()); | |||
| return FAILED; | |||
| } | |||
| @@ -187,14 +197,23 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out | |||
| graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size); | |||
| if (graph_status != GRAPH_SUCCESS) { | |||
| GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!"); | |||
| GELOGE(graph_status, "[Calc][TensorSize]"); | |||
| return FAILED; | |||
| } | |||
| if (output_mem_size < 0) { | |||
| std::string error = "After calculating tensor memory size, output_mem_size" + FmtToStr(output_mem_size) + | |||
| " is out of data range [0," + std::to_string(INT64_MAX) + "]"; | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "After calculating, tensor memory size:%ld invalid, less than 0. " | |||
| "shape:%s, format:%s, dtype:%s, maybe has dynamic shape", | |||
| output_mem_size, | |||
| output_shape.ToString().c_str(), | |||
| TypeUtils::FormatToSerialString(out_format).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| GELOGE(FAILED, "[Check][TensorSize]value:%ld invalid after calc, less than 0. shape:%s, format:%s, dtype:%s, " | |||
| "maybe has dynamic shape", | |||
| output_mem_size, | |||
| output_shape.ToString().c_str(), | |||
| TypeUtils::FormatToSerialString(out_format).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return FAILED; | |||
| } | |||
| @@ -203,7 +222,10 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out | |||
| Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) { | |||
| if (memory_offset_.empty()) { | |||
| GELOGE(FAILED, "memory_offset_ is empty."); | |||
| REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when ReAssignMemory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return ge::FAILED; | |||
| } | |||
| @@ -218,8 +240,10 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size | |||
| auto session_id = compute_graph_->GetSessionID(); | |||
| if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { | |||
| GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", total_mem_offset, | |||
| VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); | |||
| GELOGE(ge::FAILED, "[Check][TotalMemOffset] %zu is greater than memory manager malloc max size %zu, " | |||
| "graph_id:%u, graph_name:%s, reduce your batchsize or scale your model may solve problem", | |||
| total_mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize(), | |||
| compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| for (auto iter : mem_type_to_offset) { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"}, | |||
| {std::to_string(iter.first), std::to_string(iter.second), "featuremap", | |||
| @@ -234,7 +258,13 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size | |||
| Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) { | |||
| BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); | |||
| GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;); | |||
| if (priority_assigner == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected when AssignZeroCopyMemory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][InnerData:priority_assigner]nullptr is invalid, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return ge::FAILED; | |||
| } | |||
| size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM]; | |||
| @@ -254,8 +284,11 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse | |||
| zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp; | |||
| auto iter = memory_offset_.find(RT_MEMORY_HBM); | |||
| if (iter == memory_offset_.end()) { | |||
| std::string error = "Memory offset does not have memory type[HBM]"; | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||
| "not expected when AssignZeroCopyMemory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM]; | |||
| @@ -304,7 +337,7 @@ uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { | |||
| } | |||
| if (continuous_type != 0) { | |||
| GELOGI("Current node %s continuous type %d.", op_desc->GetName().c_str(), continuous_type); | |||
| GELOGI("[Get][MemType:Continuous]Current node %s, value is %d", op_desc->GetName().c_str(), continuous_type); | |||
| } | |||
| return continuous_type; | |||
| } | |||
| @@ -312,8 +345,9 @@ uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { | |||
| Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type, | |||
| int64_t &tensor_size, int64_t &nopadding_size) { | |||
| if ((op_desc == nullptr) || (output_desc == nullptr)) { | |||
| GELOGE(FAILED, "Input para is nullptr."); | |||
| return FAILED; | |||
| REPORT_INNER_ERROR("E19999", "InnerData param op_desc or output_desc is nullptr, " | |||
| "not expected when GetMemorySize"); | |||
| GELOGE(FAILED, "[Check][Param]op_desc or output_desc is nullptr"); | |||
| } | |||
| tensor_size = 0; | |||
| nopadding_size = 0; | |||
| @@ -322,7 +356,10 @@ Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &o | |||
| int64_t attr_dim_index; | |||
| bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); | |||
| if (!get_attr_dim_flag) { | |||
| GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); | |||
| REPORT_INNER_ERROR("E19999", "Get Attr:%s failed when GetMemorySize, op_name:%s", | |||
| ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str()); | |||
| GELOGE(FAILED, "[Get][Attr:%s]fail for op_name:%s", | |||
| ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| @@ -330,17 +367,25 @@ Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &o | |||
| int64_t batch_dim_num = 1; | |||
| if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) != | |||
| SUCCESS) { | |||
| GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "CalculateTensorRealSizeAndOutSize failed, attr_dim_index:%ld, op_name:%s", | |||
| attr_dim_index, op_desc->GetName().c_str()); | |||
| GELOGE(FAILED, "[Calculate][NopaddingSize]failed for node %s, attr_dim_index:%ld", | |||
| op_desc->GetName().c_str(), attr_dim_index); | |||
| return FAILED; | |||
| } | |||
| } else { | |||
| if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) { | |||
| GELOGE(FAILED, "GetSize failed."); | |||
| REPORT_INNER_ERROR("E19999", "Get Tensor Size failed, op_name:%s", op_desc->GetName().c_str()); | |||
| GELOGE(FAILED, "[Get][TensorSize]failed in padding case, op_name:%s", op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| if ((tensor_size < 0) || (nopadding_size < 0)) { | |||
| GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "GetMemorySize fail, " | |||
| "tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s", | |||
| tensor_size, nopadding_size, op_desc->GetName().c_str()); | |||
| GELOGE(FAILED, "[Get][MemorySize]tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s", | |||
| tensor_size, nopadding_size, op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| @@ -374,7 +419,7 @@ bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op | |||
| // If GetBool fail, is_peer_reference is false. | |||
| (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); | |||
| GE_IF_BOOL_EXEC(is_peer_reference, | |||
| std::string warning = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + | |||
| std::string warning = "[Check][Continuous]Current op" + FmtToStr(node->GetOpDesc()->GetName()) + | |||
| " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + | |||
| " is ref. There may be conflict between the two."; | |||
| GELOGW("%s", warning.c_str()); | |||
| @@ -404,7 +449,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
| if (continuous_input) { | |||
| if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) { | |||
| GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type), | |||
| "Assign node %s continuous input memory failed.", node->GetName().c_str()) | |||
| "[Assign][Memory:Continuous:Input]fail for node:%s", node->GetName().c_str()) | |||
| } else { | |||
| nodes_stack.push_back(node); | |||
| } | |||
| @@ -413,10 +458,11 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
| int64_t memory_type = RT_MEMORY_HBM; | |||
| bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); | |||
| if (continuous_output) { | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed."); | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), | |||
| "[Get][MemType]fail for node:%s", node->GetName().c_str()); | |||
| ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "Assign continuous output memory failed!"); | |||
| GELOGE(ret, "[Assign][Memory:Continuous:Ouput]fail for node:%s", node->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| } | |||
| @@ -427,14 +473,16 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
| nodes_stack.pop_back(); | |||
| auto iter = node_2_continuous_type.find(node); | |||
| if (iter == node_2_continuous_type.end()) { | |||
| GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "Inner data error when process continuous memory alloc for node:%s, " | |||
| "but has no continuous type", node->GetName().c_str()); | |||
| GELOGE(FAILED, "[Get][ContinuousType] find fail for node:%s", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true), | |||
| "Assign node %s continuous input memory failed.", node->GetName().c_str()) | |||
| "[Assign][Memory:Continuous:Input]fail for node:%s.", node->GetName().c_str()) | |||
| } | |||
| for (auto pair : memory_offset_) { | |||
| GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first, | |||
| GELOGD("[Reassign][Memory:Continuous]At last, memory type = %ld, mem offset = %zu.", pair.first, | |||
| pair.second.mem_offset_); | |||
| } | |||
| return ge::SUCCESS; | |||
| @@ -442,11 +490,13 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
| Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | |||
| int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) { | |||
| GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); | |||
| GELOGI("[Assign][Memory:Input:Continuous]start for Current node %s", node->GetName().c_str()); | |||
| auto iter = memory_offset_.find(memory_type); | |||
| if (iter == memory_offset_.end()) { | |||
| std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, " | |||
| "when assign continuous input memory for node:%s, ", memory_type, node->GetName().c_str()); | |||
| GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s", | |||
| memory_type, node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| // The head and tail of hcom continuous input should be added 512 | |||
| @@ -459,8 +509,9 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| vector<int64_t> output_list_this = op_desc->GetOutputOffset(); | |||
| if (output_list_this.empty()) { | |||
| std::string error = "node:" + FmtToStr(op_desc->GetName()) + "has no output offset"; | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "No output offset in node :%s, not expected when assign continuous input memory", | |||
| node->GetName().c_str()); | |||
| GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); | |||
| @@ -480,8 +531,9 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
| lx_fusion = lx_fusion && !offsets_of_fusion.empty(); | |||
| if (lx_fusion) { | |||
| if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) { | |||
| std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) + | |||
| " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; | |||
| std::string error = "fusion: peer node:" + FmtToStr(peer_op_desc->GetName()) + | |||
| " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) + | |||
| " is out of range:" + FmtToStr(offsets_of_fusion.size()); | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| return FAILED; | |||
| } | |||
| @@ -497,7 +549,9 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
| bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion; | |||
| vector<int64_t> output_list = peer_op_desc->GetOutputOffset(); | |||
| if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) { | |||
| std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; | |||
| std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) + | |||
| " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) + | |||
| " is out of range:" + FmtToStr(output_list.size()); | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| return FAILED; | |||
| } | |||
| @@ -506,17 +560,17 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
| bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); | |||
| if (is_allocated_first_input) { | |||
| std::map<int32_t, int32_t> out2ins; | |||
| GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str()); | |||
| GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "[Get][AllRef]fail for node: %s", node->GetName().c_str()); | |||
| // output is beginning offset, set offset for input; only support this case now | |||
| if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { | |||
| auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); | |||
| output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); | |||
| peer_op_desc->SetOutputOffset(output_list); | |||
| GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(), | |||
| out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), | |||
| output_list_this.at(out2ins.begin()->first), peer_output_offset); | |||
| GELOGI("[Update][Offset]Node %s out %d ref in %d input node %s, use output offset %ld update %ld", | |||
| node->GetName().c_str(), out2ins.begin()->first, out2ins.begin()->second, | |||
| peer_op_desc->GetName().c_str(), output_list_this.at(out2ins.begin()->first), peer_output_offset); | |||
| } else { | |||
| GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(), | |||
| GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(), | |||
| out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size()); | |||
| } | |||
| // first input is beginning offset | |||
| @@ -542,7 +596,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
| } | |||
| GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " | |||
| "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | |||
| "size[%zu] realsize[%ld] nopadding size[%d]", node->GetOwnerComputeGraph()->GetName().c_str(), | |||
| peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(), | |||
| output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, | |||
| is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); | |||
| @@ -563,17 +617,32 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
| Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) { | |||
| auto in_data_anchor_list = node->GetAllInDataAnchors(); | |||
| if (in_data_anchor_list.empty()) { | |||
| GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "InAnchor list empty in node:%s, not expect when GetFirstInputPeerOutOutputOffset", | |||
| node->GetName().c_str()); | |||
| GELOGE(FAILED, "[Get][InAnchor]empty is invalid, node:%s", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor(); | |||
| GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null."); | |||
| GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "PeerAcnhor is null, " | |||
| "not expect when GetFirstInputPeerOutOutputOffset for node:%s", | |||
| node->GetName().c_str()); | |||
| GELOGE(ge::FAILED, "[Check][PeerAnchor]null is invalid, node:%s", node->GetName().c_str()); | |||
| return ge::FAILED); | |||
| auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED); | |||
| GE_IF_BOOL_EXEC(peer_op_desc == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "PeerOpDesc is null, " | |||
| "not expect when GetFirstInputPeerOutOutputOffset for node:%s", | |||
| node->GetName().c_str()); | |||
| GELOGE(ge::FAILED, "[Check][PeerOpDesc]null is invalid, node:%s", node->GetName().c_str()); | |||
| return ge::FAILED); | |||
| vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset(); | |||
| if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) { | |||
| GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); | |||
| REPORT_INNER_ERROR("E19999", "PeerAnchorIndex:%d bigger than in_offset size:%lu, " | |||
| "judge invalid when GetFirstInputPeerOutOutputOffset for node:%s", | |||
| peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][Index:PeerOutDataAnchor]PeerIndex:%d bigger than in_offset size:%lu, node:%s", | |||
| peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx()); | |||
| @@ -584,11 +653,18 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node | |||
| uint32_t continuous_type) { | |||
| GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); | |||
| auto out_op_desc = node->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); | |||
| GE_IF_BOOL_EXEC(out_op_desc == nullptr, | |||
| REPORT_INNER_ERROR("E19999", "OpDesc is null, " | |||
| "not expect when AssignContinuousOutputMemory for node:%s", | |||
| node->GetName().c_str()); | |||
| GELOGE(ge::FAILED, "[Check][OpDesc]null is invalid, node:%s", node->GetName().c_str())); | |||
| vector<int64_t> output_list = out_op_desc->GetOutputOffset(); | |||
| if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { | |||
| GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", | |||
| out_op_desc->GetOutputsSize(), output_list.size()); | |||
| REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, invalid in node:%s, " | |||
| "when AssignContinuousOutputMemory", | |||
| out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||
| GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s", | |||
| out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||
| return ge::FAILED; | |||
| } | |||
| @@ -647,14 +723,18 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
| map<string, vector<NodePtr>> connecting_output_atomic_nodes; | |||
| Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes); | |||
| if (status != SUCCESS) { | |||
| GELOGE(status, "Failed to filter atomic nodes for memory assignment."); | |||
| GELOGE(status, "[Filter][AtomicNode]failed in graph_id:%u, graph_name:%s", | |||
| compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return status; | |||
| } | |||
| auto mem_iter = memory_offset_.find(RT_MEMORY_HBM); | |||
| if (mem_iter == memory_offset_.end()) { | |||
| std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||
| "not expected when ReAssignAtomicMemory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| @@ -670,7 +750,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
| vector<int64_t> mem_offset_end; | |||
| status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end); | |||
| if (status != SUCCESS) { | |||
| GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.", | |||
| GELOGE(status, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.", | |||
| atomic_node->GetName().c_str()); | |||
| return status; | |||
| } | |||
| @@ -679,7 +759,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
| int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start; | |||
| if (atomic_mem_size != 0) { | |||
| GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), | |||
| "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); | |||
| "[Set][Attr]fail for atomic addr clean node %s.", iter.first->GetName().c_str()); | |||
| } | |||
| } | |||
| batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_)); | |||
| @@ -690,7 +770,8 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
| for (auto &iter_batch : connecting_output_atomic_nodes) { | |||
| mem_iter->second.mem_offset_ = batch_atomic_mem_start; | |||
| if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) { | |||
| GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); | |||
| GELOGE(FAILED, "[Assign][Memory]for nodes that connect to netoutput failed." | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_)); | |||
| @@ -721,9 +802,10 @@ Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign( | |||
| // If GetBool fail, is_reference is false. | |||
| (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference); | |||
| if (is_reference) { | |||
| std::string error = "Op" + FmtToStr(peer_in_node_desc->GetName()) + | |||
| " cannot have both atomic and is_reference attribute."; | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "Op:%s cannot have both atomic and is_reference attribute, " | |||
| "not support now", peer_in_node_desc->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][Attr]Op:%s cannot have both atomic and is_reference attribute, " | |||
| "not support now", peer_in_node_desc->GetName().c_str()); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| @@ -761,7 +843,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP | |||
| // Assign atomic node output memory | |||
| Status ret = AssignAtomicOutputMemory(node, mem_offset_end); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str()); | |||
| GELOGE(ret, "[Assign][Memory:Ouput:Atomic]Failed for node:%s.", node_op_desc->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| @@ -781,7 +863,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP | |||
| ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end); | |||
| } | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); | |||
| GELOGE(ret, "[Assign][Memory:Atomic:Workspace]fail for node:%s.", node_op_desc->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| } else { | |||
| @@ -794,8 +876,11 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP | |||
| Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) { | |||
| auto iter = memory_offset_.find(RT_MEMORY_HBM); | |||
| if (iter == memory_offset_.end()) { | |||
| std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||
| "not expected when AssignConnectNetOutputAtomicMemory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| for (auto &node : connect_netoutput_nodes) { | |||
| @@ -811,13 +896,14 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> & | |||
| node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start); | |||
| vector<int64_t> mem_offset_end; | |||
| if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | |||
| GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); | |||
| GELOGE(FAILED, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.", | |||
| node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. | |||
| if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) { | |||
| GELOGE(FAILED, "Failed to set atomic attr separately."); | |||
| GELOGE(FAILED, "[Set][Attr:IndependentAtomic]fail for node:%s", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| @@ -842,8 +928,11 @@ Status GraphMemoryAssigner::AssignReferenceMemory() { | |||
| vector<int64_t> output_list = out_op_desc->GetOutputOffset(); | |||
| if (out_op_desc->GetOutputsSize() > output_list.size()) { | |||
| GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", | |||
| out_op_desc->GetOutputsSize(), output_list.size()); | |||
| REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s " | |||
| "when AssignReferenceMemory", | |||
| out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||
| GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s", | |||
| out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||
| return ge::FAILED; | |||
| } | |||
| @@ -896,9 +985,12 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) { | |||
| } | |||
| if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) || | |||
| (peer_op_desc->GetType() == VARIABLE)) { | |||
| std::string error = "Op" + FmtToStr(node->GetName()) + "'s peer out node" + | |||
| FmtToStr(peer_op_desc->GetName()) + " is invalid, Constant/AippData/Variable is not supported"; | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "node(type:%s, name:%s) link to atomic node(name:%s), " | |||
| "this situation not supported now", | |||
| peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str()); | |||
| GELOGE(ge::FAILED, "[Check][Link]node(type:%s, name:%s) link to atomic node(name:%s), " | |||
| "this situation not supported now", | |||
| peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str()); | |||
| return false; | |||
| } | |||
| } | |||
| @@ -918,22 +1010,27 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve | |||
| // Check atomic output | |||
| vector<int64_t> output_list = op_desc->GetOutputOffset(); | |||
| if (atomic_output_index.size() > output_list.size()) { | |||
| std::string error = "Op" + FmtToStr(node->GetName()) + | |||
| "'s size of atomic_output_index is more than the size of output_list"; | |||
| std::string error = | |||
| "Op:" + FmtToStr(node->GetName()) + "'s size:" + FmtToStr(atomic_output_index.size()) + | |||
| " of atomic_output_index is more than the size:" + FmtToStr(output_list.size()) + " of output_list"; | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| return ge::FAILED; | |||
| } | |||
| auto output_list_size = static_cast<int64_t>(output_list.size()); | |||
| auto iter = memory_offset_.find(RT_MEMORY_HBM); | |||
| if (iter == memory_offset_.end()) { | |||
| std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||
| "not expected when AssignAtomicOutputMemory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| for (auto &output_index : atomic_output_index) { | |||
| if (output_index >= output_list_size) { | |||
| std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) + | |||
| " is more than the size" + FmtToStr(output_list_size) + " of output_list."; | |||
| std::string error = | |||
| "Op:" + FmtToStr(node->GetName()) + "'s atomic_output index:" + FmtToStr(output_index) + | |||
| " is more than the size:" + FmtToStr(output_list_size) + " of output_list."; | |||
| GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str()); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| @@ -941,7 +1038,8 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve | |||
| // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here | |||
| bool is_assigned_mem = false; | |||
| if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) { | |||
| GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str()); | |||
| GELOGE(ge::FAILED, "[Get][MemoryAssignmentStatus]fail for node %s, out_index:%ld", | |||
| node->GetName().c_str(), output_index); | |||
| return ge::FAILED; | |||
| } | |||
| @@ -981,8 +1079,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve | |||
| Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, | |||
| bool &is_mem_assigned) { | |||
| if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) { | |||
| std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) + | |||
| " is more than the size of node's AllOutDataAnchors."; | |||
| std::string error = | |||
| "Op:" + FmtToStr(node->GetName()) + "'s output index:" + FmtToStr(output_index) + | |||
| " is more than the size:" + FmtToStr(node->GetAllOutDataAnchors().size()) + " of node's AllOutDataAnchors."; | |||
| GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str()); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| @@ -1010,8 +1109,11 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc | |||
| GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str()); | |||
| auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM); | |||
| if (mem_type_iter == memory_offset_.end()) { | |||
| std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||
| "not expected when AssignOrdinaryAtomicWorkspaceMemory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| vector<int64_t> workspace_vector = op_desc->GetWorkspace(); | |||
| @@ -1032,8 +1134,9 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc | |||
| auto workspace_index = static_cast<uint64_t>(info_iter.first); | |||
| auto workspace_size = info_iter.second; | |||
| if (workspace_index >= workspace_vector.size()) { | |||
| std::string error = "The workspace index" + FmtToStr(workspace_index) + | |||
| " is more than the size" + FmtToStr(workspace_vector.size()) + " of workspace vector."; | |||
| std::string error = "The workspace index:" + FmtToStr(workspace_index) + | |||
| " is more than the size:" + FmtToStr(workspace_vector.size()) + " of workspace vector in op:" + | |||
| op_desc->GetName().c_str(); | |||
| GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str()); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| @@ -1063,8 +1166,11 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt | |||
| GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str()); | |||
| auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM); | |||
| if (mem_type_iter == memory_offset_.end()) { | |||
| std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||
| "not expected when AssignFusionAtomicWorkspaceMemory, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| map<string, map<int64_t, int64_t>> sub_node_workspace_offset; | |||
| @@ -1095,7 +1201,10 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt | |||
| sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset)); | |||
| } | |||
| if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) { | |||
| GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for node:%s when AssignFusionAtomicWorkspaceMemory", | |||
| EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str()); | |||
| GELOGE(FAILED, "[Set][Attr:%s]fail for node:%s.", | |||
| EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| @@ -1106,7 +1215,7 @@ Status GraphMemoryAssigner::CheckOffset() { | |||
| std::map<std::string, std::string> anchor_to_symbol; | |||
| std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors; | |||
| if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Get][RefMapping]fail for graph %s", compute_graph_->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||
| @@ -1148,7 +1257,6 @@ Status GraphMemoryAssigner::CheckOffset() { | |||
| std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) + | |||
| + " in node" + FmtToStr(node->GetName()); | |||
| GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||
| GELOGE(FAILED, "Invalid workspace in node: %s workspace: %ld.", node->GetName().c_str(), ge::kInvalidOffset); | |||
| return FAILED; | |||
| } | |||
| } | |||
| @@ -1158,8 +1266,10 @@ Status GraphMemoryAssigner::CheckOffset() { | |||
| ge::Status GraphMemoryAssigner::SetInputOffset() { | |||
| if (memory_offset_.empty()) { | |||
| GELOGE(FAILED, "memory_offset_ is empty."); | |||
| return FAILED; | |||
| REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when SetInputOffset, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, " | |||
| "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||
| } | |||
| for (auto pair : memory_offset_) { | |||
| GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), | |||
| @@ -1168,7 +1278,7 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { | |||
| for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||
| if (UpdateOpInputOffset(node) != ge::SUCCESS) { | |||
| GELOGE(ge::FAILED, "Update op input offset failed"); | |||
| GELOGE(ge::FAILED, "[Update][Offset:Input]fail for op:%s", node->GetName().c_str()); | |||
| return ge::FAILED; | |||
| } | |||
| } | |||
| @@ -1316,12 +1426,12 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { | |||
| } | |||
| } else if (node->GetType() == DATA_TYPE) { | |||
| if (UpdateConstArgsOffset(node, input_list) != SUCCESS) { | |||
| GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str()); | |||
| GELOGE(FAILED, "[Update][Offset:Input:Const]fail for node:%s ", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } else { | |||
| if (UpdateOpInputOffset(node, input_list) != SUCCESS) { | |||
| GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str()); | |||
| GELOGE(FAILED, "[Update][Offset:Input]fail for node:%s", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| @@ -1361,7 +1471,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in | |||
| peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); | |||
| if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | |||
| if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) { | |||
| GELOGE(FAILED, "Set atomic clean attr failed."); | |||
| GELOGE(FAILED, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| @@ -1387,7 +1497,10 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve | |||
| (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | |||
| mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | |||
| GELOGE(FAILED, "SetListInt failed."); | |||
| REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s", | |||
| ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str()); | |||
| GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s", | |||
| ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str()); | |||
| return FAILED); | |||
| std::vector<int64_t> mem_size_vector; | |||
| @@ -1395,7 +1508,10 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve | |||
| (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | |||
| mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | |||
| GELOGE(FAILED, "SetListInt failed."); | |||
| REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s", | |||
| ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str()); | |||
| GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s", | |||
| ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str()); | |||
| return FAILED); | |||
| std::stringstream ss; | |||
| @@ -1437,12 +1553,14 @@ ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nod | |||
| // In the dynamic batch scenario, the memory attributes of nodes are the same. | |||
| for (auto &n : nodes) { | |||
| if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.") | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), | |||
| "[Get][MemType:input]fail for node:%s", n->GetName().c_str()) | |||
| break; | |||
| } | |||
| if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), | |||
| "[Get][MemType:output]fail for node:%s", n->GetName().c_str()) | |||
| break; | |||
| } | |||
| } | |||
| @@ -1478,7 +1596,7 @@ ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t & | |||
| } | |||
| if (!CheckContinuousMemType(mem_type_list)) { | |||
| GELOGE(FAILED, "Check continuous memory type failed."); | |||
| GELOGE(FAILED, "[Check][MemType:Continuous]fail for node:%s", node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| // It is continuous memory and memory type is the same, so use the first memory. | |||
| @@ -1526,7 +1644,11 @@ ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int3 | |||
| if (node->GetInDataAnchor(reuse_in_index) != nullptr) { | |||
| out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index); | |||
| } else { | |||
| GELOGE(FAILED, "Invalid reuse_input value %d on output %d of node %s, please check attr reuse_input", | |||
| REPORT_INNER_ERROR("E19999", "Invalid reuse_input value %d on output %d of node %s, " | |||
| "please check attr reuse_input", | |||
| reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str()); | |||
| GELOGE(FAILED, "[Check][Attr]Invalid reuse_input value %d on output %d of node %s, " | |||
| "please check attr reuse_input", | |||
| reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| @@ -1549,7 +1671,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( | |||
| auto continuous_type = iter->second; | |||
| bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); | |||
| if (continuous_input) { | |||
| GELOGI("node %s 's precursor node %s need assign continuous input memory, store node firstly.", | |||
| GELOGI("[Store][Node] of %s cause it's precursor node %s need assign continuous input memory", | |||
| input_continuous_node->GetName().c_str(), in_node->GetName().c_str()); | |||
| return false; | |||
| } | |||
| @@ -1559,7 +1681,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( | |||
| node_2_continuous_type.emplace(out_node, continuous_type); | |||
| bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); | |||
| if (continuous_input) { | |||
| GELOGI("node %s 's succeed node %s need assign continuous input memory, store node firstly.", | |||
| GELOGI("[Store][Node] of %s cause it's succeed node %s need assign continuous input memory", | |||
| input_continuous_node->GetName().c_str(), out_node->GetName().c_str()); | |||
| return false; | |||
| } | |||
| @@ -1575,11 +1697,12 @@ ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(con | |||
| int64_t mem_clean_size = 0; | |||
| int64_t memory_type = RT_MEMORY_HBM; | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed."); | |||
| GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), | |||
| "[Get][MemType]fail for node:%s", input_continuous_node->GetName().c_str()); | |||
| auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, | |||
| continuous_type, reverse_refresh); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "Assign continuous input memory failed!"); | |||
| GELOGE(ret, "[Assign][Memory:Input:continuous]fail for node:%s", input_continuous_node->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| @@ -1590,7 +1713,6 @@ ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(con | |||
| if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { | |||
| // check whether there is an atomic conflict between the current node and the peer out node | |||
| if (!CheckInputIsSupportAtomic(input_continuous_node)) { | |||
| GELOGE(ge::FAILED, "There is an atomic conflict between the current node and the peer out node, not supported!"); | |||
| return ge::FAILED; | |||
| } | |||
| @@ -1602,7 +1724,7 @@ ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(con | |||
| if (peer_out_node->GetType() == ATOMICADDRCLEAN) { | |||
| ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); | |||
| GELOGE(ret, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| } | |||
| @@ -261,7 +261,9 @@ Status ModelBuilder::SetInputOutputDesc() { | |||
| GE_IF_BOOL_EXEC(n->GetInAllNodes().empty() && n->GetOutAllNodes().empty(), continue;); | |||
| SetInputIsConst(n); | |||
| if (IsGeLocalOp(n->GetOpDesc())) { | |||
| bool is_unknow = false; | |||
| (void)NodeUtils::GetNodeUnknownShapeStatus(*n, is_unknow); | |||
| if ((IsGeLocalOp(n->GetOpDesc())) && (!is_unknow)) { | |||
| GE_CHK_STATUS_RET(CalcOutputSize(n), "Calculate output size failed"); | |||
| } | |||
| ret = AdjustConstWeightSize(n, weight_offset_); | |||
| @@ -364,8 +366,11 @@ void ModelBuilder::InitL1FusionOption() { | |||
| string buffer_optimize = "off_optimize"; | |||
| graphStatus ret = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | |||
| if (ret == GRAPH_SUCCESS) { | |||
| is_l1_fusion_enable_ = (buffer_optimize == "l1_optimize"); | |||
| GELOGD("The value of %s is %s.", BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str()); | |||
| bool off_superkernel = false; | |||
| (void)AttrUtils::GetBool(compute_graph_, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel); | |||
| is_l1_fusion_enable_ = ((buffer_optimize == "l1_optimize") && (!off_superkernel)); | |||
| GELOGI("Compute graph %s the value of %s is %s, superkernel flag %d.", compute_graph_->GetName().c_str(), | |||
| BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str(), is_l1_fusion_enable_); | |||
| } else { | |||
| GELOGW("The value of %s is empty.", kEnableL1Fusion.c_str()); | |||
| } | |||
| @@ -707,7 +712,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { | |||
| GE_TIMESTAMP_START(SetInputOutputOffset); | |||
| SetInputOutputOffsetPass input_output_offset; | |||
| GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed."); | |||
| GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run."); | |||
| GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run"); | |||
| // Compile single op in graph build stage | |||
| GE_TIMESTAMP_START(CompileSingleOp); | |||
| @@ -361,6 +361,10 @@ Status GraphExecutor::ExecuteGraphWithStream(GraphId graph_id, | |||
| std::vector<GeTensor> &output_tensor, | |||
| rtStream_t stream) { | |||
| GELOGI("[GraphExecutor] Start to execute graph with stream, graph_id=%u", graph_id); | |||
| if (!init_flag_) { | |||
| GELOGE(GE_GRAPH_EXECUTE_NOT_INIT, "[GraphExecutor] AI Core Engine without calling SetCondition!"); | |||
| return GE_GRAPH_EXECUTE_NOT_INIT; | |||
| } | |||
| if (graph_id != last_graph_id_) { | |||
| auto ret = FreeExecuteMemory(); | |||
| if (ret != SUCCESS) { | |||
| @@ -368,11 +372,6 @@ Status GraphExecutor::ExecuteGraphWithStream(GraphId graph_id, | |||
| } | |||
| } | |||
| last_graph_id_ = graph_id; | |||
| if (!init_flag_) { | |||
| GELOGE(GE_GRAPH_EXECUTE_NOT_INIT, "[GraphExecutor] AI Core Engine without calling SetCondition!"); | |||
| return GE_GRAPH_EXECUTE_NOT_INIT; | |||
| } | |||
| GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED); | |||
| auto model_manager = ge::ModelManager::GetInstance(); | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| @@ -382,11 +381,11 @@ Status GraphExecutor::ExecuteGraphWithStream(GraphId graph_id, | |||
| return model_manager->SyncExecuteModel(model_id, input_tensor, output_tensor); | |||
| } | |||
| std::vector<InputOutputDescInfo> inputs_desc; | |||
| std::vector<InputOutputDescInfo> input_desc; | |||
| std::vector<InputOutputDescInfo> output_desc; | |||
| GELOGI("[ExecuteGraph] GetInputOutputDescInfo via new ome begin."); | |||
| Status ret = GetInputOutputDescInfo(model_id, inputs_desc, output_desc); | |||
| Status ret = GetInputOutputDescInfo(model_id, input_desc, output_desc); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(GE_GRAPH_GET_IN_OUT_FAILED, "[GraphExecutor] GetInputOutputDescInfo failed, modelId=%u.", model_id); | |||
| return GE_GRAPH_GET_IN_OUT_FAILED; | |||
| @@ -385,7 +385,7 @@ Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_inf | |||
| Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | |||
| const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc(); | |||
| const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op); | |||
| const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op); | |||
| if (output_descs.size() != output_addrs.size()) { | |||
| GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(), | |||
| inner_dump_info.op->GetName().c_str(), output_descs.size()); | |||
| @@ -436,7 +436,7 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: | |||
| // else data, const or variable op | |||
| aicpu::dump::Output output; | |||
| auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index); | |||
| const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op); | |||
| const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op); | |||
| if (output_tensor == nullptr) { | |||
| GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index, | |||
| inner_dump_info.op->GetOutputsSize()); | |||
| @@ -540,7 +540,7 @@ Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info | |||
| Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | |||
| GELOGI("Start dump input"); | |||
| const auto &input_descs = inner_dump_info.op->GetAllInputsDesc(); | |||
| const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op); | |||
| const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(*runtime_param_, inner_dump_info.op); | |||
| if (input_descs.size() != input_addrs.size()) { | |||
| GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(), | |||
| inner_dump_info.op->GetName().c_str(), input_descs.size()); | |||
| @@ -36,7 +36,7 @@ | |||
| namespace ge { | |||
| class DataDumper { | |||
| public: | |||
| explicit DataDumper(const RuntimeParam &rsh) | |||
| explicit DataDumper(RuntimeParam *rsh) | |||
| : model_name_(), | |||
| model_id_(0), | |||
| runtime_param_(rsh), | |||
| @@ -106,7 +106,7 @@ class DataDumper { | |||
| std::string om_name_; | |||
| uint32_t model_id_; | |||
| const RuntimeParam &runtime_param_; | |||
| RuntimeParam *runtime_param_; | |||
| void *dev_mem_load_; | |||
| void *dev_mem_unload_; | |||
| @@ -125,10 +125,9 @@ class DataDumper { | |||
| uintptr_t loop_per_iter_; | |||
| uintptr_t loop_cond_; | |||
| ComputeGraphPtr compute_graph_; // release after DavinciModel::Init | |||
| std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | |||
| std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | |||
| void *l1_fusion_addr_ = nullptr; | |||
| uint32_t op_debug_task_id_ = 0; | |||
| uint32_t op_debug_stream_id_ = 0; | |||
| void *op_debug_addr_ = nullptr; | |||
| @@ -144,20 +143,16 @@ class DataDumper { | |||
| Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | |||
| Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | |||
| Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i, | |||
| const std::string &node_name_index); | |||
| const std::string &node_name_index); | |||
| Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | |||
| void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info); | |||
| void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, | |||
| aicpu::dump::OpMappingInfo &op_mapping_info); | |||
| Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | |||
| Status GenerateInput(aicpu::dump::Input &input, | |||
| const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
| const uintptr_t &addr, | |||
| size_t index); | |||
| Status GenerateOutput(aicpu::dump::Output &output, | |||
| const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
| const uintptr_t &addr, | |||
| size_t index); | |||
| Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
| const uintptr_t &addr, size_t index); | |||
| Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
| const uintptr_t &addr, size_t index); | |||
| void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task); | |||
| }; | |||
| struct DataDumper::InnerDumpInfo { | |||
| @@ -31,6 +31,7 @@ | |||
| #include "common/scope_guard.h" | |||
| #include "common/thread_pool.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/common/ge_call_wrapper.h" | |||
| #include "graph/compute_graph.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| @@ -184,7 +185,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener | |||
| last_execute_mode_(INITIALIZATION), | |||
| session_id_(0), | |||
| device_id_(0), | |||
| maxDumpOpNum_(0), data_dumper_(runtime_param_), | |||
| maxDumpOpNum_(0), data_dumper_(&runtime_param_), | |||
| iterator_count_(0), | |||
| is_l1_fusion_enable_(false), | |||
| is_first_execute_(true) { | |||
| @@ -232,6 +233,8 @@ DavinciModel::~DavinciModel() { | |||
| FreeP2PMem(); | |||
| OpDebugUnRegister(); | |||
| if (l1_fusion_addr_ != nullptr) { | |||
| GE_CHK_RT(rtFree(l1_fusion_addr_)); | |||
| } | |||
| @@ -242,8 +245,6 @@ DavinciModel::~DavinciModel() { | |||
| } | |||
| } | |||
| OpDebugUnRegister(); | |||
| ReleaseTask(); | |||
| CleanTbeHandle(); | |||
| @@ -297,6 +298,11 @@ void DavinciModel::ReleaseTask() { | |||
| GE_CHK_STATUS(task->Release(), "Release task failed."); | |||
| } | |||
| } | |||
| for (auto &item : label_goto_args_) { | |||
| GE_FREE_RT_LOG(item.second.first); | |||
| } | |||
| label_goto_args_.clear(); | |||
| } | |||
| Status DavinciModel::Assign(const GeModelPtr &ge_model) { | |||
| @@ -532,20 +538,20 @@ Status DavinciModel::DoTaskSink() { | |||
| GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed."); | |||
| if (known_node_) { | |||
| GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed."); | |||
| GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node's args failed"); | |||
| } | |||
| GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed."); | |||
| GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed"); | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed"); | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed."); | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed"); | |||
| GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); | |||
| GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed"); | |||
| GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed."); | |||
| GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed"); | |||
| GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); | |||
| GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed"); | |||
| GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); | |||
| @@ -568,77 +574,21 @@ Status DavinciModel::SetTSDevice() { | |||
| } | |||
| Status DavinciModel::OpDebugRegister() { | |||
| bool is_op_debug = false; | |||
| (void)ge::AttrUtils::GetBool(ge_model_, ATTR_OP_DEBUG_FLAG, is_op_debug); | |||
| GELOGD("The value of op debug in ge_model is %d.", is_op_debug); | |||
| if (is_op_debug) { | |||
| debug_reg_mutex_.lock(); | |||
| rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_)); | |||
| // For data dump, aicpu needs the pointer to pointer that save the real debug address. | |||
| rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| uint32_t op_debug_mode = 0; | |||
| (void)ge::AttrUtils::GetInt(ge_model_, ATTR_OP_DEBUG_MODE, op_debug_mode); | |||
| GELOGD("The value of op_debug_mode in ge_model_ is %u.", op_debug_mode); | |||
| uint32_t debug_task_id = 0; | |||
| uint32_t debug_stream_id = 0; | |||
| rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| if (GetDumpProperties().IsOpDebugOpen()) { | |||
| uint32_t op_debug_mode = GetDumpProperties().GetOpDebugMode(); | |||
| auto ret = opdebug_register_.RegisterDebugForModel(rt_model_handle_, op_debug_mode, data_dumper_); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret,"Register known shape op debug failed, ret: 0x%X",ret); | |||
| return ret; | |||
| } | |||
| GELOGI("debug_task_id:%d, debug_stream_id:%u", debug_task_id, debug_stream_id); | |||
| is_op_debug_reg_ = true; | |||
| data_dumper_.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, is_op_debug); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| void DavinciModel::OpDebugUnRegister() { | |||
| if (is_op_debug_reg_) { | |||
| debug_reg_mutex_.unlock(); | |||
| rtError_t rt_ret = RT_ERROR_NONE; | |||
| if (rt_model_handle_ != nullptr) { | |||
| GELOGD("start call debug_unregister."); | |||
| rt_ret = rtDebugUnRegister(rt_model_handle_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret); | |||
| } | |||
| } | |||
| if (op_debug_addr_ != nullptr) { | |||
| rt_ret = rtFree(op_debug_addr_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGW("rtFree failed, ret: 0x%X", rt_ret); | |||
| } | |||
| op_debug_addr_ = nullptr; | |||
| } | |||
| if (p2p_debug_addr_ != nullptr) { | |||
| rt_ret = rtFree(p2p_debug_addr_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGW("rtFree failed, ret: 0x%X", rt_ret); | |||
| } | |||
| p2p_debug_addr_ = nullptr; | |||
| } | |||
| opdebug_register_.UnregisterDebugForModel(rt_model_handle_); | |||
| is_op_debug_reg_ = false; | |||
| } | |||
| return; | |||
| @@ -710,12 +660,12 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| runtime_param_.graph_id = compute_graph->GetGraphID(); | |||
| // op debug register | |||
| GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed."); | |||
| GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed"); | |||
| GE_TIMESTAMP_START(TransAllVarData); | |||
| GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed."); | |||
| GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed"); | |||
| GE_TIMESTAMP_END(TransAllVarData, "GraphLoader::TransAllVarData"); | |||
| GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed."); | |||
| GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed"); | |||
| GE_TIMESTAMP_START(InitModelMem); | |||
| GELOGD("Known node is %d.", known_node_); | |||
| @@ -723,7 +673,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| if (!known_node_) { | |||
| GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size)); | |||
| data_inputer_ = new (std::nothrow) DataInputer(); | |||
| GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr."); | |||
| GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr"); | |||
| } | |||
| fixed_mem_base_ = reinterpret_cast<uintptr_t>(mem_base_); | |||
| GE_TIMESTAMP_END(InitModelMem, "GraphLoader::InitModelMem"); | |||
| @@ -1390,6 +1340,39 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info | |||
| } | |||
| } | |||
| /// @ingroup ge | |||
| /// @brief Get the device argument buffer used by a label goto, allocating it on first use. | |||
| ///        Buffers are cached per label index in label_goto_args_, so repeated requests for the | |||
| ///        same label return the same address and size. | |||
| /// @param [in] label_index: index into label_list_. | |||
| /// @param [in] mem_type: runtime memory type passed to rtMalloc for a new buffer. | |||
| /// @param [out] arg_addr: device address of the label argument buffer (nullptr if the label copy fails). | |||
| /// @param [out] arg_size: size in bytes of the label argument buffer (0 if the label copy fails). | |||
| /// @return SUCCESS, INTERNAL_ERROR for an out-of-range index, or the converted runtime error. | |||
| Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type, void *&arg_addr, uint32_t &arg_size) { | |||
| // The lock is held for the whole call, so lookup, allocation and cache insert are one critical section. | |||
| std::lock_guard<std::mutex> lock(label_args_mutex_); | |||
| auto it = label_goto_args_.find(label_index); | |||
| if (it != label_goto_args_.end()) { | |||
| arg_addr = it->second.first; | |||
| arg_size = it->second.second; | |||
| return SUCCESS; | |||
| } | |||
| if (label_index >= label_list_.size()) { | |||
| GELOGE(INTERNAL_ERROR, "Invalid label id:%u, label size:%zu", label_index, label_list_.size()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GE_CHECK_NOTNULL(label_list_[label_index]); | |||
| vector<rtLabel_t> label_used = { label_list_[label_index] }; | |||
| arg_size = label_used.size() * sizeof(rtLabelDevInfo); | |||
| rtError_t rt_ret = rtMalloc(&arg_addr, arg_size, mem_type); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), arg_addr, arg_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret); | |||
| // Do not cache a buffer whose contents were never written: a later lookup for this label | |||
| // would otherwise return it as a valid result. Release it here instead. | |||
| GE_FREE_RT_LOG(arg_addr); | |||
| arg_addr = nullptr; | |||
| arg_size = 0; | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| // Cache only after the copy succeeded; cached buffers are released in ReleaseTask(). | |||
| label_goto_args_[label_index] = { arg_addr, arg_size }; | |||
| return SUCCESS; | |||
| } | |||
| /// @ingroup ge | |||
| /// @brief LabelSet Op Initialize. | |||
| /// @param [in] op_desc: LabelSet Op descriptor. | |||
| @@ -2966,16 +2949,14 @@ Status DavinciModel::MallocKnownArgs() { | |||
| return ret; | |||
| } | |||
| } | |||
| rtError_t rt_ret; | |||
| // malloc args memory | |||
| if (total_args_size_ == 0) { | |||
| GELOGW("DavinciModel::MallocKnownArgs total_args_size_ equals to zero."); | |||
| return SUCCESS; | |||
| } | |||
| rtError_t rt_ret = rtMalloc(&args_, total_args_size_, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| if (total_args_size_ != 0) { | |||
| rt_ret = rtMalloc(&args_, total_args_size_, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| } | |||
| // malloc dynamic and static hybrid memory | |||
| if (total_hybrid_args_size_ != 0) { | |||
| @@ -3891,7 +3872,7 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) | |||
| } | |||
| std::vector<NodePtr> variable_node_list; | |||
| for (ge::NodePtr &node : graph->GetDirectNode()) { | |||
| for (ge::NodePtr &node : graph->GetAllNodes()) { | |||
| if (node == nullptr) { | |||
| continue; | |||
| } | |||
| @@ -29,6 +29,7 @@ | |||
| #include "common/helper/om_file_helper.h" | |||
| #include "common/opskernel/ge_task_info.h" | |||
| #include "common/properties_manager.h" | |||
| #include "common/dump/opdebug_register.h" | |||
| #include "common/types.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| @@ -272,6 +273,8 @@ class DavinciModel { | |||
| const vector<rtLabel_t> &GetLabelList() const { return label_list_; } | |||
| Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size); | |||
| Status DestroyThread(); | |||
| // get Op | |||
| @@ -929,6 +932,9 @@ class DavinciModel { | |||
| vector<rtLabel_t> label_list_; | |||
| set<uint32_t> label_id_indication_; | |||
| mutex label_args_mutex_; | |||
| map<uint32_t, pair<void *, uint32_t>> label_goto_args_; | |||
| mutex outside_addrs_mutex_; | |||
| vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | |||
| set<const void *> copy_only_addrs_; // Address need copy to original place. | |||
| @@ -984,6 +990,7 @@ class DavinciModel { | |||
| int64_t maxDumpOpNum_; | |||
| // for data dump | |||
| DataDumper data_dumper_; | |||
| OpdebugRegister opdebug_register_; | |||
| uint64_t iterator_count_; | |||
| bool is_l1_fusion_enable_; | |||
| map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init | |||
| @@ -1021,8 +1028,6 @@ class DavinciModel { | |||
| // for op debug | |||
| mutex debug_reg_mutex_; | |||
| bool is_op_debug_reg_ = false; | |||
| void *op_debug_addr_ = nullptr; | |||
| void *p2p_debug_addr_ = nullptr; | |||
| bool is_online_infer_dynamic_ = false; | |||
| bool is_getnext_sink_dynamic_ = false; | |||
| vector<int32_t> cur_dynamic_dims_; | |||
| @@ -286,6 +286,17 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string | |||
| return SUCCESS; | |||
| } | |||
| // Decides whether a root model must be loaded through the hybrid path. | |||
| // Returns false (after logging an error) when the root model has no root graph. | |||
| // Otherwise returns true when the root graph reports its unknown flag, when the graph | |||
| // carries a true ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED attribute, or when the context | |||
| // reports the host-exec flag. | |||
| bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) { | |||
| auto graph = ge_root_model.GetRootGraph(); | |||
| if (graph == nullptr) { | |||
| GELOGE(FAILED, "no model on root model"); | |||
| return false; | |||
| } | |||
| const bool unknown_shape = graph->GetGraphUnknownFlag(); | |||
| // The lookup result is deliberately ignored; the flag stays false unless GetBool sets it. | |||
| bool dsp_partitioned = false; | |||
| (void)AttrUtils::GetBool(graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dsp_partitioned); | |||
| if (unknown_shape || dsp_partitioned) { | |||
| return true; | |||
| } | |||
| // The host-exec flag is only consulted when neither graph-level condition holds. | |||
| return GetContext().GetHostExecFlag(); | |||
| } | |||
| /// | |||
| /// @ingroup domi_ome | |||
| /// @brief load model online | |||
| @@ -297,12 +308,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
| if (model_id == INVALID_MODEL_ID) { | |||
| GenModelId(&model_id); | |||
| } | |||
| bool is_shape_unknown = false; | |||
| auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| string model_name = ""; | |||
| GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_shape_unknown), "CheckIsUnknownShape failed, model id:%u", | |||
| model_id); | |||
| if (is_shape_unknown || GetContext().GetHostExecFlag()) { | |||
| if (IsNeedHybridLoad(*ge_root_model)) { | |||
| return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener); | |||
| } | |||
| @@ -324,7 +332,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
| auto root_graph = ge_root_model->GetRootGraph(); | |||
| GE_CHECK_NOTNULL(root_graph); | |||
| string root_model_name = root_graph->GetName(); | |||
| auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| GeModelPtr ge_model = name_to_model[root_model_name]; | |||
| Status ret = SUCCESS; | |||
| do { | |||
| @@ -294,6 +294,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
| std::vector<InputOutputDims> &output_dims); | |||
| bool IsDynamicShape(uint32_t model_id); | |||
| bool IsNeedHybridLoad(ge::GeRootModel &ge_root_model); | |||
| ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||
| ge::Status EnableExceptionDump(const std::map<string, string> &options); | |||
| @@ -340,6 +341,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
| void GenModelId(uint32_t *id); | |||
| std::map<uint32_t, std::shared_ptr<DavinciModel>> model_map_; | |||
| std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_; | |||
| std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_; | |||
| @@ -384,7 +384,8 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc | |||
| switch (mem_type) { | |||
| case RT_MEMORY_RDMA_HBM: | |||
| if (offset < 0) { | |||
| GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset)); | |||
| GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", | |||
| reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset))); | |||
| return PARAM_INVALID; | |||
| } | |||
| var_addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset)); | |||
| @@ -124,7 +124,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||
| return FAILED; | |||
| } | |||
| ret = InitTVMTask(args_offset_tmp[0], kernel_def); | |||
| io_addr_offset_ = args_offset_tmp[0]; | |||
| ret = InitTVMTask(io_addr_offset_, kernel_def); | |||
| } else if (kernel_type_ == ccKernelType::CUSTOMIZED) { | |||
| ret = InitAICPUCustomTask(context.op_index(), kernel_def); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| @@ -380,7 +381,8 @@ Status KernelTaskInfo::Distribute() { | |||
| GELOGD("KernelTaskInfo Distribute Start."); | |||
| if (davinci_model_->IsKnownNode()) { | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_) | |||
| : davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); | |||
| } | |||
| @@ -449,29 +451,39 @@ void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) { | |||
| } | |||
| } | |||
| // Rewrites the I/O address table inside the host-side args image and copies the whole | |||
| // image to the device args buffer. | |||
| // @param [in] offset: byte offset of the I/O address table inside the args image. | |||
| // @return SUCCESS on success; FAILED when the host image is missing, the address table does | |||
| //         not fit behind offset, or the host copy fails; a converted runtime status when | |||
| //         rtMemcpy fails. | |||
| Status KernelTaskInfo::CopyNoncontinuousArgs(uint16_t offset) { | |||
| GE_CHECK_NOTNULL(davinci_model_); | |||
| // The host image is allocated with new (std::nothrow), so it may be null. | |||
| if (args_addr == nullptr) { | |||
| GELOGE(FAILED, "host args buffer is not allocated."); | |||
| return FAILED; | |||
| } | |||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs_); | |||
| auto addr_size = kAddrLen * io_addrs_.size(); | |||
| // Same guard as InitTVMTask: the address table must fit into what is left behind offset. | |||
| if ((args_size_ <= offset) || (args_size_ - offset < addr_size)) { | |||
| GELOGE(FAILED, "offset >= kernelInfo.argsSize or copy content beyond applied memory."); | |||
| return FAILED; | |||
| } | |||
| // copy io addr; the destination capacity is the real remaining space, not the copy length | |||
| errno_t sec_ret = memcpy_s(args_addr.get() + offset, args_size_ - offset, io_addrs_.data(), addr_size); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| // copy args to device | |||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Copy noncontinuous args success, kernel type %d.", kernel_type_); | |||
| return SUCCESS; | |||
| } | |||
| Status KernelTaskInfo::UpdateArgs() { | |||
| GELOGI("KernelTaskInfo::UpdateArgs in."); | |||
| GE_CHECK_NOTNULL(davinci_model_); | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| if (l2_buffer_on_) { | |||
| return CopyNoncontinuousArgs(io_addr_offset_); | |||
| } | |||
| davinci_model_->SetTotalIOAddrs(io_addrs_); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| vector<void *> io_addrs = io_addrs_; | |||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||
| uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead); | |||
| auto addrs_size = sizeof(uint64_t) * io_addrs.size(); | |||
| errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| // copy args to device | |||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| return CopyNoncontinuousArgs(sizeof(aicpu::AicpuParamHead)); | |||
| } | |||
| GELOGI("KernelTaskInfo::UpdateArgs success."); | |||
| return SUCCESS; | |||
| } | |||
| @@ -516,8 +528,8 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||
| return SUCCESS; | |||
| } | |||
| char *sm_contrl = const_cast<char *>(sm_desc.data()); | |||
| rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_contrl); | |||
| char *sm_control = const_cast<char *>(sm_desc.data()); | |||
| rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_control); | |||
| uint64_t gen_base_addr = davinci_model_->GetRtBaseAddr(); | |||
| // There is no weight for te op now. Update L2_mirror_addr by data memory base. | |||
| @@ -545,19 +557,31 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||
| return SUCCESS; | |||
| } | |||
| // Records this task's offset in the model's continuous args area and reports its size. | |||
| // The model's total args size is read before the update and stored as args_offset_. | |||
| // NOTE(review): presumably SetTotalArgsSize accumulates rather than overwrites - confirm in DavinciModel. | |||
| void KernelTaskInfo::SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model) { | |||
| const auto offset_before = davinci_model->GetTotalArgsSize(); | |||
| davinci_model->SetTotalArgsSize(args_size); | |||
| args_offset_ = offset_before; | |||
| } | |||
| // Records this task's offset in the model's hybrid args area and reports its size. | |||
| // The model's hybrid args size is read before the update and stored as hybrid_args_offset_. | |||
| // NOTE(review): presumably SetHybridArgsSize accumulates rather than overwrites - confirm in DavinciModel. | |||
| void KernelTaskInfo::SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model) { | |||
| const auto offset_before = davinci_model->GetHybridArgsSize(); | |||
| davinci_model->SetHybridArgsSize(args_size); | |||
| hybrid_args_offset_ = offset_before; | |||
| } | |||
| Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| GE_CHECK_NOTNULL(davinci_model); | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| const domi::KernelContext &context = kernel_def.context(); | |||
| kernel_type_ = static_cast<ccKernelType>(context.kernel_type()); | |||
| uint32_t args_size = kernel_def.args_size(); | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| uint32_t args_size = kernel_def.args_size(); | |||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||
| davinci_model->SetTotalArgsSize(args_size); | |||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||
| if (kernel_def.sm_desc().empty()) { | |||
| SetContinuousArgs(args_size, davinci_model); | |||
| return SUCCESS; | |||
| } | |||
| l2_buffer_on_ = true; | |||
| SetNoncontinuousArgs(args_size, davinci_model); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||
| davinci_model->SetHybridArgsSize(kernel_def.args_size()); | |||
| GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); | |||
| SetNoncontinuousArgs(args_size, davinci_model); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -568,8 +592,23 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| // get tvm op desc | |||
| OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| Status ge_ret = UpdateL2Data(kernel_def); | |||
| // update origin l2 data | |||
| if (ge_ret != SUCCESS) { | |||
| return ge_ret; | |||
| } | |||
| if (davinci_model_->IsKnownNode()) { | |||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_) | |||
| : davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| InitDumpTask(offset); | |||
| return SUCCESS; | |||
| } | |||
| @@ -609,12 +648,6 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| vector<uint8_t> args_info(args_size_); | |||
| errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| if ((args_size_ <= offset) || (args_size_ - offset < kAddrLen * tensor_device_addrs.size())) { | |||
| GELOGE(FAILED, "offset >= kernelInfo.argsSize or copy content beyond applied memory."); | |||
| @@ -628,7 +661,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(), | |||
| sec_ret = memcpy_s(args_addr.get() + offset, args_size_ - offset, tensor_device_addrs.data(), | |||
| kAddrLen * tensor_device_addrs.size()); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| @@ -640,19 +673,13 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset, | |||
| "Op debug is open in TVM task info"); | |||
| Status ge_ret = UpdateL2Data(kernel_def); | |||
| // update origin l2 data | |||
| if (ge_ret != SUCCESS) { | |||
| return ge_ret; | |||
| } | |||
| vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | |||
| virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
| virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
| if (op_desc->GetType() == ATOMICADDRCLEAN) { | |||
| virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||
| } | |||
| davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset); | |||
| davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_addr.get(), args_, args_size_, offset); | |||
| GELOGD("Do InitTVMTask end"); | |||
| return SUCCESS; | |||
| @@ -129,6 +129,9 @@ class KernelTaskInfo : public TaskInfo { | |||
| bool IsL1FusionOp(const OpDescPtr &op_desc); | |||
| void SetIoAddrs(const OpDescPtr &op_desc); | |||
| void InitDumpTask(uint32_t offset); | |||
| void SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model); | |||
| void SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model); | |||
| Status CopyNoncontinuousArgs(uint16_t offset); | |||
| // For super kernel | |||
| Status SaveSKTDumpInfo(); | |||
| @@ -163,6 +166,8 @@ class KernelTaskInfo : public TaskInfo { | |||
| uint32_t hybrid_args_offset_ = 0; | |||
| int64_t fixed_addr_offset_ = 0; | |||
| std::unique_ptr<uint8_t[]> args_addr = nullptr; | |||
| uint16_t io_addr_offset_ = 0; | |||
| bool l2_buffer_on_ = false; | |||
| bool call_save_dump_ = false; | |||
| // aicpu ext_info device mem | |||
| @@ -17,9 +17,15 @@ | |||
| #include "graph/load/model_manager/task_info/label_goto_ex_task_info.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| constexpr uint8_t kGotoBranchMax = 1; | |||
| // Cleans up the per-task switch index buffer and drops the label args pointer. | |||
| LabelGotoExTaskInfo::~LabelGotoExTaskInfo() { | |||
| // index_value_ is allocated by this task in Init() via rtMalloc. | |||
| // NOTE(review): GE_FREE_RT_LOG presumably calls rtFree and logs on failure - confirm the macro. | |||
| GE_FREE_RT_LOG(index_value_); | |||
| // args_ is obtained from DavinciModel::GetLabelGotoAddr and is only reset here, not freed. | |||
| args_ = nullptr; | |||
| } | |||
| Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| GELOGI("LabelGotoExTaskInfo Init Start."); | |||
| GE_CHECK_NOTNULL(davinci_model); | |||
| @@ -28,7 +34,7 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| return FAILED; | |||
| } | |||
| // Get LabelGoto task def | |||
| // Get LabelGotoEx task def | |||
| const domi::LabelGotoExDef &label_goto = task_def.label_goto_ex(); | |||
| OpDescPtr op_desc = davinci_model->GetOpByIndex(label_goto.op_index()); | |||
| if (op_desc == nullptr) { | |||
| @@ -43,20 +49,38 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| return INTERNAL_ERROR; | |||
| } | |||
| const vector<rtLabel_t> &label_list = davinci_model->GetLabelList(); | |||
| if (label_index >= label_list.size()) { | |||
| GELOGE(PARAM_INVALID, "LabelGotoExTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size()); | |||
| return INTERNAL_ERROR; | |||
| rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM; | |||
| GELOGI("memory_type: %u", memory_type); | |||
| GE_CHK_STATUS_RET_NOLOG(davinci_model->GetLabelGotoAddr(label_index, memory_type, args_, args_size_)); | |||
| rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| label_ = label_list[label_index]; | |||
| GELOGI("LabelGotoExTaskInfo Init Success, label id:%u, label:%p.", label_index, label_); | |||
| uint64_t branch_index = 0; | |||
| rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMemcpy failed, error: %#x", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGI("LabelGotoExTaskInfo Init Success, label id:%u", label_index); | |||
| return SUCCESS; | |||
| } | |||
| Status LabelGotoExTaskInfo::Distribute() { | |||
| GELOGI("LabelGotoExTaskInfo Distribute Start."); | |||
| rtError_t rt_ret = rtLabelGotoEx(label_, stream_); | |||
| GE_CHECK_NOTNULL(args_); | |||
| GE_CHECK_NOTNULL(index_value_); | |||
| if (args_size_ == 0) { | |||
| GELOGE(PARAM_INVALID, "branch max: %u, args size: %u invalid.", kGotoBranchMax, args_size_); | |||
| return PARAM_INVALID; | |||
| } | |||
| rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, kGotoBranchMax, args_, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| @@ -14,24 +14,26 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| #ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class LabelGotoExTaskInfo : public TaskInfo { | |||
| public: | |||
| LabelGotoExTaskInfo() : label_(nullptr) {} | |||
| LabelGotoExTaskInfo() = default; | |||
| ~LabelGotoExTaskInfo() override { label_ = nullptr; } | |||
| ~LabelGotoExTaskInfo() override; | |||
| Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
| Status Distribute() override; | |||
| private: | |||
| void *label_; | |||
| void *index_value_{nullptr}; // switch index input. | |||
| void *args_{nullptr}; // label info memory. | |||
| uint32_t args_size_{0}; // label info length. | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| #endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| #ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| @@ -34,4 +34,4 @@ class LabelSetTaskInfo : public TaskInfo { | |||
| void *label_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| #endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||
| @@ -16,20 +16,13 @@ | |||
| #include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| namespace ge { | |||
| constexpr uint8_t kLabelSwitchIndexNum = 1; | |||
| LabelSwitchByIndexTaskInfo::~LabelSwitchByIndexTaskInfo() { | |||
| if (args_ != nullptr) { | |||
| rtError_t ret = rtFree(args_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); | |||
| } | |||
| } | |||
| args_ = nullptr; | |||
| GE_FREE_RT_LOG(args_); | |||
| index_value_ = nullptr; | |||
| } | |||
| @@ -37,13 +30,12 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||
| GELOGI("LabelSwitchByIndexTaskInfo Init Start."); | |||
| GE_CHECK_NOTNULL(davinci_model); | |||
| const vector<rtLabel_t> &label_list = davinci_model->GetLabelList(); | |||
| Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); | |||
| if (ret != SUCCESS) { | |||
| return FAILED; | |||
| } | |||
| // Get LabelSwitch task def | |||
| // Get LabelSwitchByIndex task def | |||
| const domi::LabelSwitchByIndexDef &label_switch = task_def.label_switch_by_index(); | |||
| OpDescPtr op_desc = davinci_model->GetOpByIndex(label_switch.op_index()); | |||
| if (op_desc == nullptr) { | |||
| @@ -68,7 +60,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||
| davinci_model->DisableZeroCopy(index_value_); | |||
| std::vector<uint32_t> label_idx_list; | |||
| vector<uint32_t> label_idx_list; | |||
| if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, label_idx_list)) { | |||
| GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s Get attr %s failed.", op_desc->GetName().c_str(), | |||
| ATTR_NAME_LABEL_SWITCH_LIST.c_str()); | |||
| @@ -81,7 +73,8 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||
| return INTERNAL_ERROR; | |||
| } | |||
| label_list_.resize(branch_max_, nullptr); | |||
| vector<rtLabel_t> label_used(branch_max_, nullptr); | |||
| const vector<rtLabel_t> &label_list = davinci_model->GetLabelList(); | |||
| for (size_t idx = 0; idx < label_idx_list.size(); ++idx) { | |||
| uint32_t label_id = label_idx_list[idx]; | |||
| if (label_id >= label_list.size()) { | |||
| @@ -90,8 +83,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GE_CHECK_NOTNULL(label_list[label_id]); | |||
| label_list_[idx] = label_list[label_id]; | |||
| label_used[idx] = label_list[label_id]; | |||
| } | |||
| rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM; | |||
| @@ -103,7 +95,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_); | |||
| rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| @@ -125,7 +117,7 @@ Status LabelSwitchByIndexTaskInfo::Distribute() { | |||
| rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, branch_max_, args_, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_FAILED; | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGI("LabelSwitchByIndexTaskInfo Distribute Success."); | |||
| @@ -14,16 +14,15 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| #ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| #define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| #include "graph/load/model_manager/task_info/task_info.h" | |||
| namespace ge { | |||
| class LabelSwitchByIndexTaskInfo : public TaskInfo { | |||
| public: | |||
| LabelSwitchByIndexTaskInfo() | |||
| : index_value_(nullptr), branch_max_(0), args_(nullptr), args_size_(0), fixed_addr_offset_(0) {} | |||
| LabelSwitchByIndexTaskInfo() = default; | |||
| ~LabelSwitchByIndexTaskInfo() override; | |||
| @@ -34,12 +33,11 @@ class LabelSwitchByIndexTaskInfo : public TaskInfo { | |||
| Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
| private: | |||
| void *index_value_; // switch index input. | |||
| uint32_t branch_max_; // max branch count. | |||
| void *args_; // label info memory. | |||
| uint32_t args_size_; // label info length. | |||
| std::vector<rtLabel_t> label_list_; | |||
| int64_t fixed_addr_offset_; | |||
| void *index_value_{nullptr}; // switch index input. | |||
| uint32_t branch_max_{0}; // max branch count. | |||
| void *args_{nullptr}; // label info memory. | |||
| uint32_t args_size_{0}; // label info length. | |||
| int64_t fixed_addr_offset_{0}; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| #endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||
| @@ -40,7 +40,7 @@ static bool BlockComparator(const Block *left, const Block *right) { | |||
| } | |||
| bool CanMerge(Block *block) { | |||
| if (block == nullptr || block->allocated || !block->IsSplit()) { | |||
| if ((block == nullptr) || block->allocated || !block->IsSplit()) { | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -52,7 +52,7 @@ size_t GetBinIndex(size_t size) { | |||
| if (size <= range) { | |||
| break; | |||
| } | |||
| ++index; | |||
| index++; | |||
| } | |||
| if (index > kNumBins - 1) { | |||
| index = kNumBins - 1; | |||
| @@ -87,15 +87,15 @@ bool ShouldSplit(const Block *block, size_t size) { | |||
| void IncreaseCount(std::map<size_t, size_t> &count, size_t size) { | |||
| auto it = count.find(size); | |||
| if (it != count.end()) { | |||
| it->second++; | |||
| } else { | |||
| if (it == count.end()) { | |||
| count.emplace(size, 1); | |||
| } else { | |||
| it->second++; | |||
| } | |||
| } | |||
| CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memory_type), memory_allocator_(nullptr) { | |||
| for (uint32_t i = 0; i < kNumBins; ++i) { | |||
| for (uint32_t i = 0; i < kNumBins; i++) { | |||
| free_block_bins_[i] = nullptr; | |||
| } | |||
| } | |||
| @@ -105,7 +105,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) { | |||
| // when redo Initialize free old memory | |||
| FreeBlocks(); | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| for (uint32_t i = 0; i < kNumBins; ++i) { | |||
| for (uint32_t i = 0; i < kNumBins; i++) { | |||
| if (free_block_bins_[i] != nullptr) { | |||
| continue; | |||
| } | |||
| @@ -132,18 +132,18 @@ void CachingAllocator::Finalize(uint32_t device_id) { | |||
| uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { | |||
| GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); | |||
| uint8_t *ptr = nullptr; | |||
| size = GetBlockSize(size); | |||
| uint8_t *ptr = nullptr; | |||
| Block *block = FindFreeBlock(size, org_ptr, device_id); | |||
| if (block != nullptr) { | |||
| ptr = block->ptr; | |||
| } else { | |||
| if (block == nullptr) { | |||
| if (ge::SUCCESS == TryExtendCache(size, device_id)) { | |||
| block = FindFreeBlock(size, org_ptr, device_id); | |||
| if (block != nullptr) { | |||
| ptr = block->ptr; | |||
| } | |||
| } | |||
| } else { | |||
| ptr = block->ptr; | |||
| } | |||
| if (ptr == nullptr) { | |||
| GELOGE(FAILED, "Malloc failed device id = %u, size= %zu", device_id, size); | |||
| @@ -171,7 +171,7 @@ Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) { | |||
| } | |||
| void CachingAllocator::FreeBlock(Block *block) { | |||
| if (block == nullptr || !block->allocated) { | |||
| if ((block == nullptr) || !block->allocated) { | |||
| return; | |||
| } | |||
| GELOGI("Free block size = %zu", block->size); | |||
| @@ -187,7 +187,7 @@ void CachingAllocator::FreeBlock(Block *block) { | |||
| } | |||
| void CachingAllocator::MergeBlocks(Block *dst, Block *src, BlockBin &bin) { | |||
| if (!CanMerge(dst) || !CanMerge(src)) { | |||
| if (!CanMerge(src) || !CanMerge(dst)) { | |||
| return; | |||
| } | |||
| @@ -316,7 +316,7 @@ size_t CachingAllocator::FreeCachedBlocks() { | |||
| GELOGI("Free cached blocks"); | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| size_t free_cached_memory_size = 0; | |||
| for (uint32_t i = 0; i < kNumBins; ++i) { | |||
| for (uint32_t i = 0; i < kNumBins; i++) { | |||
| auto pool = free_block_bins_[i]; | |||
| if (pool == nullptr) { | |||
| continue; | |||
| @@ -324,7 +324,8 @@ size_t CachingAllocator::FreeCachedBlocks() { | |||
| for (auto it = pool->begin(); it != pool->end();) { | |||
| Block *block = *it; | |||
| // free block memory that has not been split | |||
| if ((block != nullptr) && (block->ptr != nullptr) && (block->prev == nullptr) && (block->next == nullptr) && | |||
| if ((block != nullptr) && (block->ptr != nullptr) && | |||
| (block->prev == nullptr) && (block->next == nullptr) && | |||
| (memory_allocator_->FreeMemory(block->ptr) == ge::SUCCESS)) { | |||
| auto itcount = malloced_memory_.find(block->size); | |||
| free_cached_memory_size += block->size; | |||
| @@ -345,7 +346,7 @@ size_t CachingAllocator::FreeCachedBlocks() { | |||
| } | |||
| void CachingAllocator::FreeBlocks() { | |||
| GELOGI("Free blocks"); | |||
| GELOGI("Free blocks."); | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| // free allocated blocks and put to cache | |||
| for (auto &it : allocated_blocks_) { | |||
| @@ -356,9 +357,9 @@ void CachingAllocator::FreeBlocks() { | |||
| } | |||
| void CachingAllocator::FreeBlockBins() { | |||
| GELOGI("Free block bins"); | |||
| GELOGI("Free block bins."); | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| for (uint32_t i = 0; i < kNumBins; ++i) { | |||
| for (uint32_t i = 0; i < kNumBins; i++) { | |||
| if (free_block_bins_[i] != nullptr) { | |||
| delete free_block_bins_[i]; | |||
| free_block_bins_[i] = nullptr; | |||
| @@ -367,9 +368,9 @@ void CachingAllocator::FreeBlockBins() { | |||
| } | |||
| void PrintCount(std::map<size_t, size_t> &count, const std::string &name, size_t total_size, size_t total_count) { | |||
| GELOGI("%6s total[size:%10zu count:%10zu]", name.c_str(), total_size, total_count); | |||
| GELOGI("%6s total[size:%10zu count:%10zu].", name.c_str(), total_size, total_count); | |||
| for (auto &it : count) { | |||
| GELOGI(" |- block[size:%10zu count:%10zu]", it.first, it.second); | |||
| GELOGI(" |- block[size:%10zu count:%10zu].", it.first, it.second); | |||
| } | |||
| } | |||
| @@ -383,20 +384,20 @@ void CachingAllocator::PrintStatics() { | |||
| size_t total_free_count = 0; | |||
| size_t total_malloc_size = 0; | |||
| size_t total_malloc_count = 0; | |||
| std::map<size_t, size_t> using_block; | |||
| std::map<size_t, size_t> free_block; | |||
| std::map<size_t, size_t> malloc_block; | |||
| std::map<size_t, size_t> using_block_stat; | |||
| std::map<size_t, size_t> free_block_stat; | |||
| std::map<size_t, size_t> malloc_block_stat; | |||
| do { | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| for (uint32_t i = 0; i < kNumBins; ++i) { | |||
| for (uint32_t i = 0; i < kNumBins; i++) { | |||
| auto pool = free_block_bins_[i]; | |||
| if (pool == nullptr) { | |||
| continue; | |||
| } | |||
| for (auto it = pool->begin(); it != pool->end(); ++it) { | |||
| for (auto it = pool->begin(); it != pool->end(); it++) { | |||
| if ((*it) != nullptr) { | |||
| total_free_size += (*it)->size; | |||
| IncreaseCount(free_block, (*it)->size); | |||
| IncreaseCount(free_block_stat, (*it)->size); | |||
| total_free_count++; | |||
| } | |||
| } | |||
| @@ -405,7 +406,7 @@ void CachingAllocator::PrintStatics() { | |||
| for (auto &it : allocated_blocks_) { | |||
| if (it.second != nullptr) { | |||
| total_using_size += it.second->size; | |||
| IncreaseCount(using_block, it.second->size); | |||
| IncreaseCount(using_block_stat, it.second->size); | |||
| total_using_count++; | |||
| } | |||
| } | |||
| @@ -413,12 +414,12 @@ void CachingAllocator::PrintStatics() { | |||
| for (auto &it : malloced_memory_) { | |||
| total_malloc_size += it.first * it.second; | |||
| total_malloc_count += it.second; | |||
| malloc_block[it.first] = it.second; | |||
| malloc_block_stat[it.first] = it.second; | |||
| } | |||
| } while (0); | |||
| PrintCount(malloc_block, "Malloc", total_malloc_size, total_malloc_count); | |||
| PrintCount(using_block, "Using", total_using_size, total_using_count); | |||
| PrintCount(free_block, "Free", total_free_size, total_free_count); | |||
| PrintCount(malloc_block_stat, "Malloc", total_malloc_size, total_malloc_count); | |||
| PrintCount(using_block_stat, "Using", total_using_size, total_using_count); | |||
| PrintCount(free_block_stat, "Free", total_free_size, total_free_count); | |||
| } | |||
| } // namespace ge | |||
| @@ -93,6 +93,7 @@ | |||
| #include "graph/passes/global_step_insert_pass.h" | |||
| #include "graph/passes/memcpy_addr_async_pass.h" | |||
| #include "graph/passes/hccl_continuous_memcpy_pass.h" | |||
| #include "graph/passes/parallel_group_pass.h" | |||
| #include "graph/build/label_allocator.h" | |||
| #include "graph/utils/tensor_adapter.h" | |||
| #include "inc/pass_manager.h" | |||
| @@ -359,7 +360,10 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| std::shared_ptr<Graph> graph_ptr = MakeShared<ge::Graph>(graph); | |||
| GE_IF_BOOL_EXEC(graph_ptr == nullptr, GELOGE(FAILED, "GraphPtr make shared failed"); | |||
| return FAILED); | |||
| // update option about tuning graph | |||
| ParseOption(options, BUILD_MODE, options_.build_mode); | |||
| ParseOption(options, BUILD_STEP, options_.build_step); | |||
| ParseOption(options, TUNING_PATH, options_.tuning_path); | |||
| graph_node->SetGraph(graph_ptr); | |||
| graph_node->SetOptions(options); | |||
| AddGraphNode(graph_id, graph_node); | |||
| @@ -433,6 +437,10 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap | |||
| GELOGE(FAILED, "GraphPtr make shared failed"); | |||
| return FAILED; | |||
| } | |||
| // update option about tuning graph | |||
| ParseOption(options, BUILD_MODE, options_.build_mode); | |||
| ParseOption(options, BUILD_STEP, options_.build_step); | |||
| ParseOption(options, TUNING_PATH, options_.tuning_path); | |||
| graph_node->SetGraph(graph_ptr); | |||
| graph_node->SetOptions(options); | |||
| @@ -1564,6 +1572,10 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti | |||
| GE_IF_BOOL_EXEC(ret != SUCCESS, | |||
| GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.compressFlag value is invalid, must be 0 or 1."); | |||
| return GE_GRAPH_OPTIONS_INVALID); | |||
| // Set build mode, build step and tuning path | |||
| ParseOption(options, BUILD_MODE, options_.build_mode); | |||
| ParseOption(options, BUILD_STEP, options_.build_step); | |||
| // tuning_path must be read from the TUNING_PATH key (as AddGraph/AddGraphWithCopy do), not from BUILD_STEP | |||
| ParseOption(options, TUNING_PATH, options_.tuning_path); | |||
| // ge.graphType. | |||
| options_.run_graph_flag = true; | |||
| @@ -1612,10 +1624,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti | |||
| GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d", | |||
| options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type); | |||
| // Set Build model and step | |||
| ParseOption(options, BUILD_MODE, options_.build_mode); | |||
| ParseOption(options, BUILD_STEP, options_.build_step); | |||
| return SUCCESS; | |||
| } | |||
| @@ -1647,6 +1655,7 @@ void GraphManager::ParseOption(const std::map<std::string, std::string> &options | |||
| std::string &option) { | |||
| // Overwrite `option` only when `key` is present in `options`; otherwise keep its current value. | |||
| auto iter = options.find(key); | |||
| if (iter != options.end()) { | |||
| GELOGD("Set option %s from value %s to value %s", key.c_str(), option.c_str(), iter->second.c_str()); | |||
| option = iter->second; | |||
| } | |||
| } | |||
| @@ -2471,6 +2480,12 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||
| GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph), "Add memcpy_addr_async node failed."); | |||
| GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run."); | |||
| // Handle parallel group. | |||
| GE_TIMESTAMP_START(ParallelGroup); | |||
| ParallelGroupPass parallel_group_pass; | |||
| GE_CHK_STATUS_RET(parallel_group_pass.Run(compute_graph), "Handle parallel group failed."); | |||
| GE_TIMESTAMP_END(ParallelGroup, "ParallelGroupPass::Run."); | |||
| // After while sub graph handle, mark all node rw type | |||
| auto result = GetCompilerStages(compute_graph->GetGraphID()).optimizer.HandleMemoryRWConflict(compute_graph); | |||
| if (result != SUCCESS) { | |||
| @@ -2987,9 +3002,9 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| graph_manager->graph_executor_.SetTrainFlag(graph_manager->options_.train_graph_flag); | |||
| } | |||
| args.graph_node->SetRunFlag(false); | |||
| ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(), | |||
| args.input_tensor); | |||
| args.graph_node->SetRunFlag(false); | |||
| if (ret != SUCCESS) { | |||
| ReturnError(graph_manager, args.callback, ret, "ExecuteGraphAsync failed, thread exit."); | |||
| args.graph_node->Unlock(); | |||
| @@ -3188,6 +3203,15 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra | |||
| sub_graph->SetSessionID(session_id); | |||
| sub_graph->SetGraphID(graph_node->GetGraphId()); | |||
| } | |||
| bool off_superkernel = false; | |||
| if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) { | |||
| GELOGI("Compute graph %s get superkernel flag %d.", compute_graph->GetName().c_str(), off_superkernel); | |||
| if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) { | |||
| GELOGE(FAILED, "Compute graph %s set superkernel flag %d failed", merged_compute_graph->GetName().c_str(), | |||
| off_superkernel); | |||
| return FAILED; | |||
| } | |||
| } | |||
| GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph"); | |||
| GE_DUMP(merged_compute_graph, "mergedComputeGraph"); | |||
| compute_graph = merged_compute_graph; | |||
| @@ -3221,6 +3245,21 @@ Status GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, GraphPar | |||
| non_tuning_subgraphs.push_back(sub_graph_tmp); | |||
| } | |||
| } | |||
| // for function graphs to tune | |||
| for (auto &function_graph : compute_graph->GetAllSubgraphs()) { | |||
| auto subgraph_list = sub_graph_map[function_graph]; | |||
| for (const auto &sub_graph_info_ptr : subgraph_list) { | |||
| GE_CHECK_NOTNULL(sub_graph_info_ptr); | |||
| ComputeGraphPtr sub_graph_tmp = sub_graph_info_ptr->GetSubGraph(); | |||
| // need to tuning | |||
| if (sub_graph_info_ptr->GetEngineName() == kVectorEngine || | |||
| sub_graph_info_ptr->GetEngineName() == kAIcoreEngine) { | |||
| tuning_subgraphs.push_back(sub_graph_tmp); | |||
| } else { | |||
| non_tuning_subgraphs.push_back(sub_graph_tmp); | |||
| } | |||
| } | |||
| } | |||
| return TuningUtils::ConvertGraphToFile(tuning_subgraphs, non_tuning_subgraphs, exe_flag, path); | |||
| } | |||
| @@ -252,6 +252,7 @@ struct GraphManagerOptions { | |||
| std::string save_original_model; | |||
| std::string build_mode; | |||
| std::string build_step; | |||
| std::string tuning_path; | |||
| std::string input_shape; | |||
| std::string dynamic_dims; | |||
| int32_t dynamic_node_type = -1; | |||
| @@ -278,7 +279,8 @@ struct GraphManagerOptions { | |||
| is_single_op(false), | |||
| save_original_model("false"), | |||
| build_mode(""), | |||
| build_step("") {} | |||
| build_step(""), | |||
| tuning_path(""){} | |||
| }; | |||
| } // namespace ge | |||
| @@ -347,14 +347,18 @@ ge::Status VarManager::Init(const uint32_t &version, const uint64_t &session_id, | |||
| const uint64_t &job_id) { | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| GELOGI("VarManager::Init, session id = %lu.", session_id); | |||
| version_ = version; | |||
| device_id_ = device_id; | |||
| session_id_ = session_id; | |||
| job_id_ = job_id; | |||
| var_resource_ = std::unique_ptr<VarResource>(new (std::nothrow) VarResource(session_id_)); | |||
| if (var_resource_ == nullptr) { | |||
| GELOGW("VarManager has not been init."); | |||
| return ge::INTERNAL_ERROR; | |||
| version_ = version; | |||
| device_id_ = device_id; | |||
| session_id_ = session_id; | |||
| job_id_ = job_id; | |||
| var_resource_ = std::unique_ptr<VarResource>(new (std::nothrow) VarResource(session_id_)); | |||
| if (var_resource_ == nullptr) { | |||
| GELOGW("VarManager init failed session id = %lu.", session_id); | |||
| return ge::INTERNAL_ERROR; | |||
| } | |||
| } else { | |||
| GELOGW("VarManager::has been inited, session id = %lu.", session_id); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -48,50 +48,23 @@ namespace ge { | |||
| using Cluster = DynamicShapePartitioner::Cluster; | |||
| using ClusterPtr = std::shared_ptr<Cluster>; | |||
| static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) { | |||
| static bool IsSingleOpScene(const ComputeGraphPtr &root_graph) { | |||
| for (const auto &node : root_graph->GetAllNodes()) { | |||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
| // not do partition in single op scene. | |||
| bool is_singleop = false; | |||
| (void)AttrUtils::GetBool(node->GetOpDesc(), ATTR_SINGLE_OP_SCENE, is_singleop); | |||
| if (is_singleop) { | |||
| return false; | |||
| } | |||
| for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDesc()) { | |||
| auto type = input_desc.GetDataType(); | |||
| if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { | |||
| if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == nullptr) { | |||
| return false; | |||
| } else { | |||
| GEEVENT("In dynamic shape scene, model contains data type:" | |||
| "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well " | |||
| "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\"."); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| for (const auto &output_desc : node->GetOpDesc()->GetAllOutputsDesc()) { | |||
| auto type = output_desc.GetDataType(); | |||
| if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { | |||
| if (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") == nullptr) { | |||
| return false; | |||
| } else { | |||
| GEEVENT("In dynamic shape scene, model contains data type:" | |||
| "DT_STRING/DT_RESOURCE/DT_STRING_REF may not be supported well " | |||
| "temporarily, please retry with \"unset EXPERIMENTAL_DYNAMIC_PARTITION\"."); | |||
| break; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } | |||
| return true; | |||
| return false; | |||
| } | |||
| Status DynamicShapePartitioner::Partition() { | |||
| REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); | |||
| if (!IsInExperimentalMode(root_graph_)) { | |||
| GELOGD("Skip dynamic shape partition as not in experimental mode."); | |||
| if (IsSingleOpScene(root_graph_)) { | |||
| GELOGD("Skip dynamic shape partition as in single op scene."); | |||
| REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), | |||
| "Failed set dynamic shape partitioned flag on root graph."); | |||
| return SUCCESS; | |||
| @@ -30,8 +30,15 @@ constexpr int kMaxRePassTimes = 10000; | |||
| constexpr size_t kMaxOneInNodes = 1000; | |||
| // Each iteration, we take about 0.3k memory on the stack, we should change the recursion to loop later | |||
| constexpr int kMaxRecursiveDepth = 20; | |||
| struct DuringPassNodeSets { /* bookkeeping sets shared between RunPassesOneGraph and RunPasses */ | |||
| std::unordered_set<Node *> nodes_seen; /* nodes already pushed onto the pending queue */ | |||
| std::unordered_set<NodePtr> nodes_deleted; /* nodes reported as deleted by passes; skipped when dequeued */ | |||
| std::unordered_set<NodePtr> nodes_re_pass; /* nodes re-queued at the start of the next outer loop round */ | |||
| std::unordered_set<NodePtr> nodes_re_pass_immediately; /* nodes pushed to the queue front after the current node */ | |||
| std::unordered_set<NodePtr> nodes_last; /* nodes with more than kMaxOneInNodes inputs, queued once the queue drains */ | |||
| }; | |||
| void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &input_edge_nodes, | |||
| void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque<NodePtr> &input_edge_nodes, | |||
| std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) { | |||
| nodes_last.clear(); | |||
| for (auto &node : graph->GetDirectNode()) { | |||
| @@ -40,7 +47,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i | |||
| } | |||
| size_t in_nums = node->GetInNodes().size(); | |||
| if (in_nums == 0) { | |||
| input_edge_nodes.push(node); | |||
| input_edge_nodes.push_back(node); | |||
| nodes_seen.insert(node.get()); | |||
| } else if (in_nums > kMaxOneInNodes) { | |||
| nodes_last.insert(node); | |||
| @@ -48,7 +55,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i | |||
| } | |||
| } | |||
| void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &nodes_to_pass, | |||
| void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::deque<NodePtr> &nodes_to_pass, | |||
| std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) { | |||
| for (auto &node : nodes) { | |||
| if (node == nullptr) { | |||
| @@ -60,13 +67,30 @@ void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &n | |||
| bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen); | |||
| if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) { | |||
| nodes_to_pass.push(node); | |||
| nodes_to_pass.push_back(node); | |||
| } | |||
| } | |||
| } | |||
| Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unordered_set<NodePtr> &nodes_re_pass, | |||
| std::unordered_set<NodePtr> &nodes_deleted, std::unordered_set<Node *> &nodes_seen) { | |||
| // Copies into nodes_re_pass every entry of nodes_to_re_pass that is either already in nodes_seen | |||
| // or whose input nodes are all in nodes_seen. Null entries are reported with a warning and skipped; | |||
| // node and name_to_pass are only used to build that warning message. | |||
| void PushToRePassIfSeen(NodePtr &node, const std::pair<std::string, BaseNodePass *> &name_to_pass, | |||
| std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_to_re_pass, | |||
| std::unordered_set<NodePtr> &nodes_re_pass) { | |||
| for (const auto &candidate : nodes_to_re_pass) { | |||
| if (candidate == nullptr) { | |||
| GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(), | |||
| node->GetName().c_str(), node->GetType().c_str()); | |||
| continue; | |||
| } | |||
| // Short-circuit: IsAllInNodesSeen is only consulted when the candidate itself was not seen yet. | |||
| const bool can_re_pass = (nodes_seen.count(candidate.get()) > 0) || candidate->IsAllInNodesSeen(nodes_seen); | |||
| if (!can_re_pass) { | |||
| GELOGD("The node %s are not all seen, don't set repass this time", candidate->GetName().c_str()); | |||
| continue; | |||
| } | |||
| GELOGD("The node %s will be re-pass.", candidate->GetName().c_str()); | |||
| nodes_re_pass.insert(candidate); | |||
| } | |||
| } | |||
| Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, DuringPassNodeSets &during_pass_node_set) { | |||
| if (node == nullptr) { | |||
| GELOGE(FAILED, "parameter is null."); | |||
| return FAILED; | |||
| @@ -90,22 +114,15 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder | |||
| } | |||
| auto nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass(); | |||
| for (const auto &node_to_re_pass : nodes_to_re_pass) { | |||
| if (node_to_re_pass == nullptr) { | |||
| GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(), | |||
| node->GetName().c_str(), node->GetType().c_str()); | |||
| continue; | |||
| } | |||
| if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) { | |||
| GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str()); | |||
| nodes_re_pass.insert(node_to_re_pass); | |||
| } else { | |||
| GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str()); | |||
| } | |||
| } | |||
| PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass, | |||
| during_pass_node_set.nodes_re_pass); | |||
| auto nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately(); | |||
| PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass_immediately, | |||
| during_pass_node_set.nodes_re_pass_immediately); | |||
| auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted(); | |||
| nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end()); | |||
| during_pass_node_set.nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end()); | |||
| if (nodes_deleted_by_pass.count(node) > 0) { | |||
| GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(), | |||
| name_to_pass.first.c_str()); | |||
| @@ -181,36 +198,33 @@ Status GEPass::Run(const NamesToPass &names_to_passes) { | |||
| Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { | |||
| GELOGD("Begin to run pass on graph, passes count %zu", names_to_passes.size()); | |||
| std::queue<NodePtr> nodes; | |||
| std::unordered_set<Node *> nodes_seen; | |||
| std::unordered_set<NodePtr> nodes_deleted; | |||
| std::unordered_set<NodePtr> nodes_re_pass; | |||
| std::unordered_set<NodePtr> nodes_last; | |||
| GetAllNodesNoInputEdge(graph_, nodes, nodes_seen, nodes_last); | |||
| std::deque<NodePtr> nodes; | |||
| DuringPassNodeSets during_pass_node_set; | |||
| GetAllNodesNoInputEdge(graph_, nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last); | |||
| GELOGD("Start points count %zu", nodes.size()); | |||
| int re_pass_times = 0; | |||
| do { | |||
| for (auto &node : nodes_re_pass) { | |||
| nodes.push(node); | |||
| nodes_seen.insert(node.get()); | |||
| for (auto &node : during_pass_node_set.nodes_re_pass) { | |||
| nodes.push_back(node); | |||
| during_pass_node_set.nodes_seen.insert(node.get()); | |||
| } | |||
| nodes_re_pass.clear(); | |||
| during_pass_node_set.nodes_re_pass.clear(); | |||
| while (!nodes.empty()) { | |||
| NodePtr node = nodes.front(); | |||
| nodes.pop(); | |||
| nodes.pop_front(); | |||
| (void)nodes_re_pass.erase(node); | |||
| (void)during_pass_node_set.nodes_re_pass.erase(node); | |||
| GE_IF_BOOL_EXEC(node == nullptr, GELOGW("node is null"); continue); | |||
| if (nodes_deleted.count(node) > 0) { | |||
| if (during_pass_node_set.nodes_deleted.count(node) > 0) { | |||
| GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str()); | |||
| continue; | |||
| } | |||
| AddNextIterNodes(node->GetOutNodes(), nodes, nodes_seen, nodes_last); | |||
| AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last); | |||
| auto ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen); | |||
| auto ret = RunPasses(node, names_to_passes, during_pass_node_set); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", | |||
| node->GetName().c_str(), node->GetType().c_str(), ret); | |||
| @@ -227,7 +241,7 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { | |||
| if (has_sub_graph) { | |||
| GELOGD("There are subgraphs on node %s, run passes for for the second time", node->GetName().c_str()); | |||
| SetFlagOption(kOptimizeAfterSubGraph, names_to_passes); | |||
| ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen); | |||
| ret = RunPasses(node, names_to_passes, during_pass_node_set); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", | |||
| node->GetName().c_str(), node->GetType().c_str(), ret); | |||
| @@ -239,16 +253,21 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { | |||
| // should be called each time at the begin of the iteration | |||
| ClearOption(names_to_passes); | |||
| } | |||
| for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) { | |||
| GELOGD("The node %s will be re-pass immediately.", node->GetName().c_str()); | |||
| nodes.push_front(node); | |||
| } | |||
| during_pass_node_set.nodes_re_pass_immediately.clear(); | |||
| } | |||
| for (auto &node : nodes_last) { | |||
| bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen); | |||
| if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) { | |||
| nodes.push(node); | |||
| for (auto &node : during_pass_node_set.nodes_last) { | |||
| bool all_in_nodes_seen = node->IsAllInNodesSeen(during_pass_node_set.nodes_seen); | |||
| if (all_in_nodes_seen && during_pass_node_set.nodes_seen.insert(node.get()).second) { | |||
| nodes.push_back(node); | |||
| } | |||
| } | |||
| nodes_last.clear(); | |||
| } while ((!nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes); | |||
| during_pass_node_set.nodes_last.clear(); | |||
| } while ((!during_pass_node_set.nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes); | |||
| if (re_pass_times == kMaxRePassTimes) { | |||
| GELOGW("re_pass_times should not come to %d", kMaxRePassTimes); | |||
| @@ -53,6 +53,8 @@ class BaseNodePass { | |||
| std::unordered_set<NodePtr> GetNodesNeedRePass() { return nodes_need_re_pass_; } | |||
| std::unordered_set<NodePtr> GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; } | |||
| std::unordered_set<NodePtr> GetNodesDeleted() { return nodes_deleted_; } | |||
| void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; } | |||
| @@ -62,6 +64,7 @@ class BaseNodePass { | |||
| void init() { | |||
| // Empty all three result sets: immediate re-pass, deferred re-pass and deleted nodes. | |||
| nodes_need_re_pass_immediately_.clear(); | |||
| nodes_need_re_pass_.clear(); | |||
| nodes_deleted_.clear(); | |||
| } | |||
| protected: | |||
| @@ -79,6 +82,14 @@ class BaseNodePass { | |||
| /// | |||
| void AddRePassNode(NodePtr &node) { nodes_need_re_pass_.insert(node); } | |||
| /// | |||
| /// Record a node that must be optimized again immediately. The node is kept in a set separate | |||
| /// from the one filled by AddRePassNode and is exposed through GetNodesNeedRePassImmediately(); | |||
| /// call this after adding a node or changing a node's connections when it must be re-run at once. | |||
| /// @param node the node to insert into the immediate re-pass set | |||
| /// | |||
| void AddImmediateRePassNode(NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); } | |||
| /// | |||
| /// Add a node and it's input/output data nodes to be optimized again. | |||
| /// @param node | |||
| @@ -109,6 +120,7 @@ class BaseNodePass { | |||
| private: | |||
| std::unordered_set<NodePtr> nodes_need_re_pass_; | |||
| std::unordered_set<NodePtr> nodes_need_re_pass_immediately_; | |||
| std::unordered_set<NodePtr> nodes_deleted_; | |||
| std::map<NodePassOption, std::string> options_; | |||
| }; | |||
| @@ -167,7 +167,7 @@ bool CastTranslatePass::IsOpSupportedOptimize(NodePtr &cast_node, NodePtr &trans | |||
| trans_op_outdesc->SetDataType(cast_out_datatype); | |||
| } | |||
| if (!TranslateCheckAccuracySupported(trans_op_desc)) { | |||
| if (!TranslateCheckAccuracySupported(trans_node)) { | |||
| if (is_src_cast) { | |||
| trans_op_desc->MutableInputDesc(0)->SetDataType(trans_in_datatype); | |||
| } else { | |||
| @@ -271,7 +271,8 @@ Status CastTranslatePass::FuseDstNTranslates(NodePtr &node) { | |||
| return SUCCESS; | |||
| } | |||
| bool CastTranslatePass::TranslateCheckAccuracySupported(const OpDescPtr &op_desc) { | |||
| bool CastTranslatePass::TranslateCheckAccuracySupported(NodePtr &node) { | |||
| const OpDescPtr &op_desc = node->GetOpDesc(); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
| GELOGW("GE is not initialized or is finalized."); | |||
| @@ -293,7 +294,7 @@ bool CastTranslatePass::TranslateCheckAccuracySupported(const OpDescPtr &op_desc | |||
| auto kernel_info_store = kernel_map.find(kernel_name); | |||
| if (kernel_info_store != kernel_map.end()) { | |||
| if (kernel_info_store->second != nullptr && | |||
| kernel_info_store->second->CheckAccuracySupported(op_desc, unsupported_reason)) { | |||
| kernel_info_store->second->CheckAccuracySupported(node, unsupported_reason)) { | |||
| return true; | |||
| } | |||
| } | |||
| @@ -35,7 +35,7 @@ class CastTranslatePass : public BaseNodePass { | |||
| bool IsOpSupportedOptimize(NodePtr &cast_node, NodePtr &trans_node, bool &is_src_cast); | |||
| bool CheckOpSupportOptimize(NodePtr &node, bool &is_src_cast); | |||
| Status FuseDstNTranslates(NodePtr &node); | |||
| bool TranslateCheckAccuracySupported(const OpDescPtr &op_desc); | |||
| bool TranslateCheckAccuracySupported(NodePtr &node); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_CAST_TRANSLATE_PASS_H_ | |||
| @@ -110,7 +110,7 @@ graphStatus CompileNodesPass::GetSupportedKernel(const NodePtr &node, const std: | |||
| return ge::GE_GRAPH_PARAM_NULLPTR; | |||
| } | |||
| // begin accuracy supported check | |||
| if (!CheckAccuracySupport(kernel_info, instance, op_desc)) { | |||
| if (!CheckAccuracySupport(kernel_info, instance, node)) { | |||
| // if check accuracy support failed , try to go to other engine. | |||
| GELOGD("Check Accuracy Supported return not support, node name is %s. Try to go to other engine.", | |||
| op_desc->GetName().c_str()); | |||
| @@ -123,7 +123,7 @@ graphStatus CompileNodesPass::GetSupportedKernel(const NodePtr &node, const std: | |||
| continue; | |||
| } | |||
| OpsKernelInfoStorePtr tmp_kernel_info = it->second; | |||
| if (CheckAccuracySupport(tmp_kernel_info, instance, op_desc)) { | |||
| if (CheckAccuracySupport(tmp_kernel_info, instance, node)) { | |||
| kernel_lib_name = tmp_kernel_name; | |||
| GELOGD("Find kernel lib %s support node:%s, type:%s , get kernel lib success.", tmp_kernel_name.c_str(), | |||
| node->GetName().c_str(), op_desc->GetType().c_str()); | |||
| @@ -138,14 +138,9 @@ graphStatus CompileNodesPass::GetSupportedKernel(const NodePtr &node, const std: | |||
| } | |||
| bool CompileNodesPass::CheckAccuracySupport(const OpsKernelInfoStorePtr &kernel_info, | |||
| const std::shared_ptr<GELib> instance, OpDescPtr &op_desc) { | |||
| auto ge_desc = MakeShared<ge::OpDescPtr>(op_desc); | |||
| if (ge_desc == nullptr) { | |||
| GELOGE(GE_GRAPH_MEMORY_ALLOC_FAILED, "Fail to malloc op desc."); | |||
| return false; | |||
| } | |||
| const std::shared_ptr<GELib> instance, const NodePtr &node) { | |||
| string reason; | |||
| if (!(kernel_info->CheckAccuracySupported(*ge_desc, reason, true))) { | |||
| if (!(kernel_info->CheckAccuracySupported(node, reason, true))) { | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -39,7 +39,7 @@ class CompileNodesPass : public GraphPass { | |||
| private: | |||
| graphStatus GetSupportedKernel(const NodePtr &node, const std::shared_ptr<GELib> instance, string &kernel_lib_name); | |||
| bool CheckAccuracySupport(const OpsKernelInfoStorePtr &kernel_info, const std::shared_ptr<GELib> instance, | |||
| OpDescPtr &op_desc); | |||
| const NodePtr &node); | |||
| graphStatus CompileNodes(const std::shared_ptr<GELib> instance, | |||
| std::unordered_map<string, vector<NodePtr>> &kernel_to_compile_nodes); | |||
| }; | |||
| @@ -29,13 +29,13 @@ const int kRemoveInputIndex = 1; | |||
| Status DimensionAdjustPass::Run(ge::NodePtr &node) { | |||
| if (node == nullptr) { | |||
| GELOGE(PARAM_INVALID, "node is nullptr"); | |||
| GELOGE(PARAM_INVALID, "node is nullptr."); | |||
| return PARAM_INVALID; | |||
| } | |||
| OpDescPtr op_desc_ptr = node->GetOpDesc(); | |||
| if (op_desc_ptr == nullptr) { | |||
| GELOGE(PARAM_INVALID, "GetOpDesc return nullptr"); | |||
| GELOGE(PARAM_INVALID, "GetOpDesc return nullptr."); | |||
| return PARAM_INVALID; | |||
| } | |||
| @@ -33,11 +33,11 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { | |||
| GE_CHECK_NOTNULL(compute_graph); | |||
| if (!PassUtils::IsNeedTrainIteFlowCtrl(compute_graph)) { | |||
| GELOGI("No need FlowCtrl for graph %u", compute_graph->GetGraphID()); | |||
| GELOGI("No need FlowCtrl for graph %u.", compute_graph->GetGraphID()); | |||
| return NOT_CHANGED; | |||
| } | |||
| GELOGI("FlowCtrl pass begin.graph is [%s]", compute_graph->GetName().c_str()); | |||
| GELOGI("FlowCtrl pass begin.graph is [%s].", compute_graph->GetName().c_str()); | |||
| bool graph_change = false; | |||
| // 1. Add FP/BP flow ctrl (big cycle) | |||
| for (auto &node : compute_graph->GetDirectNode()) { | |||
| @@ -347,11 +347,11 @@ Status FlowCtrlPass::CreateIterCtrlFalseBranch(ComputeGraphPtr &compute_graph, c | |||
| NodePtr assign_node = | |||
| InsertAssignOp(compute_graph, ASSIGN, NODE_NAME_FLOWCTRL_LOOP_ASSIGN, loop_cond_node, loop_reset_node); | |||
| if (assign_node == nullptr || switch_node == nullptr) { | |||
| GELOGE(PARAM_INVALID, "assign_node or switch node is null"); | |||
| GELOGE(PARAM_INVALID, "assign_node or switch node is null."); | |||
| return FAILED; | |||
| } | |||
| GE_CHK_STATUS_RET(SetStreamLabel(assign_node, switch_node->GetName()), "set stream label failed"); | |||
| GE_CHK_STATUS_RET(SetStreamLabel(assign_node, switch_node->GetName()), "set stream label failed."); | |||
| graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), assign_node->GetInControlAnchor()); | |||
| if (add_ret != GRAPH_SUCCESS) { | |||
| @@ -370,7 +370,7 @@ Status FlowCtrlPass::CreateIterCtrlFalseBranch(ComputeGraphPtr &compute_graph, c | |||
| } | |||
| GE_CHK_STATUS_RET(SetStreamLabel(active_node, switch_node->GetName()), "set stream label failed"); | |||
| GE_CHK_STATUS_RET(SetSwitchBranchNodeLabel(active_node, switch_node->GetName()), | |||
| "set switch branch node label failed"); | |||
| "set switch branch node label failed."); | |||
| string model_exit_name = switch_node->GetName() + "_ModelExit"; | |||
| GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { model_exit_name }), "set active label list failed"); | |||
| @@ -401,7 +401,7 @@ Status FlowCtrlPass::CreateIterCtrlFalseBranch(ComputeGraphPtr &compute_graph, c | |||
| } | |||
| Status FlowCtrlPass::AddFpBpIteratorCtrl(ComputeGraphPtr &compute_graph, NodePtr &pre_node) { | |||
| GE_IF_BOOL_EXEC(pre_node == nullptr, DOMI_LOGE("pre_node is nullptr"); return FAILED); | |||
| GE_IF_BOOL_EXEC(pre_node == nullptr, DOMI_LOGE("pre_node is nullptr."); return FAILED); | |||
| string pre_node_name = pre_node->GetName(); | |||
| GELOGI("Add FpBp Iterator ctrl, pre node:%s.", pre_node_name.c_str()); | |||
| // 1. Get or add variables | |||
| @@ -477,7 +477,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, | |||
| * itersPerLoop loopCond | |||
| */ | |||
| GE_IF_BOOL_EXEC(loop_after_node == nullptr || compute_graph == nullptr, | |||
| DOMI_LOGE("loop after node or compute graph is null"); return FAILED); | |||
| DOMI_LOGE("loop after node or compute graph is null."); return FAILED); | |||
| InDataAnchorPtr in_anchor = loop_after_node->GetInDataAnchor(0); | |||
| if (in_anchor == nullptr || in_anchor->GetPeerOutAnchor() == nullptr) { | |||
| GELOGE(FAILED, "Find %s in data anchor failed.", loop_after_node->GetName().c_str()); | |||
| @@ -498,7 +498,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, | |||
| } | |||
| // 2. Add StreamSwitch and edges to switch_node. | |||
| GE_IF_BOOL_EXEC(loop_pre_node == nullptr, DOMI_LOGE("loop pre node is null"); return FAILED); | |||
| GE_IF_BOOL_EXEC(loop_pre_node == nullptr, DOMI_LOGE("loop pre node is null."); return FAILED); | |||
| string switch_name = loop_pre_node->GetName() + "_" + NODE_NAME_STREAM_SWITCH; | |||
| NodePtr switch_node = InsertStreamSwitchOp(compute_graph, switch_name, loop_cond_node, iter_per_loop_node); | |||
| if (switch_node == nullptr) { | |||
| @@ -506,7 +506,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, | |||
| return FAILED; | |||
| } | |||
| GE_CHK_STATUS_RET(SetStreamLabel(switch_node, switch_name), "set stream label failed"); | |||
| GE_CHK_STATUS_RET(SetStreamLabel(switch_node, switch_name), "set stream label failed."); | |||
| graphStatus add_ret = GraphUtils::AddEdge(loop_pre_node->GetOutControlAnchor(), switch_node->GetInControlAnchor()); | |||
| if (add_ret != GRAPH_SUCCESS) { | |||
| @@ -529,7 +529,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, | |||
| return FAILED; | |||
| } | |||
| GE_CHK_STATUS_RET(SetStreamLabel(active_node, active_name), "set stream label failed"); | |||
| GE_CHK_STATUS_RET(SetStreamLabel(active_node, active_name), "set stream label failed."); | |||
| GE_IF_BOOL_EXEC(!AttrUtils::SetBool(active_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, true), | |||
| DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); | |||
| @@ -542,7 +542,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, | |||
| } | |||
| // used for stream assign to find true branch | |||
| GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); | |||
| GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed."); | |||
| // used for stream assign to find active stream | |||
| GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { loop_pre_node->GetName() }), "set active label list failed"); | |||
| active_nodes_in_iter_loop_.push_back(active_node); | |||
| @@ -25,6 +25,7 @@ | |||
| namespace ge { | |||
| Status InferShapePass::Run(NodePtr &node) { | |||
| // kOptimizeAfterSubGraph exist means after subgraph | |||
| auto ret = ShapeRefiner::InferShapeAndType(node, !OptionExists(kOptimizeAfterSubGraph)); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| // select INFERSHAPE failed info | |||
| @@ -41,6 +42,20 @@ Status InferShapePass::Run(NodePtr &node) { | |||
| GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. node: %s", node->GetName().c_str()); | |||
| return GE_GRAPH_INFERSHAPE_FAILED; | |||
| } | |||
| bool need_repass = false; | |||
| auto has_attr = AttrUtils::GetBool(node->GetOpDesc(), "need_infer_again_", need_repass); | |||
| if (has_attr) { | |||
| if (!OptionExists(kOptimizeAfterSubGraph)) { | |||
| return SUCCESS; | |||
| } | |||
| if (need_repass) { | |||
| AddImmediateRePassNode(node); | |||
| GELOGD("Node %s need repass immediately.", node->GetName().c_str()); | |||
| } else { | |||
| // clear attr on while | |||
| node->GetOpDesc()->DelAttr("need_infer_again_"); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -93,7 +93,7 @@ bool LinkGenMaskNodesPass::AreAllInputsConst(const NodePtr &node) const { | |||
| void LinkGenMaskNodesPass::GetAllGenMaskNodes(ComputeGraphPtr graph, vector<NodePtr> &gen_mask_nodes) const { | |||
| set<NodePtr> nodes_set; | |||
| for (const NodePtr &node : graph->GetDirectNode()) { | |||
| if (node->GetType() != DROPOUTDOMASK) { | |||
| if (node->GetType() != DROPOUTDOMASK && node->GetType() != DROPOUTDOMASKV3 && node->GetType() != DROPOUTDOMASKV3D) { | |||
| continue; | |||
| } | |||
| @@ -555,6 +555,8 @@ void NetOutputPass::AddInOutForNetOutputOp(const ComputeGraphPtr &graph, OpDescP | |||
| return; | |||
| } | |||
| ge::GeTensorDesc out_desc = src_node->GetOpDesc()->GetOutputDesc(src_index); | |||
| out_desc.SetFormat(FORMAT_ND); | |||
| out_desc.SetOriginFormat(FORMAT_ND); | |||
| GE_IF_BOOL_EXEC(net_output_desc->AddInputDesc(out_desc) != SUCCESS, GELOGW("add input desc failed"); return ); | |||
| is_input_const.push_back(PassUtils::IsConstant(src_node)); | |||
| ++iter; | |||
| @@ -22,6 +22,10 @@ | |||
| using std::string; | |||
| namespace ge { | |||
| namespace { | |||
| const int64_t kLoopType = 1; | |||
| } | |||
| Status NextIterationPass::Run(ComputeGraphPtr graph) { | |||
| GELOGD("NextIterationPass Enter"); | |||
| /// Enter-----------+ | |||
| @@ -121,7 +125,10 @@ Status NextIterationPass::FindWhileGroups() { | |||
| if (switch_node == nullptr) { | |||
| continue; | |||
| } | |||
| if (!AttrUtils::SetInt(switch_node->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_TYPE, kLoopType)) { | |||
| GELOGE(INTERNAL_ERROR, "set int failed"); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| NodePtr loop_cond = nullptr; | |||
| if (FindTargetNode(switch_node, LOOPCOND, true, loop_cond) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Get LoopCond node failed, frame_name: %s.", frame_name.c_str()); | |||
| @@ -0,0 +1,354 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/passes/parallel_group_pass.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "common/ge/ge_util.h" | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "graph/utils/node_utils.h" | |||
| namespace ge { | |||
| namespace { | |||
| // Upper bound on the nesting depth that ProcessGraphGroupNodes accepts before it reports a failure. | |||
| const int32_t kMaxRecursionDepth = 10; | |||
| // Value of ATTR_NAME_STREAM_SWITCH_TYPE that IsWhileStreamSwitch compares against. | |||
| const int64_t kLoopType = 1; | |||
| } | |||
| // Pass entry point. Validates the graph, skips subgraphs, then processes the parallel-group nodes of the | |||
| // root graph between two topological sorts. Returns SUCCESS, PARAM_INVALID, INTERNAL_ERROR or FAILED. | |||
| Status ParallelGroupPass::Run(ComputeGraphPtr graph) { | |||
| GELOGD("ParallelGroupPass running"); | |||
| if (graph == nullptr) { | |||
| GELOGE(PARAM_INVALID, "[Check][Graph]Input param graph is null, skip ParallelGroupPass."); | |||
| REPORT_INNER_ERROR("E19999", "Input param graph is null, skip ParallelGroupPass."); | |||
| return PARAM_INVALID; | |||
| } | |||
| // Subgraphs are not handled directly; they are visited from the root graph by ProcessGraphGroupNodes. | |||
| const bool is_root_graph = (graph->GetParentGraph() == nullptr); | |||
| if (!is_root_graph) { | |||
| GELOGD("Current graph %s is a subgraph, this pass only support root graph.", graph->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| // Sort before processing; op ids are compared later on (presumably they follow topological order - confirm). | |||
| if (graph->TopologicalSorting() != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "[TopoSort][Graph]Graph:%s topological sort failed.", graph->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "Graph:%s topological sort failed when ParallelGroupPass run.", | |||
| graph->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| // Recursion starts at depth 0; the collected group names are not needed after the call. | |||
| std::unordered_set<std::string> found_groups; | |||
| const int root_depth = 0; | |||
| const Status process_ret = ProcessGraphGroupNodes(graph, root_depth, found_groups); | |||
| if (process_ret != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "[Process][Graph]Process group nodes of graph %s failed.", graph->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| // Sort again because control edges may have been added. | |||
| if (graph->TopologicalSorting() != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "[TopoSort][Graph]Graph:%s topological sort failed.", graph->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "Graph:%s topological sort failed when ParallelGroupPass run.", | |||
| graph->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Collects the nodes of `graph` that carry ATTR_NAME_PARALLEL_GROUP, recurses into the subgraphs of every | |||
| // node, and then chains the nodes of each group in collection order via ReplaceWithSwitchAndMerge. | |||
| //   graph           - graph whose direct nodes are inspected. | |||
| //   depth           - current nesting level; the call fails once it reaches kMaxRecursionDepth. | |||
| //   parallel_groups - output: group names found in this graph and in the processed subgraphs. | |||
| // Returns SUCCESS, or an error status when recursion, switch analysis or edge insertion fails. | |||
| Status ParallelGroupPass::ProcessGraphGroupNodes(ComputeGraphPtr graph, int32_t depth, | |||
| std::unordered_set<std::string> &parallel_groups) { | |||
| if (depth >= kMaxRecursionDepth) { | |||
| GELOGE(FAILED, "[Process][SubGraph]There are too much subgraphs:%d > %d(max subgraphs)", depth, kMaxRecursionDepth); | |||
| REPORT_INNER_ERROR("E19999", "There are too much subgraphs:%d > %d(max subgraphs)", depth, kMaxRecursionDepth); | |||
| return FAILED; | |||
| } | |||
| std::map<std::string, vector<NodePtr>> group_nodes; | |||
| auto candidates = graph->GetDirectNode(); | |||
| // Subgraph instances are looked up by name on the root graph, so resolve and check it once up front. | |||
| auto root_graph = GraphUtils::FindRootGraph(graph); | |||
| GE_CHECK_NOTNULL(root_graph); | |||
| for (const auto &node : candidates) { | |||
| OpDescPtr op_desc = node->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| continue; | |||
| } | |||
| std::string group_name; | |||
| if (AttrUtils::GetStr(op_desc, ATTR_NAME_PARALLEL_GROUP, group_name)) { | |||
| group_nodes[group_name].push_back(node); | |||
| parallel_groups.insert(group_name); | |||
| GELOGD("Find group node:%s, group_name:%s", node->GetName().c_str(), group_name.c_str()); | |||
| } | |||
| const auto &subgraph_name = op_desc->GetSubgraphInstanceNames(); | |||
| // Subgraph instance names are walked in reverse order. | |||
| for (auto name_iter = subgraph_name.rbegin(); name_iter != subgraph_name.rend(); ++name_iter) { | |||
| const auto &sub_graph = root_graph->GetSubgraph(*name_iter); | |||
| GE_CHECK_NOTNULL(sub_graph); | |||
| // If this pass added control edges between known and unknown graphs, the known graph would become an | |||
| // unknown graph. The order between known and unknown graphs is guaranteed by the dynamic shape executor, | |||
| // so the parallel group pass does nothing for unknown graphs. | |||
| if (sub_graph->GetGraphUnknownFlag()) { | |||
| continue; | |||
| } | |||
| std::unordered_set<std::string> sub_parallel_groups; | |||
| auto ret = ProcessGraphGroupNodes(sub_graph, depth + 1, sub_parallel_groups); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(FAILED, "[Process][SubGraph]Process sub graph %s failed.", sub_graph->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| // The node owning the subgraph stands in for every group that appears inside that subgraph. | |||
| for (const auto &sub_parallel_group : sub_parallel_groups) { | |||
| parallel_groups.insert(sub_parallel_group); | |||
| group_nodes[sub_parallel_group].emplace_back(node); | |||
| } | |||
| } | |||
| } | |||
| std::map<NodePtr, std::pair<std::set<NodePtr>, NodePtr>> node_2_switch_merge; | |||
| if (ProcessGroupNodeInSwitch(graph, node_2_switch_merge) != SUCCESS) { | |||
| GELOGE(FAILED, "[Process][Node]Process group node in switch failed, graph:%s.", graph->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| // Link each group's nodes pairwise: nodes[0] -> nodes[1] -> ... -> nodes[n-1]. | |||
| for (const auto &itr : group_nodes) { | |||
| const auto &nodes = itr.second; | |||
| if (nodes.empty()) { | |||
| continue; | |||
| } | |||
| NodePtr pre_node = nodes[0]; | |||
| NodePtr cur_node = nullptr; | |||
| for (std::size_t i = 1; i < nodes.size(); i++) { | |||
| cur_node = nodes[i]; | |||
| GELOGD("Original add ctrl anchor for node:%s->%s", pre_node->GetName().c_str(), | |||
| cur_node->GetName().c_str()); | |||
| if (ReplaceWithSwitchAndMerge(pre_node, cur_node, node_2_switch_merge) != SUCCESS) { | |||
| GELOGE(FAILED, "[Replace][Node]Replace switch and merges for nodes: %s and %s failed.", | |||
| pre_node->GetName().c_str(), cur_node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| pre_node = cur_node; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Adds a control edge pre_node -> cur_node. Nothing is added when both are the same node or when | |||
| // pre_node is already one of cur_node's input nodes. Returns the status of GraphUtils::AddEdge otherwise. | |||
| Status ParallelGroupPass::AddCtrlEdge(NodePtr pre_node, NodePtr cur_node) { | |||
| if (pre_node == cur_node) { | |||
| GELOGD("Pre_node and cur_node are same, no need add anchor"); | |||
| return SUCCESS; | |||
| } | |||
| for (const auto &input_node : cur_node->GetInAllNodes()) { | |||
| if (input_node != pre_node) { | |||
| continue; | |||
| } | |||
| GELOGD("Node:%s and %s already linked", pre_node->GetName().c_str(), cur_node->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| GELOGD("Finally add ctrl anchor for node:%s->%s", pre_node->GetName().c_str(), cur_node->GetName().c_str()); | |||
| return GraphUtils::AddEdge(pre_node->GetOutControlAnchor(), cur_node->GetInControlAnchor()); | |||
| } | |||
| // For every StreamSwitch among the direct nodes of `graph` (except those recognised by | |||
| // IsBigSmallLoopStreamSwitch or IsWhileStreamSwitch), finds the parallel-group nodes and StreamMerge nodes | |||
| // reachable from it and records them in `node_2_switch_merge` through MappingNodeToSwitchAndMerge. | |||
| // Returns FAILED when no merge node is found, when group nodes span more than one stream label, | |||
| // or when the mapping fails; SUCCESS otherwise. | |||
| Status ParallelGroupPass::ProcessGroupNodeInSwitch(ComputeGraphPtr graph, | |||
| std::map<NodePtr, std::pair<std::set<NodePtr>, NodePtr>> &node_2_switch_merge) { | |||
| auto direct_nodes = graph->GetDirectNode(); | |||
| for (const auto &node : direct_nodes) { | |||
| if (node->GetType() != STREAMSWITCH) { | |||
| continue; | |||
| } | |||
| if (IsBigSmallLoopStreamSwitch(node->GetOpDesc()) || | |||
| IsWhileStreamSwitch(node->GetOpDesc())) { | |||
| continue; | |||
| } | |||
| std::vector<NodePtr> merge_nodes; | |||
| std::set<NodePtr> group_nodes; | |||
| std::set<std::string> stream_labels; | |||
| FindGroupNodeAndMerge(node, group_nodes, merge_nodes, stream_labels); | |||
| if (merge_nodes.empty() || (!group_nodes.empty() && stream_labels.size() > 1)) { | |||
| GELOGE(FAILED, "[Process][Node]Cannot find merge node or exist switch nestification, switch node:%s," | |||
| "merge_vec size:%zu, stream_labels size:%zu, graph:%s.", node->GetName().c_str(), | |||
| merge_nodes.size(), stream_labels.size(), graph->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "Cannot find merge node or exist switch nest, switch node:%s," | |||
| "merge_vec size: %zu, stream_labels size: %zu, graph:%s.", node->GetName().c_str(), | |||
| merge_nodes.size(), stream_labels.size(), graph->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| // Order merge nodes by op id so that back() is the one with the largest id. | |||
| // The comparator takes const references to avoid copying two shared pointers per comparison. | |||
| std::sort(merge_nodes.begin(), merge_nodes.end(), | |||
| [] (const NodePtr &a, const NodePtr &b) -> bool { | |||
| return (a->GetOpDesc()->GetId() < b->GetOpDesc()->GetId()); | |||
| }); | |||
| // The node feeding data input 0 of the StreamSwitch is what gets stored in the mapping. | |||
| NodePtr cast_node = NodeUtils::GetInDataNodeByIndex(*node, 0); | |||
| GE_CHECK_NOTNULL(cast_node); | |||
| if (MappingNodeToSwitchAndMerge(group_nodes, merge_nodes, | |||
| cast_node, node, | |||
| node_2_switch_merge) != SUCCESS) { | |||
| GELOGE(FAILED, "[Mapping][Node]Mapping node to switch and merge failed, graph:%s.", graph->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "[Mapping][Node]Mapping node to switch and merge failed, graph:%s.", | |||
| graph->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Breadth-first walk over the outputs of `stream_switch_node`. StreamMerge nodes end a path and are | |||
| // appended to `merge_nodes`; nodes carrying ATTR_NAME_PARALLEL_GROUP go into `group_nodes`; the | |||
| // ATTR_NAME_STREAM_LABEL value of every newly visited node is added to `stream_labels`. | |||
| void ParallelGroupPass::FindGroupNodeAndMerge(NodePtr stream_switch_node, std::set<NodePtr> &group_nodes, | |||
| std::vector<NodePtr> &merge_nodes, std::set<std::string> &stream_labels) { | |||
| std::set<NodePtr> seen; | |||
| std::deque<NodePtr> pending; | |||
| pending.push_back(stream_switch_node); | |||
| while (!pending.empty()) { | |||
| NodePtr current = pending.front(); | |||
| pending.pop_front(); | |||
| for (const auto &out_node : current->GetOutAllNodes()) { | |||
| // A merge node is recorded each time it is reached and is never expanded further. | |||
| if (out_node->GetType() == STREAMMERGE) { | |||
| merge_nodes.emplace_back(out_node); | |||
| continue; | |||
| } | |||
| const auto &op = out_node->GetOpDesc(); | |||
| if (op != nullptr && op->HasAttr(ATTR_NAME_PARALLEL_GROUP)) { | |||
| group_nodes.emplace(out_node); | |||
| } | |||
| // insert() reports whether the node is new; nodes already seen are not queued again. | |||
| if (!seen.insert(out_node).second) { | |||
| continue; | |||
| } | |||
| pending.push_back(out_node); | |||
| std::string stream_label; | |||
| if (ge::AttrUtils::GetStr(op, ATTR_NAME_STREAM_LABEL, stream_label)) { | |||
| stream_labels.insert(stream_label); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| // Records, for every node in `group_nodes`, the pair (set of cast nodes, merge node) in | |||
| // `node_2_switch_merge`. The merge node used is merge_nodes.back(). A group node that already has an | |||
| // entry must map to the same merge node, otherwise FAILED is returned. `switch_node` is only logged. | |||
| Status ParallelGroupPass::MappingNodeToSwitchAndMerge(const std::set<NodePtr> &group_nodes, | |||
| const std::vector<NodePtr> &merge_nodes, | |||
| const NodePtr &cast_node, const NodePtr &switch_node, | |||
| std::map<NodePtr, std::pair<std::set<NodePtr>, NodePtr>> &node_2_switch_merge) { | |||
| for (const auto &group_node : group_nodes) { | |||
| auto found = node_2_switch_merge.find(group_node); | |||
| if (found == node_2_switch_merge.end()) { | |||
| // First time this group node is seen: start a new entry. | |||
| node_2_switch_merge.emplace(group_node, | |||
| std::make_pair(std::set<NodePtr>{cast_node}, merge_nodes.back())); | |||
| continue; | |||
| } | |||
| auto &entry = found->second; | |||
| const auto &known_merge = entry.second; | |||
| GELOGD("Find group node: %s in switch %s and merge %s.", | |||
| group_node->GetName().c_str(), switch_node->GetName().c_str(), known_merge->GetName().c_str()); | |||
| if (known_merge != merge_nodes.back()) { | |||
| GELOGE(FAILED, "[Mapping][Node]Has two different merge nodes: %s and %s, graph's structure is invalid", | |||
| known_merge->GetName().c_str(), merge_nodes.back()->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "Has two different merge nodes: %s and %s," | |||
| "graph's structure is invalid", | |||
| known_merge->GetName().c_str(), merge_nodes.back()->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| // Same merge node: just remember the additional cast node. | |||
| entry.first.insert(cast_node); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Orders pre_node before cur_node with control edges, taking switch/merge regions into account. | |||
| // `node_2_switch_merge` maps a group node to (set of nodes feeding its StreamSwitches, merge node). | |||
| //   - neither node mapped: edge pre_node -> cur_node. | |||
| //   - only pre_node mapped: edge pre's merge node -> cur_node. | |||
| //   - both mapped: nothing if they share an entry in their sets, else pre's merge node -> each of cur's set. | |||
| //   - only cur_node mapped: per node of cur's set, pre_node -> that node, or cur's merge node -> pre_node | |||
| //     when pre_node has the larger op id (avoids creating a ring). | |||
| // Returns SUCCESS, or FAILED / the AddCtrlEdge status when an edge cannot be added. | |||
| Status ParallelGroupPass::ReplaceWithSwitchAndMerge(NodePtr pre_node, NodePtr cur_node, | |||
| const std::map<NodePtr, std::pair<std::set<NodePtr>, NodePtr>> &node_2_switch_merge) { | |||
| const auto pre_itr = node_2_switch_merge.find(pre_node); | |||
| const auto cur_itr = node_2_switch_merge.find(cur_node); | |||
| const bool pre_mapped = (pre_itr != node_2_switch_merge.end()); | |||
| const bool cur_mapped = (cur_itr != node_2_switch_merge.end()); | |||
| if (!cur_mapped) { | |||
| // cur_node is outside any switch region: link from pre_node, or from its merge node when it has one. | |||
| if (pre_mapped) { | |||
| return AddCtrlEdge(pre_itr->second.second, cur_node); | |||
| } | |||
| return AddCtrlEdge(pre_node, cur_node); | |||
| } | |||
| if (pre_mapped) { | |||
| if (HasSameSwitch(pre_itr->second.first, cur_itr->second.first)) { | |||
| return SUCCESS; | |||
| } | |||
| const NodePtr &pre_merge = pre_itr->second.second; | |||
| for (const auto &switch_node : cur_itr->second.first) { | |||
| if (AddCtrlEdge(pre_merge, switch_node) != SUCCESS) { | |||
| GELOGE(FAILED, "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
| pre_merge->GetName().c_str(), switch_node->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
| pre_merge->GetName().c_str(), switch_node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Only cur_node is inside a switch region. | |||
| const NodePtr &merge_node = cur_itr->second.second; | |||
| for (const auto &switch_node : cur_itr->second.first) { | |||
| int64_t pre_id = pre_node->GetOpDesc()->GetId(); | |||
| int64_t switch_id = switch_node->GetOpDesc()->GetId(); | |||
| // avoid ring | |||
| if (pre_id > switch_id) { | |||
| if (AddCtrlEdge(merge_node, pre_node) != SUCCESS) { | |||
| // Report the edge that was actually attempted: merge_node -> pre_node. | |||
| GELOGE(FAILED, "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
| merge_node->GetName().c_str(), pre_node->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
| merge_node->GetName().c_str(), pre_node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } else { | |||
| if (AddCtrlEdge(pre_node, switch_node) != SUCCESS) { | |||
| GELOGE(FAILED, "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
| pre_node->GetName().c_str(), switch_node->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
| pre_node->GetName().c_str(), switch_node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| // Returns true when the two sets have at least one node in common. | |||
| bool ParallelGroupPass::HasSameSwitch(const std::set<NodePtr> &switch_set1, const std::set<NodePtr> &switch_set2) { | |||
| for (const auto &candidate : switch_set1) { | |||
| if (switch_set2.count(candidate) != 0) { | |||
| return true; | |||
| } | |||
| } | |||
| return false; | |||
| } | |||
| // Returns true when the op desc does NOT carry ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG. | |||
| bool ParallelGroupPass::IsBigSmallLoopStreamSwitch(OpDescPtr switch_op_desc) { | |||
| const bool has_true_branch_flag = AttrUtils::HasAttr(switch_op_desc, ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG); | |||
| return !has_true_branch_flag; | |||
| } | |||
| // Returns true when ATTR_NAME_STREAM_SWITCH_TYPE is present on the op desc and equals kLoopType. | |||
| bool ParallelGroupPass::IsWhileStreamSwitch(OpDescPtr switch_op_desc) { | |||
| int64_t switch_type = -1; | |||
| if (!AttrUtils::GetInt(switch_op_desc, ATTR_NAME_STREAM_SWITCH_TYPE, switch_type)) { | |||
| return false; | |||
| } | |||
| return switch_type == kLoopType; | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,53 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_PASSES_PARALLEL_GROUP_PASS_H | |||
| #define GE_GRAPH_PASSES_PARALLEL_GROUP_PASS_H | |||
| #include <map> | |||
| #include <unordered_set> | |||
| #include "graph/graph.h" | |||
| #include "inc/graph_pass.h" | |||
| namespace ge { | |||
| // Graph pass that chains nodes sharing the same ATTR_NAME_PARALLEL_GROUP value with control edges. | |||
| class ParallelGroupPass : public GraphPass { | |||
| public: | |||
| // Runs the pass on `graph`; graphs that have a parent graph are skipped. | |||
| Status Run(ComputeGraphPtr graph) override; | |||
| private: | |||
| // Collects group nodes of `graph` (recursing into subgraphs, `depth` = nesting level) and links them. | |||
| // Group names that were found are returned through `parallel_group`. | |||
| Status ProcessGraphGroupNodes(ComputeGraphPtr graph, int32_t depth, std::unordered_set<std::string> &parallel_group); | |||
| // Adds a control edge pre_node -> cur_node unless the two are identical or already linked. | |||
| Status AddCtrlEdge(NodePtr pre_node, NodePtr cur_node); | |||
| // Links pre_node and cur_node, redirecting the edge to switch/merge nodes recorded in the map. | |||
| Status ReplaceWithSwitchAndMerge(NodePtr pre_node, NodePtr cur_node, | |||
| const std::map<NodePtr, std::pair<std::set<NodePtr>, NodePtr>> &node_2_switch_merge); | |||
| // True when the two sets share at least one node. | |||
| bool HasSameSwitch(const std::set<NodePtr> &a, const std::set<NodePtr> &b); | |||
| // Fills `node_2_switch_merge` for the group nodes found behind the StreamSwitch nodes of `graph`. | |||
| Status ProcessGroupNodeInSwitch(ComputeGraphPtr graph, | |||
| std::map<NodePtr, std::pair<std::set<NodePtr>, NodePtr>> &node_2_switch_merge); | |||
| // Walks the outputs of `stream_switch_node`, collecting group nodes, merge nodes and stream labels. | |||
| void FindGroupNodeAndMerge(NodePtr stream_switch_node, std::set<NodePtr> &group_nodes, | |||
| std::vector<NodePtr> &merge_nodes, std::set<std::string> &stream_labels); | |||
| // Records (cast node set, last merge node) for each group node in `node_2_switch_merge`. | |||
| Status MappingNodeToSwitchAndMerge(const std::set<NodePtr> &group_set, const std::vector<NodePtr> &merge_vec, | |||
| const NodePtr &cast_node, const NodePtr &switch_node, | |||
| std::map<NodePtr, std::pair<std::set<NodePtr>, NodePtr>> &node_2_switch_merge); | |||
| // True when the op desc lacks ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG. | |||
| bool IsBigSmallLoopStreamSwitch(OpDescPtr switch_op_desc); | |||
| // True when ATTR_NAME_STREAM_SWITCH_TYPE is set to the loop type value. | |||
| bool IsWhileStreamSwitch(OpDescPtr switch_op_desc); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_PARALLEL_GROUP_PASS_H | |||
| @@ -63,16 +63,17 @@ Status ResourcePairAddControlPass::Run(ComputeGraphPtr graph) { | |||
| NodePtr from_node = prefix_2_node.second; | |||
| GE_CHECK_NOTNULL(from_node); | |||
| auto to_item_prefix_2_node = prefix_2_node_per_type.find(resource_type_pair.second); | |||
| // stackpush and stackpop may exist in two different subgraphs, so it is not necessary to report an error | |||
| if (to_item_prefix_2_node == prefix_2_node_per_type.end()) { | |||
| GELOGE(PARAM_INVALID, "find peer type node fail, suffix:%s, from_type:%s, to_type:%s", prefix.c_str(), | |||
| GELOGW("find peer type node fail, suffix:%s, from_type:%s, to_type:%s", prefix.c_str(), | |||
| resource_type_pair.first.c_str(), resource_type_pair.second.c_str()); | |||
| return PARAM_INVALID; | |||
| continue; | |||
| } | |||
| auto to_prefix_2_node = to_item_prefix_2_node->second.find(prefix); | |||
| if (to_prefix_2_node == to_item_prefix_2_node->second.end()) { | |||
| GELOGE(PARAM_INVALID, "find peer prefix node fail, suffix:%s, from_type:%s, to_type:%s", prefix.c_str(), | |||
| GELOGW("find peer prefix node fail, suffix:%s, from_type:%s, to_type:%s", prefix.c_str(), | |||
| resource_type_pair.first.c_str(), resource_type_pair.second.c_str()); | |||
| return PARAM_INVALID; | |||
| continue; | |||
| } | |||
| NodePtr to_node = to_prefix_2_node->second; | |||
| GE_CHECK_NOTNULL(to_node); | |||
| @@ -63,16 +63,17 @@ Status ResourcePairRemoveControlPass::Run(ComputeGraphPtr graph) { | |||
| NodePtr from_node = prefix_2_node.second; | |||
| GE_CHECK_NOTNULL(from_node); | |||
| auto to_item_prefix_2_node = prefix_2_node_per_type.find(resource_type_pair.second); | |||
| // stackpush and stackpop may exist in two different subgraphs, so it is not necessary to report an error | |||
| if (to_item_prefix_2_node == prefix_2_node_per_type.end()) { | |||
| GELOGE(INTERNAL_ERROR, "find peer type node fail, suffix:%s, from_type:%s, to_type:%s", prefix.c_str(), | |||
| GELOGW("find peer type node fail, suffix:%s, from_type:%s, to_type:%s", prefix.c_str(), | |||
| resource_type_pair.first.c_str(), resource_type_pair.second.c_str()); | |||
| return domi::PARAM_INVALID; | |||
| continue; | |||
| } | |||
| auto to_prefix_2_node = to_item_prefix_2_node->second.find(prefix); | |||
| if (to_prefix_2_node == to_item_prefix_2_node->second.end()) { | |||
| GELOGE(INTERNAL_ERROR, "find peer prefix node fail, suffix:%s, from_type:%s, to_type:%s", prefix.c_str(), | |||
| GELOGW("find peer prefix node fail, suffix:%s, from_type:%s, to_type:%s", prefix.c_str(), | |||
| resource_type_pair.first.c_str(), resource_type_pair.second.c_str()); | |||
| return domi::PARAM_INVALID; | |||
| continue; | |||
| } | |||
| NodePtr to_node = to_prefix_2_node->second; | |||
| GE_CHECK_NOTNULL(to_node); | |||
| @@ -307,6 +307,13 @@ NodePtr SwitchToStreamSwitchPass::CreateStreamSwitchNode(const ComputeGraphPtr & | |||
| hccl_group_id.c_str()); | |||
| } | |||
| int64_t switch_type; | |||
| if (AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_TYPE, switch_type)) { | |||
| (void)AttrUtils::SetInt(op_desc, ATTR_NAME_STREAM_SWITCH_TYPE, switch_type); | |||
| GELOGD("Set attr ATTR_NAME_STREAM_SWITCH_TYPE for Stream_Switch %s, value is %ld.", node_name.c_str(), | |||
| switch_type); | |||
| } | |||
| if (!AttrUtils::SetInt(op_desc, ATTR_NAME_SWITCH_DATA_TYPE, RT_SWITCH_INT32) || | |||
| !AttrUtils::SetInt(op_desc, ATTR_NAME_STREAM_SWITCH_COND, (int64_t)RT_EQUAL)) { | |||
| GELOGE(INTERNAL_ERROR, "set int failed"); | |||
| @@ -86,7 +86,7 @@ Status TransposeTransDataPass::Run(NodePtr &node) { | |||
| if (CheckOneInAndOneOutDataAnchor(out_node)) { | |||
| return FAILED; | |||
| } | |||
| if (!FusionIfNeed(op_desc, out_op_desc)) { | |||
| if (!FusionIfNeed(op_desc, out_node)) { | |||
| continue; | |||
| } | |||
| CopyInputEdges(node, out_node); | |||
| @@ -152,7 +152,8 @@ Status TransposeTransDataPass::RemoveTranspose(NodePtr &node) { | |||
| return SUCCESS; | |||
| } | |||
| bool TransposeTransDataPass::FusionIfNeed(OpDescPtr &op_desc, OpDescPtr &transdata_op_desc) { | |||
| bool TransposeTransDataPass::FusionIfNeed(OpDescPtr &op_desc, NodePtr &node) { | |||
| auto transdata_op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| GE_CHECK_NOTNULL(transdata_op_desc); | |||
| auto out_input_desc = transdata_op_desc->MutableInputDesc(0); | |||
| @@ -187,7 +188,7 @@ bool TransposeTransDataPass::FusionIfNeed(OpDescPtr &op_desc, OpDescPtr &transda | |||
| out_input_desc->SetFormat(src_format); | |||
| out_input_desc->SetShape(src_shape); | |||
| if (!TransDataCheckAccuracySupported(transdata_op_desc)) { | |||
| if (!TransDataCheckAccuracySupported(node)) { | |||
| out_input_desc->SetFormat(out_input_format); | |||
| out_input_desc->SetShape(out_input_shape); | |||
| return false; | |||
| @@ -224,7 +225,8 @@ void TransposeTransDataPass::CopyInputEdges(NodePtr &origin_node, NodePtr &new_n | |||
| GraphUtils::CopyInCtrlEdges(origin_node, new_node) != GRAPH_SUCCESS, GELOGW("Copy in ctrl edges failed"); return); | |||
| } | |||
| bool TransposeTransDataPass::TransDataCheckAccuracySupported(const OpDescPtr &op_desc) { | |||
| bool TransposeTransDataPass::TransDataCheckAccuracySupported(NodePtr &node) { | |||
| const OpDescPtr &op_desc = node->GetOpDesc(); | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
| GELOGW("GELib not initialized"); | |||
| @@ -244,7 +246,7 @@ bool TransposeTransDataPass::TransDataCheckAccuracySupported(const OpDescPtr &op | |||
| auto &kernel_name = it.opKernelLib; | |||
| auto kernel_info_store = kernel_map.find(kernel_name); | |||
| if (kernel_info_store != kernel_map.end()) { | |||
| if (kernel_info_store->second->CheckAccuracySupported(op_desc, unsupported_reason, true)) { | |||
| if (kernel_info_store->second->CheckAccuracySupported(node, unsupported_reason, true)) { | |||
| return true; | |||
| } | |||
| } | |||
| @@ -26,9 +26,9 @@ class TransposeTransDataPass : public BaseNodePass { | |||
| private: | |||
| Status CheckOneInAndOneOutDataAnchor(NodePtr &node) const; | |||
| Status RemoveTranspose(NodePtr &node); | |||
| bool FusionIfNeed(OpDescPtr &op_desc, OpDescPtr &transdata_op_desc); | |||
| bool FusionIfNeed(OpDescPtr &op_desc, NodePtr &node); | |||
| void CopyInputEdges(NodePtr &origin_node, NodePtr &new_node); | |||
| bool TransDataCheckAccuracySupported(const OpDescPtr &op_desc); | |||
| bool TransDataCheckAccuracySupported(NodePtr &node); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_TRANSPOSE_TRANSDATA_PASS_H_ | |||
| @@ -119,8 +119,9 @@ Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { | |||
| return INTERNAL_ERROR; | |||
| } | |||
| auto graph_id = GraphUtils::FindRootGraph(graph)->GetGraphID(); | |||
| GELOGD("Begin to run variable op pass on graph %s, session %lu, graph id %u", graph->GetName().c_str(), | |||
| GetContext().SessionId(), graph->GetGraphID()); | |||
| GetContext().SessionId(), graph_id); | |||
| if (var_accelerate_ctrl_ == nullptr) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to run var op pass, the variable accelerate control is null"); | |||
| @@ -176,7 +177,7 @@ Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to update the format fusion road for var %s", node->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| ret = VarManager::Instance(graph->GetSessionID())->SetChangedGraphId(node->GetName(), graph->GetGraphID()); | |||
| ret = VarManager::Instance(graph->GetSessionID())->SetChangedGraphId(node->GetName(), graph_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Failed to update the graph id for var %s", node->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| @@ -23,6 +23,7 @@ | |||
| #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | |||
| #include "common/formats/format_transfers/format_transfer_transpose.h" | |||
| #include "common/formats/utils/formats_trans_utils.h" | |||
| #include "common/util/error_manager/error_manager.h" | |||
| #include "common/helper/model_helper.h" | |||
| #include "common/math/math_util.h" | |||
| #include "common/op/ge_op_utils.h" | |||
| @@ -1304,7 +1305,8 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, | |||
| auto format = desc.GetFormat(); | |||
| auto origin_format = desc.GetOriginFormat(); | |||
| // Data may be in an internal format (e.g. FRACTAL_NZ) in the single-op process, such as GEMM. | |||
| bool need_check_internal_format = (!IsTansDataOpData(input_node)) && (!options_.is_single_op); | |||
| auto tune_flag = (options_.build_mode == BUILD_MODE_TUNING) && (options_.build_step == BUILD_STEP_AFTER_BUILDER); | |||
| bool need_check_internal_format = (!IsTansDataOpData(input_node)) && (!options_.is_single_op) && (!tune_flag); | |||
| if (need_check_internal_format) { | |||
| bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); | |||
| if (is_internal) { | |||
| @@ -1346,19 +1348,22 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, | |||
| return FAILED; | |||
| } | |||
| ge::TensorUtils::SetSize(desc, shape_size); | |||
| graphStatus graph_ret = op->UpdateInputDesc(0, desc); | |||
| if (graph_ret != GRAPH_SUCCESS) { | |||
| GELOGE(graph_ret, "UpdateInputDesc fail, graph_ret:%u", graph_ret); | |||
| return graph_ret; | |||
| } | |||
| // Size will be recalculated in the build stage | |||
| ge::TensorUtils::SetSize(desc, 0); | |||
| graph_ret = op->UpdateOutputDesc(0, desc); | |||
| if (graph_ret != GRAPH_SUCCESS) { | |||
| GELOGE(graph_ret, "UpdateOutputDesc fail, graph_ret:%u", graph_ret); | |||
| return graph_ret; | |||
| if (!tune_flag) { | |||
| graphStatus graph_ret = op->UpdateInputDesc(0, desc); | |||
| if (graph_ret != GRAPH_SUCCESS) { | |||
| GELOGE(graph_ret, "UpdateInputDesc fail, graph_ret:%u", graph_ret); | |||
| return graph_ret; | |||
| } | |||
| // Size will be recalculated in the build stage | |||
| ge::TensorUtils::SetSize(desc, 0); | |||
| graph_ret = op->UpdateOutputDesc(0, desc); | |||
| if (graph_ret != GRAPH_SUCCESS) { | |||
| GELOGE(graph_ret, "UpdateOutputDesc fail, graph_ret:%u", graph_ret); | |||
| return graph_ret; | |||
| } | |||
| } else { | |||
| GELOGI("data %s skip update info in tune mode", op->GetName().c_str()); | |||
| } | |||
| if (!dynamic_shape_range_vec.empty()) { | |||
| ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc); | |||
| GE_CHK_STATUS_RET(ret, "Fail to update dynamic input shape range on %s.", op->GetName().c_str()); | |||
| @@ -1763,13 +1768,13 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) { | |||
| GeTensorDesc desc(user_input[index].GetTensorDesc()); | |||
| for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) { | |||
| if (desc.GetShape().GetDim(i) < 0) { | |||
| std::string situation = "data dim[" + std::to_string(i) + "][" + | |||
| std::to_string(desc.GetShape().GetDim(i)) + "]" ; | |||
| std::string reason = "it need >= 0"; | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); | |||
| GELOGE(GE_GRAPH_INIT_FAILED, "data dim %zu is not supported, need >= 0, real:%ld.", i, | |||
| desc.GetShape().GetDim(i)); | |||
| int64_t dim = desc.GetShape().GetDim(i); | |||
| if (dim < UNKNOWN_DIM_NUM) { | |||
| std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(dim) + "]" ; | |||
| std::string reason = "it need >= -2"; | |||
| REPORT_INPUT_ERROR( | |||
| "E19025", std::vector<std::string>({"situation", "reason"}), std::vector<std::string>({situation, reason})); | |||
| GELOGE(GE_GRAPH_INIT_FAILED, "[Check][InputDim]data dim %zu is not supported, need >= -2, real:%ld.", i, dim); | |||
| return GE_GRAPH_INIT_FAILED; | |||
| } | |||
| } | |||
| @@ -428,7 +428,8 @@ Status AippOp::ConvertRelatedInputNameToRank() { | |||
| if (!convert_flag) { | |||
| string error_msg = "Top name " + related_input_name + "convert rank failed, Please" | |||
| " ensure top name in aipp config is the top name of data node."; | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||
| return PARAM_INVALID; | |||
| } | |||
| @@ -1,4 +1,4 @@ | |||
| /** | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| @@ -124,13 +124,15 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() { | |||
| if (another_item->related_input_name().empty()) { | |||
| string error_msg = "Can not both set related_input_name and related_input_rank!" | |||
| " Please ensure param is the same with the first aipp config(related_input_name)."; | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||
| return PARAM_INVALID; | |||
| } | |||
| if (item->related_input_name() == another_item->related_input_name()) { | |||
| string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" | |||
| " param is different in different aipp config."; | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||
| return PARAM_INVALID; | |||
| } | |||
| } | |||
| @@ -150,13 +152,15 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() { | |||
| if (!another_item->related_input_name().empty()) { | |||
| string error_msg = "Can not both set related_input_rank and related_input_name!" | |||
| " Please ensure param is the same with the first aipp config(related_input_rank)."; | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||
| return PARAM_INVALID; | |||
| } | |||
| if (item->related_input_rank() == another_item->related_input_rank()) { | |||
| string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank" | |||
| " param is different in different aipp config."; | |||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||
| return PARAM_INVALID; | |||
| } | |||
| } | |||
| @@ -212,7 +216,7 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { | |||
| } | |||
| } | |||
| } | |||
| GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), | |||
| GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), | |||
| PARAM_INVALID, | |||
| "Can not config part of outputs of Data node to support AIPP, config all " | |||
| "of the outputs of Data to support AIPP, or config none of them"); | |||
| @@ -3,6 +3,7 @@ set(PROTO_LIST | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_atcstub PROTO_ATCSTUB_SRCS PROTO_ATCSTUB_HDRS ${PROTO_LIST}) | |||
| set(SRC_LIST | |||
| "engine/host_cpu_engine.cc" | |||
| @@ -61,7 +62,7 @@ target_link_libraries(host_cpu_engine PRIVATE | |||
| ) | |||
| ############ atcstub/libhost_cpu_engine.so ############ | |||
| add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_ATCSTUB_HDRS}) | |||
| target_compile_options(atc_host_cpu_engine PRIVATE | |||
| -Werror | |||
| @@ -84,7 +85,7 @@ target_include_directories(atc_host_cpu_engine PRIVATE | |||
| ${METADEF_DIR}/inc/external | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_atcstub | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -18,6 +18,7 @@ | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <securec.h> | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "common/ge/ge_util.h" | |||
| #include "host_cpu_engine/common/constant/constant.h" | |||
| @@ -34,7 +35,8 @@ Status HostCpuEngine::Initialize(const std::map<string, string> &options) { | |||
| if (ops_kernel_store_ == nullptr) { | |||
| ops_kernel_store_ = MakeShared<HostCpuOpsKernelInfoStore>(); | |||
| if (ops_kernel_store_ == nullptr) { | |||
| GELOGE(FAILED, "Make HostCpuOpsKernelInfoStore failed."); | |||
| GELOGE(FAILED, "[Create][HostCpuEngine]Make HostCpuOpsKernelInfoStore failed."); | |||
| REPORT_INNER_ERROR("E19999", "HostCpuEngine::Initialize failed for new HostCpuEngine."); | |||
| return FAILED; | |||
| } | |||
| } | |||
| @@ -21,6 +21,7 @@ | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include <securec.h> | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "host_cpu_engine/common/constant/constant.h" | |||
| #include "register/ops_kernel_builder_registry.h" | |||
| @@ -39,7 +40,8 @@ Status HostCpuOpsKernelBuilder::Initialize(const map<std::string, std::string> & | |||
| Status HostCpuOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { | |||
| OpDescPtr op_desc = ge_node.GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| GELOGE(FAILED, "CalcOpRunningParam failed, as op desc is null"); | |||
| GELOGE(FAILED, "[Get][OpDesc]CalcOpRunningParam failed, as op desc is null"); | |||
| REPORT_INNER_ERROR("E19999", "GetOpDesc failed."); | |||
| return FAILED; | |||
| } | |||
| @@ -73,9 +75,14 @@ Status HostCpuOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { | |||
| GeShape output_shape = output_tensor.GetShape(); | |||
| if ((TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size) != GRAPH_SUCCESS) || | |||
| (output_mem_size < 0)) { | |||
| GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, mem_size=%ld, format=%s, data_type=%s.", | |||
| name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| GELOGE(FAILED, | |||
| "[Calc][TensorMemSize] fail for op[%s:%s] out[%zu] mem size, mem_size=%ld, format=%s, data_type=%s.", | |||
| name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| REPORT_CALL_ERROR("E19999", | |||
| "CalcTensorMemSize failed for op[%s:%s] out[%zu] mem size, mem_size=%ld, format=%s, data_type=%s.", | |||
| name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return FAILED; | |||
| } | |||
| GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.", | |||
| @@ -84,8 +91,13 @@ Status HostCpuOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { | |||
| TensorUtils::SetSize(output_tensor, output_mem_size); | |||
| if (op_desc->UpdateOutputDesc(static_cast<uint32_t>(i), output_tensor) != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Update op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, | |||
| TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| GELOGE(FAILED, | |||
| "[Update][OutputDesc] fail for op[%s:%s] out[%zu] desc , format=%s, data_type=%s.", | |||
| name.c_str(), type.c_str(), i, | |||
| TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| REPORT_CALL_ERROR("E19999", "UpdateOutputDesc failed for op[%s:%s] out[%zu] desc , format=%s, data_type=%s.", | |||
| name.c_str(), type.c_str(), i, | |||
| TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| @@ -33,7 +33,7 @@ const int kNumOne = 1; | |||
| } // namespace | |||
| Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGeTensorPtr> &input, | |||
| vector<GeTensorPtr> &v_output) { | |||
| GELOGI("ConcatOffsetKernel in."); | |||
| GELOGD("ConcatOffsetKernel in"); | |||
| if (op_desc_ptr == nullptr) { | |||
| GELOGE(PARAM_INVALID, "input opdesc is nullptr."); | |||
| return PARAM_INVALID; | |||
| @@ -41,7 +41,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||
| // validate attrs | |||
| int N = 0; | |||
| if (!(AttrUtils::GetInt(op_desc_ptr, "N", N))) { | |||
| GELOGW("Attr %s does not exist.", "N"); | |||
| GELOGW("Attr %s does not exist", "N"); | |||
| return NOT_CHANGED; | |||
| } | |||
| // follow IR def, the first input is concat_dim | |||
| @@ -50,7 +50,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||
| int32_t concat_dim = *(const_cast<int32_t *>(reinterpret_cast<const int32_t *>(input_0->GetData().data()))); | |||
| // validate inputs | |||
| if ((static_cast<int>(input.size()) != (N + kNumOne)) || (input.size() <= kConcatOffsetInputIndexOne)) { | |||
| GELOGW("The number of input for concat offset must be equal to %d, and must be more than one.", (N + kNumOne)); | |||
| GELOGW("The number of input for concat offset must be equal to %d, and must be more than one", (N + kNumOne)); | |||
| return NOT_CHANGED; | |||
| } | |||
| @@ -61,7 +61,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||
| GELOGW("Concat dim is bigger than the size of output_shape."); | |||
| return NOT_CHANGED; | |||
| } | |||
| GELOGI("Output shape size is %ld", output_size); | |||
| GELOGI("Output shape size is %ld.", output_size); | |||
| int32_t offset = 0; | |||
| if (output_size < 0) { | |||
| GELOGE(FAILED, "Index is negative."); | |||
| @@ -86,7 +86,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||
| output_ptr->MutableTensorDesc().SetShape(output_shape); | |||
| GE_IF_BOOL_EXEC(output_ptr->SetData(reinterpret_cast<uint8_t *>(buf.get()), | |||
| static_cast<size_t>(sizeof(DT_INT32) * output_size)) != GRAPH_SUCCESS, | |||
| GELOGW("set data failed"); | |||
| GELOGW("set data failed."); | |||
| return NOT_CHANGED); | |||
| v_output.push_back(output_ptr); | |||
| // calculate offset | |||
| @@ -99,7 +99,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||
| } | |||
| offset += input_dim; | |||
| } | |||
| GELOGI("ConcatOffsetKernel success."); | |||
| GELOGD("ConcatOffsetKernel success"); | |||
| return SUCCESS; | |||
| } | |||
| REGISTER_KERNEL(CONCATOFFSET, ConcatOffsetKernel); | |||
| @@ -111,8 +111,9 @@ void DynamicStitchKernel::ComputeMergedShape(const vector<ConstGeTensorPtr> &inp | |||
| int32_t merged_first_dim = 0; | |||
| int64_t indices_shape_size = 0; | |||
| for (int i = 0; i < n_; i++) { | |||
| indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize(); | |||
| indices_shape_size = indices_shape_size == 0 ? 1 : indices_shape_size; | |||
| // shape is [] means scalar | |||
| indices_shape_size = | |||
| input[i]->GetTensorDesc().GetShape().GetDims().empty() ? 1 : input[i]->GetTensorDesc().GetShape().GetShapeSize(); | |||
| const int32_t *input_indices = reinterpret_cast<const int32_t *>(input[i]->GetData().data()); | |||
| for (int64_t j = 0; j < indices_shape_size; j++) { | |||
| merged_first_dim = std::max(merged_first_dim, input_indices[j]); | |||
| @@ -278,7 +278,7 @@ Status GatherV2Kernel::SaveIndicesByDataType(ConstGeTensorPtr indices_tensor_ptr | |||
| auto indices_ptr = const_cast<int32_t *>(reinterpret_cast<const int32_t *>(indices_tensor_ptr->GetData().data())); | |||
| for (int64_t i = 0; i < indices_shape.GetShapeSize(); i++) { | |||
| if (*(indices_ptr + i) < 0 || *(indices_ptr + i) >= x_shape.GetDim(axis)) { | |||
| GELOGW("indices %ld value is not in range [0, %ld)", i, x_shape.GetDim(axis)); | |||
| GELOGW("indices %ld value is not in range [0, %ld).", i, x_shape.GetDim(axis)); | |||
| return NOT_CHANGED; | |||
| } | |||
| indicates_.push_back(*(indices_ptr + i)); | |||
| @@ -288,7 +288,7 @@ Status GatherV2Kernel::SaveIndicesByDataType(ConstGeTensorPtr indices_tensor_ptr | |||
| auto indices_ptr = const_cast<int64_t *>(reinterpret_cast<const int64_t *>(indices_tensor_ptr->GetData().data())); | |||
| for (int64_t i = 0; i < indices_shape.GetShapeSize(); i++) { | |||
| if (*(indices_ptr + i) < 0 || *(indices_ptr + i) >= x_shape.GetDim(axis)) { | |||
| GELOGW("indices %ld value is not in range [0, %ld)", i, x_shape.GetDim(axis)); | |||
| GELOGW("indices %ld value is not in range [0, %ld).", i, x_shape.GetDim(axis)); | |||
| return NOT_CHANGED; | |||
| } | |||
| indicates_.push_back(*(indices_ptr + i)); | |||
| @@ -344,30 +344,30 @@ Status GatherV2Kernel::Check(const OpDescPtr &op_desc_ptr, const vector<ConstGeT | |||
| auto indices_data_type = tensor1->GetTensorDesc().GetDataType(); | |||
| bool is_valid_indices_data_type = indices_data_type == DT_INT32 || indices_data_type == DT_INT64; | |||
| if (!is_valid_indices_data_type) { | |||
| GELOGW("indices datatype must be DT_INT32 or DT_INT64"); | |||
| GELOGW("indices datatype must be DT_INT32 or DT_INT64."); | |||
| return NOT_CHANGED; | |||
| } | |||
| if (indices_shape.GetDimNum() > kMaxIndicatesDims) { | |||
| GELOGW("indices input only support 0 or 1 dims"); | |||
| GELOGW("indices input only support 0 or 1 dims."); | |||
| return NOT_CHANGED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| void GatherV2Kernel::DebugPrint(int64_t axis, const GeShape &x_shape, const GeShape &indices_shape, | |||
| const std::vector<int64_t> &y_shape) { | |||
| GELOGD("GatherV2Kernel axis:%ld x_shape:%zu indices_shape:%zu y_shape:%zu", axis, x_shape.GetDimNum(), | |||
| GELOGD("GatherV2Kernel axis:%ld x_shape:%zu indices_shape:%zu y_shape:%zu.", axis, x_shape.GetDimNum(), | |||
| indices_shape.GetDimNum(), y_shape.size()); | |||
| for (size_t i = 0; i < x_shape.GetDimNum(); i++) { | |||
| GELOGD("GatherV2Kernel x_shape[%zu]: %ld", i, x_shape.GetDim(i)); | |||
| GELOGD("GatherV2Kernel x_shape[%zu]: %ld.", i, x_shape.GetDim(i)); | |||
| } | |||
| for (size_t i = 0; i < indices_shape.GetDimNum(); i++) { | |||
| GELOGD("GatherV2Kernel indices_shape[%zu]: %ld", i, indices_shape.GetDim(i)); | |||
| GELOGD("GatherV2Kernel indices_shape[%zu]: %ld.", i, indices_shape.GetDim(i)); | |||
| } | |||
| for (size_t i = 0; i < y_shape.size(); i++) { | |||
| GELOGD("GatherV2Kernel y_shape[%zu]: %ld", i, y_shape[i]); | |||
| GELOGD("GatherV2Kernel y_shape[%zu]: %ld.", i, y_shape[i]); | |||
| } | |||
| for (auto ele : indicates_) { | |||
| GELOGD("GatherV2Kernel indices:%ld", ele); | |||
| GELOGD("GatherV2Kernel indices:%ld.", ele); | |||
| } | |||
| } | |||
| @@ -376,10 +376,10 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe | |||
| GELOGI("Enter GatherV2Kernel Process."); | |||
| Status ret = Check(op_desc_ptr, input, v_output); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("param check failed."); | |||
| GELOGW("param check failed"); | |||
| return NOT_CHANGED; | |||
| } | |||
| GELOGI("GatherV2Kernel[%s] start Process.", op_desc_ptr->GetName().c_str()); | |||
| GELOGI("GatherV2Kernel[%s] start Process", op_desc_ptr->GetName().c_str()); | |||
| ConstGeTensorPtr tensor0 = input.at(kGatherV2InputIndexZero); | |||
| ConstGeTensorPtr tensor1 = input.at(kGatherV2InputIndexOne); | |||
| ConstGeTensorPtr tensor2 = input.at(kGatherV2InputIndexTwo); | |||
| @@ -394,7 +394,7 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe | |||
| axis = axis >= 0 ? axis : axis + x_shape.GetDimNum(); | |||
| // check axis value | |||
| if (axis < 0 || (axis + 1) > static_cast<int64_t>(x_shape.GetDimNum())) { | |||
| GELOGW("axis is invalid"); | |||
| GELOGW("axis is invalid!"); | |||
| return NOT_CHANGED; | |||
| } | |||
| auto indices_data_type = tensor1->GetTensorDesc().GetDataType(); | |||
| @@ -407,7 +407,8 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe | |||
| // check input data type | |||
| auto x_data_type = tensor0->GetTensorDesc().GetDataType(); | |||
| if (supported_type.find(x_data_type) == supported_type.end()) { | |||
| GELOGI("GatherV2Kernel does not support this Data type:%s", TypeUtils::DataTypeToSerialString(x_data_type).c_str()); | |||
| GELOGI("GatherV2Kernel does not support this Data type:%s.", | |||
| TypeUtils::DataTypeToSerialString(x_data_type).c_str()); | |||
| return NOT_CHANGED; | |||
| } | |||
| // calc output shape | |||
| @@ -61,4 +61,5 @@ Status IdentityKernel::Compute(const ge::OpDescPtr op_desc, const std::vector<ge | |||
| return SUCCESS; | |||
| } | |||
| REGISTER_KERNEL(IDENTITY, IdentityKernel); | |||
| REGISTER_KERNEL(PLACEHOLDERWITHDEFAULT, IdentityKernel); | |||
| } // namespace ge | |||
| @@ -84,14 +84,14 @@ void GetOriginStrideVec(const std::vector<ge::ConstGeTensorPtr> &input, vector<i | |||
| } // namespace | |||
| Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<ge::ConstGeTensorPtr> &input, | |||
| vector<ge::GeTensorPtr> &v_output) { | |||
| GELOGD("StridedSliceKernel in."); | |||
| GELOGD("StridedSliceKernel in"); | |||
| // 1.Check input and attrs | |||
| if (CheckAndGetAttr(attr) != SUCCESS) { | |||
| GELOGW("Check and get attrs failed.Ignore kernel."); | |||
| GELOGW("Check and get attrs failed.Ignore kernel"); | |||
| return NOT_CHANGED; | |||
| } | |||
| if (CheckInputParam(input) != SUCCESS) { | |||
| GELOGW("Check input params failed.Ignore kernel."); | |||
| GELOGW("Check input params failed.Ignore kernel"); | |||
| return NOT_CHANGED; | |||
| } | |||
| // 2.Init param with mask attrs. | |||
| @@ -120,7 +120,7 @@ Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<g | |||
| auto ret = OpUtils::SetOutputSliceData(data, static_cast<int64_t>(data_size), data_type, input_dims, begin_vec, | |||
| output_dims, output_ptr.get(), stride_vec); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "SetOutputSliceData failed."); | |||
| GELOGE(INTERNAL_ERROR, "SetOutputSliceData failed"); | |||
| return NOT_CHANGED; | |||
| } | |||
| @@ -133,7 +133,7 @@ Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<g | |||
| GetOutputDims(final_dim_size, output_dims, v_dims); | |||
| t_d.SetShape(GeShape(v_dims)); | |||
| v_output.push_back(output_ptr); | |||
| GELOGI("StridedSliceKernel success."); | |||
| GELOGI("StridedSliceKernel success"); | |||
| return SUCCESS; | |||
| } | |||
| Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) { | |||
| @@ -144,7 +144,7 @@ Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) { | |||
| // Get all op attr value of strided_slice | |||
| for (auto &attr_2_value : attr_value_map_) { | |||
| if (!AttrUtils::GetInt(attr, attr_2_value.first, attr_2_value.second)) { | |||
| GELOGE(PARAM_INVALID, "Get %s attr failed.", attr_2_value.first.c_str()); | |||
| GELOGE(PARAM_INVALID, "Get %s attr failed", attr_2_value.first.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| } | |||
| @@ -182,7 +182,7 @@ Status StridedSliceKernel::CheckInputParam(const std::vector<ConstGeTensorPtr> & | |||
| return PARAM_INVALID; | |||
| } | |||
| if (kIndexNumberType.find(begin_tensor_desc.GetDataType()) == kIndexNumberType.end()) { | |||
| GELOGW("Data type of StridedSlice OP(begin,end,strides) must be int32 or int64."); | |||
| GELOGW("Data type of StridedSlice OP(begin,end,strides) must be int32 or int64"); | |||
| return PARAM_INVALID; | |||
| } | |||
| @@ -250,7 +250,7 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector<ConstGeTensorPtr | |||
| end_i = x_dims.at(i); | |||
| stride_i = 1; | |||
| } | |||
| GELOGD("Before mask calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld.", | |||
| GELOGD("Before mask calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld", | |||
| begin_i, end_i, stride_i, x_dims.at(i)); | |||
| auto ret = MaskCal(i, begin_i, end_i, x_dims.at(i)); | |||
| if (ret != SUCCESS) { | |||
| @@ -258,7 +258,7 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector<ConstGeTensorPtr | |||
| return NOT_CHANGED; | |||
| } | |||
| int64_t dim_final; | |||
| GELOGD("Before stride calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld.", | |||
| GELOGD("Before stride calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld", | |||
| begin_i, end_i, stride_i, x_dims.at(i)); | |||
| (void) StrideCal(x_dims.at(i), begin_i, end_i, stride_i, dim_final); | |||
| output_dims.push_back(dim_final); | |||
| @@ -71,6 +71,7 @@ struct GraphExecutionContext { | |||
| std::atomic_bool is_eos_; | |||
| long profiling_level = 0; | |||
| long iteration = 0; | |||
| void *global_step = nullptr; | |||
| private: | |||
| Status status = SUCCESS; | |||
| @@ -29,7 +29,7 @@ const size_t kMinimumPiplineStages = 2; | |||
| const int kDefaultLoopCount = 10; | |||
| } | |||
| HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) | |||
| : model_(model), run_flag_(false) { | |||
| : model_(model), run_flag_(false), data_dumper_(nullptr) { | |||
| } | |||
| HybridModelAsyncExecutor::~HybridModelAsyncExecutor() { | |||
| @@ -67,6 +67,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis | |||
| future_ = std::async(std::launch::async, [&]() -> Status { | |||
| GetThreadLocalContext() = *executor_->GetContext()->ge_context; | |||
| GetContext().SetSessionId(executor_->GetContext()->session_id); | |||
| GetContext().SetContextId(executor_->GetContext()->context_id); | |||
| return RunInternal(); | |||
| }); | |||
| @@ -85,6 +86,10 @@ Status HybridModelAsyncExecutor::Stop() { | |||
| ret = future_.get(); | |||
| } | |||
| if (is_op_debug_reg_) { | |||
| op_debug_register_.UnregisterDebugForStream(stream_); | |||
| } | |||
| if (stream_ != nullptr) { | |||
| GE_CHK_RT(rtStreamDestroy(stream_)); | |||
| stream_ = nullptr; | |||
| @@ -101,6 +106,7 @@ Status HybridModelAsyncExecutor::Init() { | |||
| executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); | |||
| GE_CHECK_NOTNULL(executor_); | |||
| GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine"); | |||
| GE_CHK_STATUS_RET(DumpOpDebug(), "Dump op debug failed in hybrid engine"); | |||
| GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups()); | |||
| if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) { | |||
| @@ -161,6 +167,7 @@ Status HybridModelAsyncExecutor::RunInternal() { | |||
| } else { | |||
| GELOGI("HybridModel will execute in singleline mode"); | |||
| ge::GetContext().SetSessionId(executor_->GetContext()->session_id); | |||
| ge::GetContext().SetContextId(executor_->GetContext()->context_id); | |||
| ret = executor_->Execute(args); | |||
| } | |||
| ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); | |||
| @@ -439,31 +446,20 @@ Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs, | |||
| TensorValue tensor_value(inputs[i].data, inputs[i].length); | |||
| args.inputs[i] = tensor_value; | |||
| } | |||
| for (size_t i = 0; i < outputs.size(); ++i) { | |||
| args.outputs.emplace_back(TensorValue(outputs[i].data, outputs[i].length)); | |||
| } | |||
| // The user must specify the input tensor descriptions when the input shape is dynamic in inference. | |||
| for (size_t i = 0; i < input_desc.size(); ++i) { | |||
| ConstGeTensorDescPtr tensor_desc_ptr = MakeShared<GeTensorDesc>(input_desc[i]); | |||
| args.input_desc.emplace_back(tensor_desc_ptr); | |||
| } | |||
| GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); | |||
| for (const auto &output_tensor_desc : args.output_desc) { | |||
| output_desc.emplace_back(*output_tensor_desc); | |||
| } | |||
| for (size_t i = 0; i < args.outputs.size(); ++i) { | |||
| int64_t output_real_size = 0; | |||
| ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[i], output_real_size); | |||
| if (graph_status != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Get tensor size in bytes failed."); | |||
| return FAILED; | |||
| } | |||
| if (output_real_size > 0) { | |||
| if (outputs[i].length < static_cast<uint64_t>(output_real_size)) { | |||
| GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by " | |||
| "user should be greater than or equal to the real size of output[%ld]", | |||
| i, outputs[i].length, output_real_size); | |||
| return FAILED; | |||
| } | |||
| GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, | |||
| RT_MEMCPY_DEVICE_TO_DEVICE)); | |||
| } | |||
| outputs[i].length = output_real_size; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -508,5 +504,40 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector< | |||
| return SUCCESS; | |||
| } | |||
| // Enables op-debug dumping for this executor when the dump properties request it. | |||
| // When op debug is not open, nothing is registered and SUCCESS is returned. | |||
| // Otherwise the stream is registered for op debug, the data dumper is configured | |||
| // from the model, the loop-control variable addresses are handed to the dumper, | |||
| // and the dump info is loaded. | |||
| // NOTE(review): the GE_CHK_* macros are presumed to return early on failure - confirm. | |||
| Status HybridModelAsyncExecutor::DumpOpDebug() { | |||
|   const DumpProperties &dump_properties = executor_->GetContext()->dump_properties; | |||
|   if (!dump_properties.IsOpDebugOpen()) { | |||
|     return SUCCESS; | |||
|   } | |||
|   GELOGD("Opdebug is open in hybrid engine"); | |||
|   uint32_t debug_mode = dump_properties.GetOpDebugMode(); | |||
|   GE_CHK_RT_RET(op_debug_register_.RegisterDebugForStream(stream_, debug_mode, data_dumper_)); | |||
|   // Remember the registration so that Stop() can unregister it for this stream. | |||
|   is_op_debug_reg_ = true; | |||
|   data_dumper_.SetDumpProperties(dump_properties); | |||
|   data_dumper_.SetModelName(model_->GetModelName()); | |||
|   data_dumper_.SetModelId(model_->GetModelId()); | |||
|   data_dumper_.SetDeviceId(model_->GetDeviceId()); | |||
|   // Each loop-control variable is optional: a missing variable yields a null address. | |||
|   TensorValue *step_var = model_->GetVariable(NODE_NAME_GLOBAL_STEP); | |||
|   void *global_step = (step_var == nullptr) ? nullptr : const_cast<void *>(step_var->GetData()); | |||
|   TensorValue *iter_var = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER); | |||
|   void *loop_per_iter = (iter_var == nullptr) ? nullptr : const_cast<void *>(iter_var->GetData()); | |||
|   TensorValue *cond_var = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND); | |||
|   void *loop_cond = (cond_var == nullptr) ? nullptr : const_cast<void *>(cond_var->GetData()); | |||
|   data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond); | |||
|   GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "LoadDumpInfo failed in hybrid engine"); | |||
|   GELOGD("Dump op debug SUCCESS in hybrid engine"); | |||
|   return SUCCESS; | |||
| } | |||
| } // namespace hybrid | |||
| } // namespace ge | |||