From 0b666e41b1b448701d0ca86dc51b08928c68240f Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 22 Nov 2020 23:22:24 +0800 Subject: [PATCH 001/127] For pkg3 acllib dynamic link. --- .../ops_kernel_builder_manager.cc | 20 +++++++++++-------- .../ops_kernel_builder_manager.h | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc index e0001fcd..167be47b 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ -33,6 +33,8 @@ const std::vector kHcclBuilderLibs = { "libhvd_opskernel_builder.so", "libhcom_gradtune_opskernel_builder.so" }; + +const std::string kAicoreUtilsLib = "libaicore_utils_runtime.so"; } // namespace OpsKernelBuilderManager::~OpsKernelBuilderManager() { // it's OK to call Finalize multiply times @@ -45,13 +47,11 @@ OpsKernelBuilderManager &OpsKernelBuilderManager::Instance() { } Status OpsKernelBuilderManager::Initialize(const map &options, bool is_train) { - if (is_train) { - std::string lib_paths; - GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths)); - plugin_manager_.reset(new (std::nothrow)PluginManager()); - GE_CHECK_NOTNULL(plugin_manager_); - GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs"); - } + std::string lib_paths; + GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths, is_train)); + plugin_manager_.reset(new (std::nothrow)PluginManager()); + GE_CHECK_NOTNULL(plugin_manager_); + GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs"); auto &kernel_builders = OpsKernelBuilderRegistry::GetInstance().GetAll(); GELOGI("Number of OpBuild = %zu", kernel_builders.size()); @@ -100,7 +100,8 @@ OpsKernelBuilderPtr OpsKernelBuilderManager::GetOpsKernelBuilder(const string &n return nullptr; } -Status OpsKernelBuilderManager::GetLibPaths(const std::map &options, std::string &lib_paths) { +Status 
OpsKernelBuilderManager::GetLibPaths(const std::map &options, std::string &lib_paths, + bool is_train) { GELOGD("Start to execute GetLibPaths"); std::string path_base = PluginManager::GetPath(); std::string so_path = "plugin/opskernel/"; @@ -109,6 +110,9 @@ Status OpsKernelBuilderManager::GetLibPaths(const std::mapsecond != "0") { diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h index 7a95ddfa..207ebc79 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.h +++ b/ge/opskernel_manager/ops_kernel_builder_manager.h @@ -48,7 +48,7 @@ class OpsKernelBuilderManager { private: OpsKernelBuilderManager() = default; - static Status GetLibPaths(const std::map &options, std::string &lib_paths); + static Status GetLibPaths(const std::map &options, std::string &lib_paths, bool is_train); std::unique_ptr plugin_manager_; std::map ops_kernel_builders_{}; From 165e22bd069b342a600d47e0bd7b61537a0d0ada Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Mon, 30 Nov 2020 16:30:42 +0800 Subject: [PATCH 002/127] atomic_add_clear pass loop graph fix --- ge/graph/passes/atomic_addr_clean_pass.cc | 103 +++++++++++++++++----- ge/graph/passes/atomic_addr_clean_pass.h | 5 ++ 2 files changed, 84 insertions(+), 24 deletions(-) diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 60742eb1..40931ff6 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -74,10 +74,87 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { return SUCCESS; } +// just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input +bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { + // 1.Check if isAtomic attrs exist for HCOM + std::shared_ptr instance_ptr = GELib::GetInstance(); + if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { + GELOGW("GELib not initialized, atomic from ops kernel 
judge false, node_name: %s", node->GetName().c_str()); + return false; + } + + OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj(); + vector op_info_vec = ops_kernel_manager.GetOpsKernelInfo(node->GetType()); + for (const auto &op_info : op_info_vec) { + if (op_info.isAtomic) { + // check peer input is DATA + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + if (in_data_anchor->GetPeerOutAnchor() != nullptr && + in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { + auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); + if (peer_in_node->GetType() == DATA) { + GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), op_info.engine.c_str()); + return false; + } + } + } + GELOGI("Recognized atomic op %s from %s engine.", node->GetName().c_str(), op_info.engine.c_str()); + hcom_node_vec_.push_back(node); + return true; + } + } + return false; +} + +bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index) { + auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); + if (out_data_anchor == nullptr) { + return false; + } + + for (auto input_anchor : out_data_anchor->GetPeerInDataAnchors()) { + auto output_node = input_anchor->GetOwnerNode(); + // just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input + // hccl's attr ATOMIC_ATTR_INPUT_INDEX mark on CalcOpRunningParam, can't be get here + if (CheckAtomicFromOpsKernel(output_node)) { + return true; + } + } + return false; +} + +bool AtomicAddrCleanPass::CheckSkipInsertInLoopGraph(const NodePtr &node) { + OpDescPtr op_desc = node->GetOpDesc(); + std::map> node_workspace_offset; + bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); + bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX); + node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset); + if 
(!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) { + std::vector atomic_output_index; + (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); + bool is_all_output_peer_also_atomic = true; + for (const auto &output_index : atomic_output_index) { + if (!IsOutputIndexPeerInputAtomic(node, output_index)) { + is_all_output_peer_also_atomic = false; + break; + } + } + if (is_all_output_peer_also_atomic) { + GELOGI("all out peer node input atomic, skip this out atomic process, node name: %s", node->GetName().c_str()); + return true; + } + } + return false; +} + Status AtomicAddrCleanPass::HandleLoopGraph(ComputeGraphPtr &graph, const vector &atomic_node_vec) { // Loop graph , insert clean node follow atomic node int index = 0; for (const auto &node : atomic_node_vec) { + if (CheckSkipInsertInLoopGraph(node)) { + continue; + } + // Insert atomic clean op NodePtr clean_addr_node = InsertAtomicAddrCleanNode(graph); if (clean_addr_node == nullptr) { @@ -249,32 +326,10 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { return false; } // 1.Check if isAtomic attrs exist for HCOM - std::shared_ptr instance_ptr = GELib::GetInstance(); - if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { - GELOGW("GELib not initialized"); - return false; + if (CheckAtomicFromOpsKernel(node)) { + return true; } - OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj(); - vector op_info_vec = ops_kernel_manager.GetOpsKernelInfo(op_desc->GetType()); - for (const auto &op_info : op_info_vec) { - if (op_info.isAtomic) { - GELOGI("Recognized atomic op %s from DNN_HCCL engine.", op_desc->GetName().c_str()); - // check peer input is DATA - for (auto &in_data_anchor : node->GetAllInDataAnchors()) { - if (in_data_anchor->GetPeerOutAnchor() != nullptr && - in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { - auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); - if 
(peer_in_node->GetType() == DATA) { - GELOGI("Recognized atomic op %s from DNN_HCCL engine and input is DATA.", op_desc->GetName().c_str()); - return false; - } - } - } - hcom_node_vec_.push_back(node); - return true; - } - } // 2.Check atomic attr in node std::map> node_workspace_offset; bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index ad60b7b5..8138d511 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -84,6 +84,11 @@ class AtomicAddrCleanPass : public GraphPass { Status HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector &atomic_node_vec, std::vector &common_atomic_nodes); + bool CheckAtomicFromOpsKernel(const NodePtr &node); + + bool IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index); + + bool CheckSkipInsertInLoopGraph(const NodePtr &node); vector hcom_node_vec_; bool is_loop_graph_ = false; From 9909a46e2ad1d91e571a30e541dab29f462e0f9d Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 2 Dec 2020 14:49:58 +0800 Subject: [PATCH 003/127] modify for security check of control pass --- ge/graph/passes/attach_stream_label_pass.cc | 54 +++++------- ge/graph/passes/cond_remove_pass.cc | 10 ++- ge/graph/passes/enter_pass.cc | 25 ++---- ge/graph/passes/for_pass.cc | 27 ++---- ge/graph/passes/merge_pass.cc | 9 +- ge/graph/passes/multi_batch_pass.cc | 88 +++++++------------ .../passes/switch_to_stream_switch_pass.cc | 76 ++++++++-------- .../passes/switch_to_stream_switch_pass.h | 3 +- 8 files changed, 119 insertions(+), 173 deletions(-) diff --git a/ge/graph/passes/attach_stream_label_pass.cc b/ge/graph/passes/attach_stream_label_pass.cc index b04643a4..c0e0f669 100644 --- a/ge/graph/passes/attach_stream_label_pass.cc +++ b/ge/graph/passes/attach_stream_label_pass.cc @@ -24,11 +24,7 @@ Status AttachStreamLabelPass::Run(ComputeGraphPtr graph) { 
FindNodes(graph); for (const auto &node : need_label_nodes_) { - OpDescPtr op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - if (!op_desc->HasAttr(ATTR_NAME_STREAM_LABEL)) { - GE_CHK_STATUS_RET(UpdateCondBranch(node), "Update cond branch failed, start node:%s.", node->GetName().c_str()); - } + GE_CHK_STATUS_RET(UpdateCondBranch(node), "Update cond branch failed, start node:%s.", node->GetName().c_str()); } GE_CHK_STATUS_RET(UpdateEnterNode(), "UpdateEnterNode failed."); @@ -55,13 +51,15 @@ Status AttachStreamLabelPass::ClearStatus() { /// void AttachStreamLabelPass::FindNodes(const ComputeGraphPtr &graph) { for (const NodePtr &node : graph->GetDirectNode()) { - const std::string &type = node->GetType(); - if (type == STREAMSWITCH) { + const auto &op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + const std::string &type = op_desc->GetType(); + if ((type == STREAMSWITCH) && op_desc->HasAttr(ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG)) { stream_switch_nodes_.emplace_back(node); - } else if (type == STREAMMERGE) { - if ((node->GetOpDesc() != nullptr) && !node->GetOpDesc()->HasAttr(ATTR_NAME_NEXT_ITERATION)) { - need_label_nodes_.emplace_back(node); - } + } else if ((type == STREAMMERGE) && !op_desc->HasAttr(ATTR_NAME_NEXT_ITERATION)) { + need_label_nodes_.emplace_back(node); } else if ((type == ENTER) || (type == REFENTER)) { enter_nodes_.emplace_back(node); } @@ -83,11 +81,15 @@ void AttachStreamLabelPass::FindNodes(const ComputeGraphPtr &graph) { /// Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { std::string stream_label; + if (AttachFlag(node, stream_label) != SUCCESS) { + GELOGE(FAILED, "Attach flag for node %s failed.", node->GetName().c_str()); + return FAILED; + } + std::unordered_set branch_nodes; std::unordered_set visited; std::stack nodes; nodes.push(node); - static const std::set end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE}; while (!nodes.empty()) { NodePtr cur_node = nodes.top(); @@ -95,10 +97,6 @@ 
Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { if (visited.count(cur_node) > 0) { continue; } - if (AttachFlag(cur_node, stream_label) != SUCCESS) { - GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str()); - return FAILED; - } const std::string &type = cur_node->GetType(); for (const auto &out_node : cur_node->GetOutAllNodes()) { @@ -115,10 +113,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { visited.insert(cur_node); } - if (node->GetType() == STREAMSWITCH) { - GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); - } - for (const NodePtr &tmp_node : branch_nodes) { GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str()); GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed."); @@ -148,11 +142,10 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea GE_CHK_BOOL_EXEC(AttrUtils::GetBool(op_desc, ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG, value), return FAILED, "StreamSwitch get attr TRUE_BRANCH_STREAM failed."); stream_label += (value ? 
"_t" : "_f"); + GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); } else if (type == STREAMMERGE) { stream_label = node->GetName(); GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); - } else if ((type == EXIT) || (type == REFEXIT)) { - GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); } return SUCCESS; @@ -166,12 +159,13 @@ Status AttachStreamLabelPass::UpdateEnterNode() { std::unordered_map> enter_active_map; for (const auto &enter_node : enter_nodes_) { for (const auto &out_ctrl_node : enter_node->GetOutControlNodes()) { - if (out_ctrl_node->GetType() == STREAMACTIVE) { - if (enter_active_map.find(out_ctrl_node) == enter_active_map.end()) { - enter_active_map[out_ctrl_node] = {enter_node}; - } else { - enter_active_map[out_ctrl_node].emplace_back(enter_node); - } + if (out_ctrl_node->GetType() != STREAMACTIVE) { + continue; + } + if (enter_active_map.find(out_ctrl_node) == enter_active_map.end()) { + enter_active_map[out_ctrl_node] = {enter_node}; + } else { + enter_active_map[out_ctrl_node].emplace_back(enter_node); } } } @@ -226,9 +220,8 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no std::string stream_label; GE_CHECK_NOTNULL(active_node); (void)AttrUtils::GetStr(active_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label); - if (stream_label.empty()) { - GELOGW("stream_label of enter_active & enter_nodes is empty."); + GELOGD("stream_label of enter_active %s is empty.", active_node->GetName().c_str()); return SUCCESS; } @@ -238,7 +231,6 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); } } - GE_CHK_STATUS_RET(SetStreamLabel(active_node, stream_label), "Set stream label failed."); return SUCCESS; } diff --git a/ge/graph/passes/cond_remove_pass.cc b/ge/graph/passes/cond_remove_pass.cc index e8d1493f..bf2e1170 
100644 --- a/ge/graph/passes/cond_remove_pass.cc +++ b/ge/graph/passes/cond_remove_pass.cc @@ -37,6 +37,12 @@ Status CondRemovePass::Run(NodePtr &node) { OutDataAnchorPtr cond_out_anchor = nullptr; InDataAnchorPtr cond_in_anchor = nullptr; Status ret = GetCondInfo(node, graph, cond_out_anchor, cond_in_anchor); + if (ret == NOT_CHANGED) { + return SUCCESS; + } else if (ret != SUCCESS) { + GELOGE(FAILED, "Get cond_info for node %s failed.", node->GetName().c_str()); + return FAILED; + } int32_t cond_index = 0; GELOGD("Handle cond remove for node %s.", node->GetOpDesc()->GetName().c_str()); bool if_cond_const = CheckIfCondConstInput(cond_out_anchor, cond_in_anchor, cond_index); @@ -322,11 +328,11 @@ Status CondRemovePass::GetCondInfo(const NodePtr &node, ComputeGraphPtr &graph, std::string type = node->GetType(); if ((kIfOpTypes.count(type) != 0) || (kCaseOpTypes.count(type) != 0)) { if (GetCondInfoForIfCase(node, graph, cond_out_anchor, cond_in_anchor) != SUCCESS) { - GELOGE(FAILED, "Get cond_info for if node failed."); + GELOGE(FAILED, "Get cond_info for if/case node failed."); return FAILED; } } else { - GELOGD("no need cond_pass for node %s.", node->GetName().c_str()); + GELOGD("no need cond_remove_pass for node %s.", node->GetName().c_str()); return NOT_CHANGED; } diff --git a/ge/graph/passes/enter_pass.cc b/ge/graph/passes/enter_pass.cc index 206d271c..afeca78f 100644 --- a/ge/graph/passes/enter_pass.cc +++ b/ge/graph/passes/enter_pass.cc @@ -16,6 +16,7 @@ #include "graph/passes/enter_pass.h" +#include "graph/debug/ge_attr_define.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" #include "graph/utils/graph_utils.h" @@ -72,33 +73,25 @@ Status EnterPass::Run(NodePtr &node) { } Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { - auto out_nodes_of_in_node = in_node->GetOutAllNodes(); - if (out_nodes_of_in_node.size() != kOutNodesNum) { + if ((in_node->GetOutAllNodes().size() != kOutNodesNum) || 
!node->GetOutControlNodes().empty()) { return SUCCESS; } - - if (!node->GetOutControlNodes().empty()) { + bool is_constant_flag = true; + (void)AttrUtils::GetBool(node->GetOpDesc(), ENTER_ATTR_CONSTANT_FLAG, is_constant_flag); + if (!is_constant_flag) { return SUCCESS; } - for (const auto &out_node : node->GetOutDataNodes()) { - GE_CHECK_NOTNULL(out_node); - if (out_node->GetType() == MERGE) { - return SUCCESS; - } - } - GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); - auto out_data_anchor = node->GetOutDataAnchor(0); + const auto &out_data_anchor = node->GetOutDataAnchor(0); GE_CHECK_NOTNULL(out_data_anchor); - for (auto peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { + for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); } - - auto graph = node->GetOwnerComputeGraph(); - GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, node)) + GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)); + AddNodeDeleted(node); AddRePassNodesWithInOut(in_node); return SUCCESS; diff --git a/ge/graph/passes/for_pass.cc b/ge/graph/passes/for_pass.cc index f3caea35..f5280a36 100644 --- a/ge/graph/passes/for_pass.cc +++ b/ge/graph/passes/for_pass.cc @@ -137,7 +137,7 @@ Status ForPass::BuildForInfo(const ComputeGraphPtr &root_graph, const NodePtr &n for_info.ctrl_inputs = std::move(ctrl_inputs); for_info.ctrl_outputs = std::move(ctrl_outputs); - GELOGI("Build for_info for node %s succ.", node->GetName().c_str()); + GELOGI("Build for_info for node %s success.", node->GetName().c_str()); return SUCCESS; } @@ -159,13 +159,7 @@ OutDataAnchorPtr ForPass::FindInputWithIndex(const NodePtr &node, uint32_t index return nullptr; } - OutDataAnchorPtr peer_out_anchor = 
in_data_anchor->GetPeerOutAnchor(); - if (peer_out_anchor == nullptr) { - GELOGE(FAILED, "FindInputWithIndex %s:%u failed: peer_out_anchor is NULL.", node->GetName().c_str(), index); - return nullptr; - } - - return peer_out_anchor; + return in_data_anchor->GetPeerOutAnchor(); } /// @@ -186,20 +180,13 @@ Status ForPass::FindInputsAndOutputs(const NodePtr &node, std::vectorGetAllInDataAnchorsSize(); for (uint32_t index = FOR_DATA_INPUT; index < input_data_num; index++) { InDataAnchorPtr in_data_anchor = node->GetInDataAnchor(index); - if (in_data_anchor == nullptr) { - GELOGE(FAILED, "FindInputWithIndex %s:%u failed: in_data_anchor is NULL.", node->GetName().c_str(), index); - return FAILED; - } - GE_IF_BOOL_EXEC(in_data_anchor->GetPeerOutAnchor() == nullptr, - GELOGW("Get null input by index %d from node %s ", - in_data_anchor->GetIdx(), node->GetName().c_str()); - continue); + GE_CHECK_NOTNULL(in_data_anchor); data_inputs.emplace_back(in_data_anchor->GetPeerOutAnchor()); } - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { std::vector peer_in_data_anchors; - for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { + for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { peer_in_data_anchors.emplace_back(peer_in_data_anchor); } data_outputs.emplace_back(peer_in_data_anchors); @@ -207,13 +194,13 @@ Status ForPass::FindInputsAndOutputs(const NodePtr &node, std::vectorGetInControlAnchor(); GE_CHECK_NOTNULL(in_ctrl_anchor); - for (auto &peer_out_ctrl_anchor : in_ctrl_anchor->GetPeerOutControlAnchors()) { + for (const auto &peer_out_ctrl_anchor : in_ctrl_anchor->GetPeerOutControlAnchors()) { ctrl_inputs.emplace_back(peer_out_ctrl_anchor); } OutControlAnchorPtr out_ctrl_anchor = node->GetOutControlAnchor(); GE_CHECK_NOTNULL(out_ctrl_anchor); - for (auto &peer_in_ctrl_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { + for (const auto 
&peer_in_ctrl_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { ctrl_outputs.emplace_back(peer_in_ctrl_anchor); } diff --git a/ge/graph/passes/merge_pass.cc b/ge/graph/passes/merge_pass.cc index d2340037..80394e7a 100644 --- a/ge/graph/passes/merge_pass.cc +++ b/ge/graph/passes/merge_pass.cc @@ -21,16 +21,12 @@ #include #include "framework/common/debug/ge_log.h" -#include "common/ge_inner_error_codes.h" #include "common/ge/ge_util.h" #include "graph/common/omg_util.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" #include "graph/passes/pass_utils.h" -using domi::PARAM_INVALID; -using domi::SUCCESS; - namespace ge { const int kValueIndexOutputIndex = 1; @@ -47,13 +43,12 @@ Status MergePass::Run(NodePtr &node) { return SUCCESS; } - auto out_data_anchors = node->GetAllOutDataAnchors(); - if (out_data_anchors.empty()) { + if (node->GetAllOutDataAnchors().empty()) { GELOGE(PARAM_INVALID, "[%s] Merge node output anchor is empty", node->GetName().c_str()); return PARAM_INVALID; } - auto in_data_nodes = node->GetInDataNodes(); + const auto &in_data_nodes = node->GetInDataNodes(); switch (in_data_nodes.size()) { case 0: { /// Case A: input_count = 0, the output of merge node is inactive as well diff --git a/ge/graph/passes/multi_batch_pass.cc b/ge/graph/passes/multi_batch_pass.cc index c7034612..74f7e30e 100644 --- a/ge/graph/passes/multi_batch_pass.cc +++ b/ge/graph/passes/multi_batch_pass.cc @@ -22,9 +22,6 @@ #include "graph/common/omg_util.h" #include "graph/utils/type_utils.h" -using std::string; -using std::vector; - namespace ge { Status MultiBatchPass::Run(ComputeGraphPtr graph) { GELOGD("MultiBatchPass Enter"); @@ -53,7 +50,7 @@ Status MultiBatchPass::Run(ComputeGraphPtr graph) { return FAILED; } std::vector> batch_shape; - vector> combined_batch; + std::vector> combined_batch; if (!CheckSwitchN(batch_shape, combined_batch)) { GELOGE(FAILED, "CheckSwitchN failed."); return FAILED; @@ -104,6 +101,7 @@ Status 
MultiBatchPass::ClearStatus() { /// Status MultiBatchPass::SetCaseLabel(const ComputeGraphPtr &graph, const NodePtr &case_node) { const auto &func_desc = case_node->GetOpDesc(); + GE_CHECK_NOTNULL(func_desc); if (!func_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { GELOGD("Graph: %s Not multi-batch, Node: %s", graph->GetName().c_str(), case_node->GetName().c_str()); return SUCCESS; @@ -114,7 +112,7 @@ Status MultiBatchPass::SetCaseLabel(const ComputeGraphPtr &graph, const NodePtr const auto &subgraph = graph->GetSubgraph(dynamic_branch_names[i]); GE_CHECK_NOTNULL(subgraph); - const string batch_label = "Batch_" + std::to_string(i); + const std::string batch_label = "Batch_" + std::to_string(i); for (const auto &node : subgraph->GetDirectNode()) { (void)AttrUtils::SetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label); } @@ -139,12 +137,12 @@ Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchor continue; } - InDataAnchorPtr in_data_anchor = node->GetInDataAnchor(SWITCH_PRED_INPUT); + const auto &in_data_anchor = node->GetInDataAnchor(SWITCH_PRED_INPUT); if (in_data_anchor == nullptr) { GELOGE(FAILED, "FindPredInput failed, in_data_anchor is null, node:%s.", node->GetName().c_str()); return FAILED; } - OutDataAnchorPtr pred_input = in_data_anchor->GetPeerOutAnchor(); + const auto &pred_input = in_data_anchor->GetPeerOutAnchor(); if (pred_input == nullptr) { GELOGE(FAILED, "FindPredInput failed, pred_input is null, node:%s.", node->GetName().c_str()); return FAILED; @@ -178,12 +176,10 @@ Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchor /// @return Status /// Status MultiBatchPass::GetDynamicType() { - for (const auto &switchn : switch_n_nodes_) { - auto switchn_desc = switchn->GetOpDesc(); - GE_CHECK_NOTNULL(switchn_desc); + for (const auto &switch_n : switch_n_nodes_) { int32_t dynamic_type = static_cast(FIXED); - if (!AttrUtils::GetInt(switchn_desc, ATTR_DYNAMIC_TYPE, dynamic_type)) { - GELOGE(FAILED, "Get 
attr ATTR_DYNAMIC_TYPE of node: %s failed.", switchn->GetName().c_str()); + if (!AttrUtils::GetInt(switch_n->GetOpDesc(), ATTR_DYNAMIC_TYPE, dynamic_type)) { + GELOGE(FAILED, "Get attr ATTR_DYNAMIC_TYPE of node: %s failed.", switch_n->GetName().c_str()); return FAILED; } if (dynamic_type == static_cast(FIXED)) { @@ -191,7 +187,7 @@ Status MultiBatchPass::GetDynamicType() { return FAILED; } if (dynamic_type_ != static_cast(FIXED) && dynamic_type_ != dynamic_type) { - GELOGE(FAILED, "Attr ATTR_DYNAMIC_TYPE of all switchn node should be same, while one is %d and another is %d.", + GELOGE(FAILED, "Attr ATTR_DYNAMIC_TYPE of all switch_n node should be same, while one is %d and another is %d.", dynamic_type, dynamic_type_); return FAILED; } @@ -212,21 +208,19 @@ Status MultiBatchPass::GetDynamicType() { Status MultiBatchPass::GetUserDesignateShape() { data_name_order_.clear(); bool first_check = true; - for (const auto &switchn : switch_n_nodes_) { - auto switchn_desc = switchn->GetOpDesc(); - GE_CHECK_NOTNULL(switchn_desc); - vector cur_switchn_data_name_order; - if (!AttrUtils::GetListStr(switchn_desc, ATTR_USER_DESIGNEATE_SHAPE_ORDER, cur_switchn_data_name_order)) { - GELOGE(FAILED, "Get attr ATTR_USER_DESIGNEATE_SHAPE_ORDER of node: %s failed.", switchn->GetName().c_str()); + for (const auto &switch_n : switch_n_nodes_) { + std::vector cur_data_name_order; + if (!AttrUtils::GetListStr(switch_n->GetOpDesc(), ATTR_USER_DESIGNEATE_SHAPE_ORDER, cur_data_name_order)) { + GELOGE(FAILED, "Get attr ATTR_USER_DESIGNEATE_SHAPE_ORDER of node: %s failed.", switch_n->GetName().c_str()); return FAILED; } if (first_check) { - data_name_order_ = cur_switchn_data_name_order; + data_name_order_ = cur_data_name_order; first_check = false; } else { - if (data_name_order_ != cur_switchn_data_name_order) { + if (data_name_order_ != cur_data_name_order) { GELOGE(FAILED, "The ATTR_USER_DESIGNEATE_SHAPE_ORDER of switchN must be same: %s failed.", - switchn->GetName().c_str()); + 
switch_n->GetName().c_str()); return FAILED; } } @@ -245,7 +239,8 @@ Status MultiBatchPass::GetUserDesignateShape() { /// @param [out] combined_batch /// @return bool /// -bool MultiBatchPass::CheckSwitchN(vector> &batch_shape, vector> &combined_batch) { +bool MultiBatchPass::CheckSwitchN(std::vector> &batch_shape, + std::vector> &combined_batch) { // Check if output_num of different SwitchN is same uint32_t batch_num = 0; for (const NodePtr &node : switch_n_nodes_) { @@ -281,7 +276,8 @@ bool MultiBatchPass::CheckSwitchN(vector> &batch_shape, vector> &batch_shape, vector> &batch_shape, - vector> &combined_batch) { +bool MultiBatchPass::GetBatchInfo(uint32_t batch_num, std::vector> &batch_shape, + std::vector> &combined_batch) { // Check if output_shape of different SwitchN is same - vector> idx_batch_shape; - vector> idx_combined_batch; + std::vector> idx_batch_shape; + std::vector> idx_combined_batch; for (uint32_t i = 0; i < batch_num; i++) { idx_batch_shape.clear(); idx_combined_batch.clear(); @@ -310,7 +306,7 @@ bool MultiBatchPass::GetBatchInfo(uint32_t batch_num, vector> &b GELOGE(FAILED, "CheckDims failed, get op_desc failed, node: %s.", node->GetName().c_str()); return false; } - vector output_dims; + std::vector output_dims; if (!AttrUtils::GetListInt(op_desc->GetOutputDesc(i), ATTR_NAME_SWITCHN_PRED_VALUE, output_dims)) { GELOGE(FAILED, "CheckDims failed, get attr ATTR_NAME_SWITCHN_PRED_VALUE failed, batch_index=%u.", i); return false; @@ -385,8 +381,8 @@ Status MultiBatchPass::FindSwitchOutNodes(uint32_t batch_num) { /// @return Status /// Status MultiBatchPass::ReplaceSwitchN(const ComputeGraphPtr &graph, const OutDataAnchorPtr &pred_value, - const vector> &batch_shape, - const vector> &combined_batch) { + const std::vector> &batch_shape, + const std::vector> &combined_batch) { NodePtr pred_value_node = pred_value->GetOwnerNode(); // Create SwitchCase node const std::string &switch_case_name = pred_value_node->GetName() + "_" + STREAMSWITCHN; @@ -429,31 
+425,11 @@ bool MultiBatchPass::CheckDims(const std::vector> &output_s return false; } - size_t num = output_shape.size(); - size_t dim_num = output_shape[0].size(); - for (size_t i = 1; i < num; i++) { - size_t tmp_dim_num = output_shape[i].size(); - if (dim_num != tmp_dim_num) { - GELOGE(FAILED, "CheckDims failed: dim_num not equal, output_0:%zu, output_%zu:%zu.", dim_num, i, tmp_dim_num); + for (auto iter = output_shape.begin() + 1; iter != output_shape.end(); ++iter) { + if (output_shape[0] != *iter) { return false; } } - - if (dim_num == 0) { - return true; - } - - for (size_t i = 0; i < dim_num; i++) { - int64_t dim_value = output_shape[0][i]; - for (size_t j = 1; j < num; j++) { - int64_t tmp_dim_value = output_shape[j][i]; - if (dim_value != tmp_dim_value) { - GELOGE(FAILED, "CheckDims failed: dim_value not equal, dim_index=%zu, dim_value_0:%ld, dim_value_%zu:%ld.", i, - dim_value, j, tmp_dim_value); - return false; - } - } - } return true; } @@ -468,8 +444,8 @@ bool MultiBatchPass::CheckDims(const std::vector> &output_s /// NodePtr MultiBatchPass::CreateSwitchCaseNode(const ComputeGraphPtr &graph, const std::string &name, const OutDataAnchorPtr &pred_value, - const vector> &batch_shape, - const vector> &combined_batch) { + const std::vector> &batch_shape, + const std::vector> &combined_batch) { OpDescPtr op_desc = MakeShared(name, STREAMSWITCHN); if (op_desc == nullptr) { GELOGE(FAILED, "Create op_desc failed, StreamSwitchN:%s.", name.c_str()); @@ -512,7 +488,7 @@ NodePtr MultiBatchPass::CreateSwitchCaseNode(const ComputeGraphPtr &graph, const GELOGE(FAILED, "set attr ATTR_NAME_PRED_VALUE failed, StreamSwitchN:%s.", name.c_str()); return nullptr; } - const string &attr_combined_batch = ATTR_NAME_COMBINED_BATCH + "_" + std::to_string(i); + const std::string &attr_combined_batch = ATTR_NAME_COMBINED_BATCH + "_" + std::to_string(i); if (!AttrUtils::SetListInt(op_desc, attr_combined_batch, combined_batch[i])) { GELOGE(FAILED, "set attr ATTR_NAME_COMBINED_BATCH 
failed, StreamSwitchN:%s.", name.c_str()); return nullptr; diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc index 529480a6..f75a104f 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -72,25 +72,26 @@ Status SwitchToStreamSwitchPass::CheckCycleDependence(const ComputeGraphPtr &gra std::unordered_map> cond_switch_map; for (const NodePtr &node : graph->GetDirectNode()) { GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed."); - if ((type == SWITCH) || (type == REFSWITCH)) { - InDataAnchorPtr in_cond_anchor = node->GetInDataAnchor(SWITCH_PRED_INPUT); - GE_CHECK_NOTNULL(in_cond_anchor); - OutDataAnchorPtr peer_out_anchor = in_cond_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_anchor); - if (FindSwitchCondInput(true, peer_out_anchor) != SUCCESS) { - GELOGE(FAILED, "Find pred_input for switch_node %s failed.", node->GetName().c_str()); - return FAILED; - } + if ((type != SWITCH) && (type != REFSWITCH)) { + continue; + } + InDataAnchorPtr in_cond_anchor = node->GetInDataAnchor(SWITCH_PRED_INPUT); + GE_CHECK_NOTNULL(in_cond_anchor); + OutDataAnchorPtr peer_out_anchor = in_cond_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_anchor); + if (FindSwitchCondInput(peer_out_anchor) != SUCCESS) { + GELOGE(FAILED, "Find pred_input for switch_node %s failed.", node->GetName().c_str()); + return FAILED; + } - NodePtr cond_node = peer_out_anchor->GetOwnerNode(); - auto iter = cond_switch_map.find(cond_node); - if (iter == cond_switch_map.end()) { - cond_switch_map[cond_node] = { node }; - } else { - iter->second.emplace_back(node); - } - switch_nodes_.emplace_back(node); + NodePtr cond_node = peer_out_anchor->GetOwnerNode(); + auto iter = cond_switch_map.find(cond_node); + if (iter == cond_switch_map.end()) { + cond_switch_map[cond_node] = { node }; + } else { + iter->second.emplace_back(node); } + 
switch_nodes_.emplace_back(node); } MarkCycleDependence(cond_switch_map); @@ -241,10 +242,6 @@ Status SwitchToStreamSwitchPass::BypassSwitchNode(const NodePtr &switch_node, Ou if (idx == SWITCH_DATA_INPUT) { peer_data_anchor = peer_out_anchor; } else { - if (FindSwitchCondInput(false, peer_out_anchor) != SUCCESS) { - GELOGE(FAILED, "Find pred_input for switch_node %s failed.", switch_node->GetName().c_str()); - return FAILED; - } peer_cond_anchor = peer_out_anchor; } } @@ -254,15 +251,14 @@ Status SwitchToStreamSwitchPass::BypassSwitchNode(const NodePtr &switch_node, Ou /// /// @brief Find Switch cond input -/// @param [in] pass_switch_flag /// @param [out] peer_cond_anchor /// @return Status /// -Status SwitchToStreamSwitchPass::FindSwitchCondInput(bool pass_switch_flag, OutDataAnchorPtr &peer_cond_anchor) { +Status SwitchToStreamSwitchPass::FindSwitchCondInput(OutDataAnchorPtr &peer_cond_anchor) { NodePtr tmp_node = nullptr; - string type; - bool need_pass_type = true; - while (need_pass_type) { + std::string type; + bool pass_flag = true; + while (pass_flag) { if (tmp_node == nullptr) { tmp_node = peer_cond_anchor->GetOwnerNode(); } else { @@ -274,7 +270,7 @@ Status SwitchToStreamSwitchPass::FindSwitchCondInput(bool pass_switch_flag, OutD } GE_CHK_STATUS_RET(GetOriginalType(tmp_node, type), "Get node type failed."); - need_pass_type = (pass_switch_flag && ((type == SWITCH) || (type == REFSWITCH))); + pass_flag = ((type == SWITCH) || (type == REFSWITCH)); } return SUCCESS; @@ -369,7 +365,7 @@ Status SwitchToStreamSwitchPass::MarkBranches(const OutDataAnchorPtr &peer_cond_ } } else { int64_t switch_group_id = GetGroupId(stream_switch); - map>> switch_group_map; + std::map>> switch_group_map; std::list false_node_list; std::list true_node_list; std::list &node_list = true_branch_flag ? 
true_node_list : false_node_list; @@ -389,7 +385,7 @@ Status SwitchToStreamSwitchPass::MarkBranches(const OutDataAnchorPtr &peer_cond_ /// @return group_id /// int64_t SwitchToStreamSwitchPass::GetGroupId(const NodePtr &node) { - string tailing_optimization_option; + std::string tailing_optimization_option; bool is_tailing_optimization = false; if (GetContext().GetOption(OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION, tailing_optimization_option) == GRAPH_SUCCESS) { // "1" means it's True from frontend option @@ -400,7 +396,7 @@ int64_t SwitchToStreamSwitchPass::GetGroupId(const NodePtr &node) { return 0; } - string hccl_group_id; + std::string hccl_group_id; if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, hccl_group_id)) { GELOGI("Node %s can not find hccl group id.", node->GetName().c_str()); return 0; @@ -432,6 +428,7 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph) same_cond_switch.insert(true_switch_list.begin(), true_switch_list.end()); OutDataAnchorPtr peer_cond_anchor = iter->first; + GE_CHECK_NOTNULL(peer_cond_anchor); NodePtr cond_node = peer_cond_anchor->GetOwnerNode(); GELOGI("CombineSwitchNode: cond_node=%s.", cond_node->GetName().c_str()); @@ -549,6 +546,7 @@ NodePtr SwitchToStreamSwitchPass::CreateCastOp(const ComputeGraphPtr &graph, con NodePtr cast_node = graph->AddNode(cast_desc); GE_CHK_BOOL_EXEC(cast_node != nullptr, return nullptr, "Create cast_node failed."); + // Cast node has and only has one input GE_CHK_STATUS(GraphUtils::AddEdge(peer_cond_anchor, cast_node->GetInDataAnchor(0)), "Cast add data edge failed."); return cast_node; @@ -614,24 +612,24 @@ Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_no return INTERNAL_ERROR; } - for (const NodePtr &in_ctl_node : switch_node->GetInControlNodes()) { - GE_CHK_STATUS(GraphUtils::RemoveEdge(in_ctl_node->GetOutControlAnchor(), switch_node->GetInControlAnchor()), + for (const NodePtr &in_ctrl_node : 
switch_node->GetInControlNodes()) { + GE_CHK_STATUS(GraphUtils::RemoveEdge(in_ctrl_node->GetOutControlAnchor(), switch_node->GetInControlAnchor()), "Remove ctl edge failed."); - GE_IF_BOOL_EXEC(!in_ctl_node->GetOutControlAnchor()->IsLinkedWith(cast_node->GetInControlAnchor()), { - GE_CHK_STATUS(GraphUtils::AddEdge(in_ctl_node->GetOutControlAnchor(), cast_node->GetInControlAnchor()), + GE_IF_BOOL_EXEC(!in_ctrl_node->GetOutControlAnchor()->IsLinkedWith(cast_node->GetInControlAnchor()), { + GE_CHK_STATUS(GraphUtils::AddEdge(in_ctrl_node->GetOutControlAnchor(), cast_node->GetInControlAnchor()), "Add ctl edge failed."); }); - GE_IF_BOOL_EXEC(in_ctl_node->GetType() != STREAMSWITCH, continue); - if (same_cond_switch.count(in_ctl_node) > 0) { - GE_CHK_STATUS(GraphUtils::RemoveEdge(in_ctl_node->GetOutControlAnchor(), cast_node->GetInControlAnchor()), + GE_IF_BOOL_EXEC(in_ctrl_node->GetType() != STREAMSWITCH, continue); + if (same_cond_switch.count(in_ctrl_node) > 0) { + GE_CHK_STATUS(GraphUtils::RemoveEdge(in_ctrl_node->GetOutControlAnchor(), cast_node->GetInControlAnchor()), "Remove ctl edge failed."); continue; } - auto find_res1 = switch_node_map_.find(in_ctl_node); + auto find_res1 = switch_node_map_.find(in_ctrl_node); GE_IF_BOOL_EXEC(find_res1 == switch_node_map_.end(), { - GELOGE(INTERNAL_ERROR, "StreamSwitch node %s not found in switch_node_map_.", in_ctl_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "StreamSwitch node %s not found in switch_node_map_.", in_ctrl_node->GetName().c_str()); return INTERNAL_ERROR; }); auto find_res2 = find_res1->second.find(orig_switch_name); diff --git a/ge/graph/passes/switch_to_stream_switch_pass.h b/ge/graph/passes/switch_to_stream_switch_pass.h index 48725230..7070b647 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.h +++ b/ge/graph/passes/switch_to_stream_switch_pass.h @@ -131,11 +131,10 @@ class SwitchToStreamSwitchPass : public GraphPass { /// /// @brief Find Switch cond input - /// @param [in] pass_switch_flag 
/// @param [out] peer_cond_anchor /// @return Status /// - Status FindSwitchCondInput(bool pass_switch_flag, OutDataAnchorPtr &peer_cond_anchor); + Status FindSwitchCondInput(OutDataAnchorPtr &peer_cond_anchor); /// /// @brief Create StreamSwitch Node From f5ce8025b849fe092f6ef041bcdb67865414d7ed Mon Sep 17 00:00:00 2001 From: TangQunzhang Date: Wed, 2 Dec 2020 18:23:51 +0800 Subject: [PATCH 004/127] /kind feature last release memory first reuse for cache opt --- ge/graph/build/memory/block_mem_assigner.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 00f47573..cdf768d8 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -871,8 +871,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, !node_op_desc->HasAttr(kOpNoReuseMem) && reuse_mem_flag && is_op_reuse_mem; auto stream_id = node_op_desc->GetStreamId(); if (is_reuse_memory && !continuous && !reusable_blocks_[memory_type].empty()) { - for (auto it = reusable_blocks_[memory_type][stream_id].begin(); - it != reusable_blocks_[memory_type][stream_id].end(); ++it) { + for (auto it = reusable_blocks_[memory_type][stream_id].rbegin(); + it != reusable_blocks_[memory_type][stream_id].rend(); ++it) { MemoryBlock *reusable_block = *it; if (!IsPostReuse(reusable_block)) { reusable_block->reuse_mem_ = false; @@ -901,7 +901,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, reusable_block->continuous_block_ = continuous; reusable_block->ref_count_++; ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); - reusable_blocks_[memory_type][stream_id].erase(it); + reusable_blocks_[memory_type][stream_id].erase((++it).base()); return reusable_block; } } From 9716ab8321846c708ed6ece81a14ed2434abf97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Thu, 3 Dec 2020 
17:14:05 +0800 Subject: [PATCH 005/127] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20?= =?UTF-8?q?!502=20:=20add=20external=20headers=20for=20mindspore'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inc/external/acl/acl.h | 73 - inc/external/acl/acl_base.h | 569 ---- inc/external/acl/acl_mdl.h | 1112 -------- inc/external/acl/acl_op.h | 503 ---- inc/external/acl/acl_op_compiler.h | 105 - inc/external/acl/acl_prof.h | 297 --- inc/external/acl/acl_rt.h | 932 ------- inc/external/acl/acl_tdt.h | 276 -- inc/external/acl/error_codes/ge_error_codes.h | 57 - inc/external/acl/error_codes/rt_error_codes.h | 91 - inc/external/acl/ops/acl_cblas.h | 333 --- inc/external/acl/ops/acl_dvpp.h | 2340 ----------------- inc/external/hccl/hccl.h | 134 - inc/external/hccl/hccl_types.h | 101 - inc/external/runtime/rt_error_codes.h | 91 - 15 files changed, 7014 deletions(-) delete mode 100644 inc/external/acl/acl.h delete mode 100644 inc/external/acl/acl_base.h delete mode 100644 inc/external/acl/acl_mdl.h delete mode 100644 inc/external/acl/acl_op.h delete mode 100644 inc/external/acl/acl_op_compiler.h delete mode 100644 inc/external/acl/acl_prof.h delete mode 100644 inc/external/acl/acl_rt.h delete mode 100644 inc/external/acl/acl_tdt.h delete mode 100644 inc/external/acl/error_codes/ge_error_codes.h delete mode 100644 inc/external/acl/error_codes/rt_error_codes.h delete mode 100644 inc/external/acl/ops/acl_cblas.h delete mode 100644 inc/external/acl/ops/acl_dvpp.h delete mode 100644 inc/external/hccl/hccl.h delete mode 100644 inc/external/hccl/hccl_types.h delete mode 100644 inc/external/runtime/rt_error_codes.h diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h deleted file mode 100644 index ef5b4772..00000000 --- a/inc/external/acl/acl.h +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this 
file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_EXTERNAL_ACL_ACL_H_ -#define INC_EXTERNAL_ACL_ACL_H_ - -#include "acl_rt.h" -#include "acl_op.h" -#include "acl_mdl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Current version is 1.0.0 -#define ACL_MAJOR_VERSION 1 -#define ACL_MINOR_VERSION 0 -#define ACL_PATCH_VERSION 0 - -/** - * @ingroup AscendCL - * @brief acl initialize - * - * @par Restriction - * The aclInit interface can be called only once in a process - * @param configPath [IN] the config path,it can be NULL - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath); - -/** - * @ingroup AscendCL - * @brief acl finalize - * - * @par Restriction - * Need to call aclFinalize before the process exits. - * After calling aclFinalize,the services cannot continue to be used normally. - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclFinalize(); - -/** - * @ingroup AscendCL - * @brief query ACL interface version - * - * @param majorVersion[OUT] ACL interface major version - * @param minorVersion[OUT] ACL interface minor version - * @param patchVersion[OUT] ACL interface patch version - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion); - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_ACL_H_ diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h deleted file mode 100644 index 224a8ef0..00000000 --- a/inc/external/acl/acl_base.h +++ /dev/null @@ -1,569 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_ -#define INC_EXTERNAL_ACL_ACL_BASE_H_ - -#include -#include -#include "error_codes/rt_error_codes.h" -#include "error_codes/ge_error_codes.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_MSC_VER) -#ifdef FUNC_VISIBILITY -#define ACL_FUNC_VISIBILITY _declspec(dllexport) -#else -#define ACL_FUNC_VISIBILITY -#endif -#else -#ifdef FUNC_VISIBILITY -#define ACL_FUNC_VISIBILITY __attribute__((visibility("default"))) -#else -#define ACL_FUNC_VISIBILITY -#endif -#endif - -#ifdef __GNUC__ -#define ACL_DEPRECATED __attribute__((deprecated)) -#define ACL_DEPRECATED_MESSAGE(message) __attribute__((deprecated(message))) -#elif defined(_MSC_VER) -#define ACL_DEPRECATED __declspec(deprecated) -#define ACL_DEPRECATED_MESSAGE(message) __declspec(deprecated(message)) -#else -#define ACL_DEPRECATED -#define ACL_DEPRECATED_MESSAGE(message) -#endif - -typedef void *aclrtStream; -typedef void *aclrtEvent; -typedef void *aclrtContext; -typedef int aclError; -typedef uint16_t aclFloat16; -typedef struct aclDataBuffer aclDataBuffer; -typedef struct aclTensorDesc aclTensorDesc; - -static const int ACL_ERROR_NONE = 0; -static const int ACL_SUCCESS = 0; - -static const int ACL_ERROR_INVALID_PARAM = 100000; -static const int ACL_ERROR_UNINITIALIZE = 100001; -static const int ACL_ERROR_REPEAT_INITIALIZE = 100002; -static const int ACL_ERROR_INVALID_FILE = 100003; -static const int ACL_ERROR_WRITE_FILE = 100004; -static const int ACL_ERROR_INVALID_FILE_SIZE = 100005; -static const int ACL_ERROR_PARSE_FILE = 100006; -static const int ACL_ERROR_FILE_MISSING_ATTR = 100007; -static const int ACL_ERROR_FILE_ATTR_INVALID = 100008; -static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009; -static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010; -static const int ACL_ERROR_INVALID_MODEL_ID = 100011; -static const int ACL_ERROR_DESERIALIZE_MODEL = 100012; -static const int ACL_ERROR_PARSE_MODEL = 100013; -static const int 
ACL_ERROR_READ_MODEL_FAILURE = 100014; -static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015; -static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016; -static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017; -static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018; -static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019; -static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020; -static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021; -static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022; -static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023; -static const int ACL_ERROR_OP_NOT_FOUND = 100024; -static const int ACL_ERROR_OP_LOAD_FAILED = 100025; -static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026; -static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027; -static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028; -static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029; -static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030; -static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031; -static const int ACL_ERROR_INVALID_QUEUE_ID = 100032; -static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033; -static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034; -static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035; -static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036; -static const int ACL_ERROR_REPEAT_FINALIZE = 100037; -static const int ACL_ERROR_NOT_STATIC_AIPP = 100038; -static const int ACL_ERROR_COMPILING_STUB_MODE = 100039; -static const int ACL_ERROR_GROUP_NOT_SET = 100040; -static const int ACL_ERROR_GROUP_NOT_CREATE = 100041; -static const int ACL_ERROR_PROF_ALREADY_RUN = 100042; -static const int ACL_ERROR_PROF_NOT_RUN = 100043; -static const int ACL_ERROR_DUMP_ALREADY_RUN = 100044; -static const int ACL_ERROR_DUMP_NOT_RUN = 100045; -static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046; -static const int ACL_ERROR_PROF_API_CONFLICT = 148047; - -static const int ACL_ERROR_BAD_ALLOC = 200000; -static const int 
ACL_ERROR_API_NOT_SUPPORT = 200001; -static const int ACL_ERROR_INVALID_DEVICE = 200002; -static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003; -static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004; -static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005; -static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006; -static const int ACL_ERROR_PROF_MODULES_UNSUPPORTED = 200007; - -static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000; - -static const int ACL_ERROR_INTERNAL_ERROR = 500000; -static const int ACL_ERROR_FAILURE = 500001; -static const int ACL_ERROR_GE_FAILURE = 500002; -static const int ACL_ERROR_RT_FAILURE = 500003; -static const int ACL_ERROR_DRV_FAILURE = 500004; -static const int ACL_ERROR_PROFILING_FAILURE = 500005; - -#define ACL_TENSOR_SHAPE_RANGE_NUM 2 -#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE - -typedef enum { - ACL_DT_UNDEFINED = -1, - ACL_FLOAT = 0, - ACL_FLOAT16 = 1, - ACL_INT8 = 2, - ACL_INT32 = 3, - ACL_UINT8 = 4, - ACL_INT16 = 6, - ACL_UINT16 = 7, - ACL_UINT32 = 8, - ACL_INT64 = 9, - ACL_UINT64 = 10, - ACL_DOUBLE = 11, - ACL_BOOL = 12, - ACL_STRING = 13, -} aclDataType; - -typedef enum { - ACL_FORMAT_UNDEFINED = -1, - ACL_FORMAT_NCHW = 0, - ACL_FORMAT_NHWC = 1, - ACL_FORMAT_ND = 2, - ACL_FORMAT_NC1HWC0 = 3, - ACL_FORMAT_FRACTAL_Z = 4, - ACL_FORMAT_NC1HWC0_C04 = 12, - ACL_FORMAT_FRACTAL_NZ = 29, -} aclFormat; - -typedef enum { - ACL_DEBUG = 0, - ACL_INFO = 1, - ACL_WARNING = 2, - ACL_ERROR = 3, -} aclLogLevel; - -/** - * @ingroup AscendCL - * @brief Converts data of type aclFloat16 to data of type float - * - * @param value [IN] Data to be converted - * - * @retval Transformed data - */ -ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value); - -/** - * @ingroup AscendCL - * @brief Converts data of type float to data of type aclFloat16 - * - * @param value [IN] Data to be converted - * - * @retval Transformed data - */ -ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value); - -/** - * @ingroup AscendCL - * 
@brief create data of aclDataBuffer - * - * @param data [IN] pointer to data - * @li Need to be managed by the user, - * call aclrtMalloc interface to apply for memory, - * call aclrtFree interface to release memory - * - * @param size [IN] size of data in bytes - * - * @retval pointer to created instance. nullptr if run out of memory - * - * @see aclrtMalloc | aclrtFree - */ -ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size); - -/** - * @ingroup AscendCL - * @brief destroy data of aclDataBuffer - * - * @par Function - * Only the aclDataBuffer type data is destroyed here. - * The memory of the data passed in when the aclDataDataBuffer interface - * is called to create aclDataBuffer type data must be released by the user - * - * @param dataBuffer [IN] pointer to the aclDataBuffer - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclCreateDataBuffer - */ -ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer); - -/** - * @ingroup AscendCL - * @brief get data address from aclDataBuffer - * - * @param dataBuffer [IN] pointer to the data of aclDataBuffer - * - * @retval data address - */ -ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer); - -/** - * @ingroup AscendCL - * @brief get data size of aclDataBuffer - * - * @param dataBuffer [IN] pointer to the data of aclDataBuffer - * - * @retval data size - */ -ACL_DEPRECATED_MESSAGE("aclGetDataBufferSize is deprecated, use aclGetDataBufferSizeV2 instead") -ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer); - -/** - * @ingroup AscendCL - * @brief get data size of aclDataBuffer to replace aclGetDataBufferSize - * - * @param dataBuffer [IN] pointer to the data of aclDataBuffer - * - * @retval data size - */ -ACL_FUNC_VISIBILITY size_t aclGetDataBufferSizeV2(const aclDataBuffer *dataBuffer); - -/** - * @ingroup AscendCL - * @brief get size of 
aclDataType - * - * @param dataType [IN] aclDataType data the size to get - * - * @retval size of the aclDataType - */ -ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType); - -// interfaces of tensor desc -/** - * @ingroup AscendCL - * @brief create data aclTensorDesc - * - * @param dataType [IN] Data types described by tensor - * @param numDims [IN] the number of dimensions of the shape - * @param dims [IN] the size of the specified dimension - * @param format [IN] tensor format - * - * @retval aclTensorDesc pointer. - * @retval nullptr if param is invalid or run out of memory - */ -ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, - aclFormat format); - -/** - * @ingroup AscendCL - * @brief destroy data aclTensorDesc - * - * @param desc [IN] pointer to the data of aclTensorDesc to destroy - */ -ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); - -/** - * @ingroup AscendCL - * @brief set tensor shape range for aclTensorDesc - * - * @param desc [OUT] pointer to the data of aclTensorDesc - * @param dimsCount [IN] the number of dimensions of the shape - * @param dimsRange [IN] the range of dimensions of the shape - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount, - int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); - -/** - * @ingroup AscendCL - * @brief get data type specified by the tensor description - * - * @param desc [IN] pointer to the instance of aclTensorDesc - * - * @retval data type specified by the tensor description. 
- * @retval ACL_DT_UNDEFINED if description is null - */ -ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc); - -/** - * @ingroup AscendCL - * @brief get data format specified by the tensor description - * - * @param desc [IN] pointer to the instance of aclTensorDesc - * - * @retval data format specified by the tensor description. - * @retval ACL_FORMAT_UNDEFINED if description is null - */ -ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc); - -/** - * @ingroup AscendCL - * @brief get tensor size specified by the tensor description - * - * @param desc [IN] pointer to the instance of aclTensorDesc - * - * @retval data size specified by the tensor description. - * @retval 0 if description is null - */ -ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc); - -/** - * @ingroup AscendCL - * @brief get element count specified by the tensor description - * - * @param desc [IN] pointer to the instance of aclTensorDesc - * - * @retval element count specified by the tensor description. - * @retval 0 if description is null - */ -ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc); - -/** - * @ingroup AscendCL - * @brief get number of dims specified by the tensor description - * - * @param desc [IN] pointer to the instance of aclTensorDesc - * - * @retval number of dims specified by the tensor description. - * @retval 0 if description is null - * @retval ACL_UNKNOWN_RANK if the tensor dim is -2 - */ -ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc); - -/** - * @ingroup AscendCL - * @brief Get the size of the specified dim in the tensor description - * - * @param desc [IN] pointer to the instance of aclTensorDesc - * @param index [IN] index of dims, start from 0. - * - * @retval dim specified by the tensor description and index. 
- * @retval -1 if description or index is invalid - */ -ACL_DEPRECATED_MESSAGE("aclGetTensorDescDim is deprecated, use aclGetTensorDescDimV2 instead") -ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index); - -/** - * @ingroup AscendCL - * @brief Get the size of the specified dim in the tensor description - * - * @param desc [IN] pointer to the instance of aclTensorDesc - * @param index [IN] index of dims, start from 0. - * @param dimSize [OUT] size of the specified dim. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, size_t index, int64_t *dimSize); - -/** - * @ingroup AscendCL - * @brief Get the range of the specified dim in the tensor description - * - * @param desc [IN] pointer to the instance of aclTensorDesc - * @param index [IN] index of dims, start from 0. - * @param dimRangeNum [IN] number of dimRange. - * @param dimRange [OUT] range of the specified dim. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum, - int64_t *dimRange); - -/** - * @ingroup AscendCL - * @brief set tensor description name - * - * @param desc [OUT] pointer to the instance of aclTensorDesc - * @param name [IN] tensor description name - */ -ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name); - -/** - * @ingroup AscendCL - * @brief get tensor description name - * - * @param desc [IN] pointer to the instance of aclTensorDesc - * - * @retval tensor description name. 
- * @retval empty string if description is null - */ -ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc); - -/** - * @ingroup AscendCL - * @brief Convert the format in the source aclTensorDesc according to - * the specified dstFormat to generate a new target aclTensorDesc. - * The format in the source aclTensorDesc remains unchanged. - * - * @param srcDesc [IN] pointer to the source tensor desc - * @param dstFormat [IN] destination format - * @param dstDesc [OUT] pointer to the pointer to the destination tensor desc - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, - aclTensorDesc **dstDesc); - -/** - * @ingroup AscendCL - * @brief Set the storage format specified by the tensor description - * - * @param desc [OUT] pointer to the instance of aclTensorDesc - * @param format [IN] the storage format - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_DEPRECATED_MESSAGE("aclSetTensorStorageFormat is deprecated, use aclSetTensorFormat instead") -ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format); - -/** - * @ingroup AscendCL - * @brief Set the storage shape specified by the tensor description - * - * @param desc [OUT] pointer to the instance of aclTensorDesc - * @param numDims [IN] the number of dimensions of the shape - * @param dims [IN] the size of the specified dimension - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_DEPRECATED_MESSAGE("aclSetTensorStorageShape is deprecated, use aclSetTensorShape instead") -ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims); - -/** - * @ingroup AscendCL - * @brief Set the format specified by the tensor description - * - * @param desc [OUT] pointer to the instance of aclTensorDesc - * @param format [IN] the storage format - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclSetTensorFormat(aclTensorDesc *desc, aclFormat format); - -/** - * @ingroup AscendCL - * @brief Set the shape specified by the tensor description - * - * @param desc [OUT] pointer to the instance of aclTensorDesc - * @param numDims [IN] the number of dimensions of the shape - * @param dims [IN] the size of the specified dimension - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclSetTensorShape(aclTensorDesc *desc, int numDims, const int64_t *dims); - -/** - * @ingroup AscendCL - * @brief Set the original format specified by the tensor description - * - * @param desc [OUT] pointer to the instance of aclTensorDesc - * @param format [IN] the storage format - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclSetTensorOriginFormat(aclTensorDesc *desc, aclFormat format); - -/** - * @ingroup AscendCL - * @brief Set the original shape specified by the tensor description - * - * @param desc [OUT] pointer to the instance of aclTensorDesc - * @param numDims [IN] the number of dimensions of the shape - * @param dims [IN] the size of the specified dimension - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int numDims, const int64_t *dims); - -/** - * @ingroup AscendCL - * @brief get op description info - * - * @param desc [IN] pointer to tensor description - * @param index [IN] index of tensor - * - * @retval null for failed. - * @retval OtherValues success. - */ -ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); - -/** - * @ingroup AscendCL - * @brief get address of tensor - * - * @param desc [IN] pointer to tensor description - * - * @retval null for failed - * @retval OtherValues success - */ -ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); - -/** - * @ingroup AscendCL - * @brief Set the dynamic input name specified by the tensor description - * - * @param desc [OUT] pointer to the instance of aclTensorDesc - * @param dynamicInputName [IN] pointer to the dynamic input name - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName); - -/** - * @ingroup AscendCL - * @brief an interface for users to output APP logs - * - * @param logLevel [IN] the level of current log - * @param func [IN] the function where the log is located - * @param file [IN] the file where the log is located - * @param line [IN] Number of source lines where the log is located - * @param fmt [IN] the format of current log - * @param ... [IN] the value of current log - */ -ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, - const char *fmt, ...); - -#define ACL_APP_LOG(level, fmt, ...) 
aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h deleted file mode 100644 index 0652358d..00000000 --- a/inc/external/acl/acl_mdl.h +++ /dev/null @@ -1,1112 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_EXTERNAL_ACL_ACL_MODEL_H_ -#define INC_EXTERNAL_ACL_ACL_MODEL_H_ - -#include -#include - -#include "acl_base.h" -#include "acl_rt.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define ACL_MAX_DIM_CNT 128 -#define ACL_MAX_TENSOR_NAME_LEN 128 -#define ACL_MAX_BATCH_NUM 128 -#define ACL_MAX_HW_NUM 128 -#define ACL_MAX_SHAPE_COUNT 128 -#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF - -#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" -#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" - -typedef struct aclmdlDataset aclmdlDataset; -typedef struct aclmdlDesc aclmdlDesc; -typedef struct aclmdlAIPP aclmdlAIPP; -typedef struct aclAippExtendInfo aclAippExtendInfo; - -typedef enum { - ACL_YUV420SP_U8 = 1, - ACL_XRGB8888_U8, - ACL_RGB888_U8, - ACL_YUV400_U8, - ACL_NC1HWC0DI_FP16, - ACL_NC1HWC0DI_S8, - ACL_ARGB8888_U8, - ACL_YUYV_U8, - ACL_YUV422SP_U8, - ACL_AYUV444_U8, - ACL_RAW10, - ACL_RAW12, - ACL_RAW16, - ACL_RAW24, - ACL_AIPP_RESERVED = 0xffff, -} aclAippInputFormat; - -typedef enum { - 
ACL_DATA_WITHOUT_AIPP = 0, - ACL_DATA_WITH_STATIC_AIPP, - ACL_DATA_WITH_DYNAMIC_AIPP, - ACL_DYNAMIC_AIPP_NODE -} aclmdlInputAippType; - -typedef struct aclmdlIODims { - char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ - size_t dimCount; /**< dim array count */ - int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ -} aclmdlIODims; - -typedef struct aclAippDims { - aclmdlIODims srcDims; /**< input dims before model transform */ - size_t srcSize; /**< input size before model transform */ - aclmdlIODims aippOutdims; /**< aipp output dims */ - size_t aippOutSize; /**< aipp output size */ -} aclAippDims; - -typedef struct aclmdlBatch { - size_t batchCount; /**< batch array count */ - uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ -} aclmdlBatch; - -typedef struct aclmdlHW { - size_t hwCount; /**< height&width array count */ - uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ -} aclmdlHW; - -typedef struct aclAippInfo { - aclAippInputFormat inputFormat; - int32_t srcImageSizeW; - int32_t srcImageSizeH; - int8_t cropSwitch; - int32_t loadStartPosW; - int32_t loadStartPosH; - int32_t cropSizeW; - int32_t cropSizeH; - int8_t resizeSwitch; - int32_t resizeOutputW; - int32_t resizeOutputH; - int8_t paddingSwitch; - int32_t leftPaddingSize; - int32_t rightPaddingSize; - int32_t topPaddingSize; - int32_t bottomPaddingSize; - int8_t cscSwitch; - int8_t rbuvSwapSwitch; - int8_t axSwapSwitch; - int8_t singleLineMode; - int32_t matrixR0C0; - int32_t matrixR0C1; - int32_t matrixR0C2; - int32_t matrixR1C0; - int32_t matrixR1C1; - int32_t matrixR1C2; - int32_t matrixR2C0; - int32_t matrixR2C1; - int32_t matrixR2C2; - int32_t outputBias0; - int32_t outputBias1; - int32_t outputBias2; - int32_t inputBias0; - int32_t inputBias1; - int32_t inputBias2; - int32_t meanChn0; - int32_t meanChn1; - int32_t meanChn2; - int32_t meanChn3; - float minChn0; - float minChn1; - float minChn2; - float minChn3; - float varReciChn0; - float varReciChn1; - float 
varReciChn2; - float varReciChn3; - aclFormat srcFormat; - aclDataType srcDatatype; - size_t srcDimNum; - size_t shapeCount; - aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; - aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ -} aclAippInfo; - -/** - * @ingroup AscendCL - * @brief Create data of type aclmdlDesc - * - * @retval the aclmdlDesc pointer - */ -ACL_FUNC_VISIBILITY aclmdlDesc *aclmdlCreateDesc(); - -/** - * @ingroup AscendCL - * @brief destroy data of type aclmdlDesc - * - * @param modelDesc [IN] Pointer to almdldlDesc to be destroyed - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlDestroyDesc(aclmdlDesc *modelDesc); - -/** - * @ingroup AscendCL - * @brief Get aclmdlDesc data of the model according to the model ID - * - * @param modelDesc [OUT] aclmdlDesc pointer - * @param modelId [IN] model id - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetDesc(aclmdlDesc *modelDesc, uint32_t modelId); - -/** - * @ingroup AscendCL - * @brief Get the number of the inputs of - * the model according to data of aclmdlDesc - * - * @param modelDesc [IN] aclmdlDesc pointer - * - * @retval input size with aclmdlDesc - */ -ACL_FUNC_VISIBILITY size_t aclmdlGetNumInputs(aclmdlDesc *modelDesc); - -/** - * @ingroup AscendCL - * @brief Get the number of the output of - * the model according to data of aclmdlDesc - * - * @param modelDesc [IN] aclmdlDesc pointer - * - * @retval output size with aclmdlDesc - */ -ACL_FUNC_VISIBILITY size_t aclmdlGetNumOutputs(aclmdlDesc *modelDesc); - -/** - * @ingroup AscendCL - * @brief Get the size of the specified input according to - * the data of type aclmdlDesc - * - * @param modelDesc [IN] aclmdlDesc pointer - * @param index [IN] the size of the number of inputs to be obtained, - * the index value starts from 0 - * - * @retval Specify the size of the input - */ -ACL_FUNC_VISIBILITY size_t aclmdlGetInputSizeByIndex(aclmdlDesc *modelDesc, size_t index); - -/** - * @ingroup AscendCL - * @brief Get the size of the specified output according to - * the data of type aclmdlDesc - * - * @param modelDesc [IN] aclmdlDesc pointer - * @param index [IN] the size of the number of outputs to be obtained, - * the index value starts from 0 - * - * @retval Specify the size of the output - */ -ACL_FUNC_VISIBILITY size_t aclmdlGetOutputSizeByIndex(aclmdlDesc *modelDesc, size_t index); - -/** - * @ingroup AscendCL - * @brief Create data of type aclmdlDataset - * - * @retval the aclmdlDataset pointer - */ -ACL_FUNC_VISIBILITY aclmdlDataset *aclmdlCreateDataset(); - -/** - * @ingroup AscendCL - * @brief destroy data of type aclmdlDataset - * - * @param dataset [IN] Pointer to aclmdlDataset to be destroyed - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlDestroyDataset(const aclmdlDataset *dataset); - -/** - * @ingroup AscendCL - * @brief Add aclDataBuffer to aclmdlDataset - * - * @param dataset [OUT] aclmdlDataset address of aclDataBuffer to be added - * @param dataBuffer [IN] aclDataBuffer address to be added - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclDataBuffer *dataBuffer); - -/** - * @ingroup AscendCL - * @brief Get the number of aclDataBuffer in aclmdlDataset - * - * @param dataset [IN] aclmdlDataset poiter - * - * @retval the number of aclDataBuffer - */ -ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *dataset); - -/** - * @ingroup AscendCL - * @brief Get the aclDataBuffer in aclmdlDataset by index - * - * @param dataset [IN] aclmdlDataset poiter - * @param index [IN] the index of aclDataBuffer - * - * @retval Get successfully, return the address of aclDataBuffer - * @retval Failure return NULL - */ -ACL_FUNC_VISIBILITY aclDataBuffer *aclmdlGetDatasetBuffer(const aclmdlDataset *dataset, size_t index); - -/** - * @ingroup AscendCL - * @brief Load offline model data from files - * and manage memory internally by the system - * - * @par Function - * After the system finishes loading the model, - * the model ID returned is used as a mark to identify the model - * during subsequent operations - * - * @param modelPath [IN] Storage path for offline model files - * @param modelId [OUT] Model ID generated after - * the system finishes loading the model - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t *modelId); - -/** - * @ingroup AscendCL - * @brief Load offline model data from memory and manage the memory of - * model running internally by the system - * - * @par Function - * After the system finishes loading the model, - * the model ID returned is used as a mark to identify the model - * during subsequent operations - * - * @param model [IN] Model data stored in memory - * @param modelSize [IN] model data size - * @param modelId [OUT] Model ID generated after - * the system finishes loading the model - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId); - -/** - * @ingroup AscendCL - * @brief Load offline model data from a file, - * and the user manages the memory of the model run by itself - * - * @par Function - * After the system finishes loading the model, - * the model ID returned is used as a mark to identify the model - * during subsequent operations. - * @param modelPath [IN] Storage path for offline model files - * @param modelId [OUT] Model ID generated after finishes loading the model - * @param workPtr [IN] A pointer to the working memory - * required by the model on the Device,can be null - * @param workSize [IN] The amount of working memory required by the model - * @param weightPtr [IN] Pointer to model weight memory on Device - * @param weightSize [IN] The amount of weight memory required by the model - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr, - size_t workSize, void *weightPtr, size_t weightSize); - -/** - * @ingroup AscendCL - * @brief Load offline model data from memory, - * and the user can manage the memory of model running - * - * @par Function - * After the system finishes loading the model, - * the model ID returned is used as a mark to identify the model - * during subsequent operations - * @param model [IN] Model data stored in memory - * @param modelSize [IN] model data size - * @param modelId [OUT] Model ID generated after finishes loading the model - * @param workPtr [IN] A pointer to the working memory - * required by the model on the Device,can be null - * @param workSize [IN] work memory size - * @param weightPtr [IN] Pointer to model weight memory on Device,can be null - * @param weightSize [IN] The amount of weight memory required by the model - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId, - void *workPtr, size_t workSize, void *weightPtr, - size_t weightSize); - -/** - * @ingroup AscendCL - * @brief load model from file with async queue - * - * @param modelPath [IN] model path - * @param modelId [OUT] return model id if load success - * @param inputQ [IN] input queue pointer - * @param inputQNum [IN] input queue num - * @param outputQ [IN] output queue pointer - * @param outputQNum [IN] output queue num - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint32_t *modelId, const uint32_t *inputQ, - size_t inputQNum, const uint32_t *outputQ, size_t outputQNum); - -/** - * @ingroup AscendCL - * @brief load model from memory with async queue - * - * @param model [IN] model memory which user manages - * @param modelSize [IN] model size - * @param modelId [OUT] return model id if load success - * @param inputQ [IN] input queue pointer - * @param inputQNum [IN] input queue num - * @param outputQ [IN] output queue pointer - * @param outputQNum [IN] output queue num - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, - const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ, - size_t outputQNum); - -/** - * @ingroup AscendCL - * @brief Execute model synchronous inference until the inference result is returned - * - * @param modelId [IN] ID of the model to perform inference - * @param input [IN] Input data for model inference - * @param output [OUT] Output data for model inference - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output); - -/** - * @ingroup AscendCL - * @brief Execute model asynchronous inference until the inference result is returned - * - * @param modelId [IN] ID of the model to perform inference - * @param input [IN] Input data for model inference - * @param output [OUT] Output data for model inference - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | - * aclmdlLoadFromMemWithMem - */ -ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output, - aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief unload model with model id - * - * @param modelId [IN] model id to be unloaded - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlUnload(uint32_t modelId); - -/** - * @ingroup AscendCL - * @brief Get the weight memory size and working memory size - * required for model execution according to the model file - * - * @param fileName [IN] Model path to get memory information - * @param workSize [OUT] The amount of working memory for model executed - * @param weightSize [OUT] The amount of weight memory for model executed - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlQuerySize(const char *fileName, size_t *workSize, size_t *weightSize); - -/** - * @ingroup AscendCL - * @brief Obtain the weights required for - * model execution according to the model data in memory - * - * @par Restriction - * The execution and weight memory is Device memory, - * and requires user application and release. - * @param model [IN] model memory which user manages - * @param modelSize [IN] model data size - * @param workSize [OUT] The amount of working memory for model executed - * @param weightSize [OUT] The amount of weight memory for model executed - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlQuerySizeFromMem(const void *model, size_t modelSize, size_t *workSize, - size_t *weightSize); - -/** - * @ingroup AscendCL - * @brief In dynamic batch scenarios, - * it is used to set the number of images processed - * at one time during model inference - * - * @param modelId [IN] model id - * @param dataset [IN|OUT] data for model inference - * @param index [IN] index of dynamic tensor - * @param batchSize [IN] Number of images processed at a time during model - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | - * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicBatchSize(uint32_t modelId, aclmdlDataset *dataset, size_t index, - uint64_t batchSize); - -/** - * @ingroup AscendCL - * @brief Sets the H and W of the specified input of the model - * - * @param modelId [IN] model id - * @param dataset [IN|OUT] data for model inference - * @param index [IN] index of dynamic tensor - * @param height [IN] model height - * @param width [IN] model width - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | - * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicHWSize(uint32_t modelId, aclmdlDataset *dataset, size_t index, - uint64_t height, uint64_t width); - -/** - * @ingroup AscendCL - * @brief Sets the dynamic dims of the specified input of the model - * - * @param modelId [IN] model id - * @param dataset [IN|OUT] data for model inference - * @param index [IN] index of dynamic dims - * @param dims [IN] value of dynamic dims - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | - * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetInputDynamicDims(uint32_t modelId, aclmdlDataset *dataset, size_t index, - const aclmdlIODims *dims); - -/** - * @ingroup AscendCL - * @brief get input dims info - * - * @param modelDesc [IN] model description - * @param index [IN] input tensor index - * @param dims [OUT] dims info - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlGetInputDimsV2 - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetInputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); - -/** - * @ingroup AscendCL - * @brief get input dims info(version 2), especially for static aipp - * it is the same with aclmdlGetInputDims while model without static aipp - * - * @param modelDesc [IN] model description - * @param index [IN] input tensor index - * @param dims [OUT] dims info - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlGetInputDims - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetInputDimsV2(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); - -/** - * @ingroup AscendCL - * @brief get output dims info - * - * @param modelDesc [IN] model description - * @param index [IN] output tensor index - * @param dims [OUT] dims info - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); - -/** - * @ingroup AscendCL - * @brief get current output dims info - * - * @par Function - * The following use cases are supported: - * @li Get current output shape when model is dynamic and - * dynamic shape info is set - * @li Get max output shape when model is dynamic and - * dynamic shape info is not set - * @li Get actual output shape when model is static - * - * @param modelDesc [IN] model description - * @param index [IN] output tensor index - * @param dims [OUT] dims info - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); - -/** - * @ingroup AscendCL - * @brief get input name by index - * - * @param modelDesc [IN] model description - * @param index [IN] intput tensor index - * - * @retval input tensor name,the same life cycle with modelDesc - */ -ACL_FUNC_VISIBILITY const char *aclmdlGetInputNameByIndex(const aclmdlDesc *modelDesc, size_t index); - -/** - * @ingroup AscendCL - * @brief get output name by index - * - * @param modelDesc [IN] model description - * @param index [IN] output tensor index - * - * @retval output tensor name,the same life cycle with modelDesc - */ -ACL_FUNC_VISIBILITY const char *aclmdlGetOutputNameByIndex(const aclmdlDesc *modelDesc, size_t index); - -/** - * @ingroup AscendCL - * @brief get input format by index - * - * @param modelDesc [IN] model description - * @param index [IN] intput tensor index - * - * @retval input tensor format - */ -ACL_FUNC_VISIBILITY aclFormat aclmdlGetInputFormat(const aclmdlDesc *modelDesc, size_t index); - -/** - * @ingroup AscendCL - * @brief get output format by index - * - * @param modelDesc [IN] model description - * @param index [IN] output tensor index - * - * @retval output 
tensor format - */ -ACL_FUNC_VISIBILITY aclFormat aclmdlGetOutputFormat(const aclmdlDesc *modelDesc, size_t index); - -/** - * @ingroup AscendCL - * @brief get input data type by index - * - * @param modelDesc [IN] model description - * @param index [IN] intput tensor index - * - * @retval input tensor data type - */ -ACL_FUNC_VISIBILITY aclDataType aclmdlGetInputDataType(const aclmdlDesc *modelDesc, size_t index); - -/** - * @ingroup AscendCL - * @brief get output data type by index - * - * @param modelDesc [IN] model description - * @param index [IN] output tensor index - * - * @retval output tensor data type - */ -ACL_FUNC_VISIBILITY aclDataType aclmdlGetOutputDataType(const aclmdlDesc *modelDesc, size_t index); - -/** - * @ingroup AscendCL - * @brief get input tensor index by name - * - * @param modelDesc [IN] model description - * @param name [IN] intput tensor name - * @param index [OUT] intput tensor index - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetInputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index); - -/** - * @ingroup AscendCL - * @brief get output tensor index by name - * - * @param modelDesc [IN] model description - * @param name [IN] output tensor name - * @param index [OUT] output tensor index - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetOutputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index); - -/** - * @ingroup AscendCL - * @brief get dynamic batch info - * - * @param modelDesc [IN] model description - * @param batch [OUT] dynamic batch info - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicBatch(const aclmdlDesc *modelDesc, aclmdlBatch *batch); - -/** - * @ingroup AscendCL - * @brief get dynamic height&width info - * - * @param modelDesc [IN] model description - * @param index [IN] input tensor index - * @param hw [OUT] dynamic height&width info - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicHW(const aclmdlDesc *modelDesc, size_t index, aclmdlHW *hw); - -/** - * @ingroup AscendCL - * @brief get dynamic gear count - * - * @param modelDesc [IN] model description - * @param index [IN] unused, must be -1 - * @param gearCount [OUT] dynamic gear count - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicGearCount(const aclmdlDesc *modelDesc, size_t index, - size_t *gearCount); - -/** - * @ingroup AscendCL - * @brief get dynamic dims info - * - * @param modelDesc [IN] model description - * @param index [IN] unused, must be -1 - * @param dims [OUT] value of dynamic dims - * @param gearCount [IN] dynamic gear count - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims, - size_t gearCount); - -/** - * @ingroup AscendCL - * @brief Create data of type aclmdlAIPP - * - * @param batchSize [IN] batchsizes of model - * - * @retval the aclmdlAIPP pointer - */ -ACL_FUNC_VISIBILITY aclmdlAIPP *aclmdlCreateAIPP(uint64_t batchSize); - -/** - * @ingroup AscendCL - * @brief destroy data of type aclmdlAIPP - * - * @param aippParmsSet [IN] Pointer for aclmdlAIPP to be destroyed - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlDestroyAIPP(const aclmdlAIPP *aippParmsSet); - -/** - * @ingroup AscendCL - * @brief set InputFormat of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param inputFormat [IN] The inputFormat of aipp - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, aclAippInputFormat inputFormat); - -/** - * @ingroup AscendCL - * @brief set cscParms of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param csc_switch [IN] Csc switch - * @param cscMatrixR0C0 [IN] Csc_matrix_r0_c0 - * @param cscMatrixR0C1 [IN] Csc_matrix_r0_c1 - * @param cscMatrixR0C2 [IN] Csc_matrix_r0_c2 - * @param cscMatrixR1C0 [IN] Csc_matrix_r1_c0 - * @param cscMatrixR1C1 [IN] Csc_matrix_r1_c1 - * @param cscMatrixR1C2 [IN] Csc_matrix_r1_c2 - * @param cscMatrixR2C0 [IN] Csc_matrix_r2_c0 - * @param cscMatrixR2C1 [IN] Csc_matrix_r2_c1 - * @param cscMatrixR2C2 [IN] Csc_matrix_r2_c2 - * @param cscOutputBiasR0 [IN] Output Bias for RGB to YUV, element of row 0, unsigned number - * @param cscOutputBiasR1 [IN] Output Bias for RGB to YUV, element of row 1, unsigned number - * @param cscOutputBiasR2 [IN] Output Bias for RGB to YUV, element of row 2, unsigned number - * @param cscInputBiasR0 [IN] Input Bias for YUV to RGB, element of row 0, unsigned number - * @param cscInputBiasR1 [IN] Input Bias for YUV to RGB, element of row 1, unsigned number - * @param cscInputBiasR2 [IN] Input Bias for YUV to RGB, element of row 2, unsigned number - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, - int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, - int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, - int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, - uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, - uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, - uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); - -/** - * @ingroup AscendCL - * @brief set rb/ub swap switch of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param rbuvSwapSwitch [IN] rb/ub swap switch - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); - -/** - * @ingroup AscendCL - * @brief set RGBA->ARGB, YUVA->AYUV swap switch of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param axSwapSwitch [IN] RGBA->ARGB, YUVA->AYUV swap switch - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); - -/** - * @ingroup AscendCL - * @brief set source image of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param srcImageSizeW [IN] Source image width - * @param srcImageSizeH [IN] Source image height - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, - int32_t srcImageSizeH); - -/** - * @ingroup AscendCL - * @brief set resize switch of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param scfSwitch [IN] Resize switch - * @param scfInputSizeW [IN] Input width of scf - * @param scfInputSizeH [IN] Input height of scf - * @param scfOutputSizeW [IN] Output width of scf - * @param scfOutputSizeH [IN] Output height of scf - * @param batchIndex [IN] Batch parameter index - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW, - int32_t scfInputSizeH, int32_t scfOutputSizeW, - int32_t scfOutputSizeH, uint64_t batchIndex); - -/** - * @ingroup AscendCL - * @brief set cropParams of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param cropSwitch [IN] Crop switch - * @param cropStartPosW [IN] The start horizontal position of cropping - * @param cropStartPosH [IN] The start vertical position of cropping - * @param cropSizeW [IN] Crop width - * @param cropSizeH [IN] Crop height - * @param batchIndex [IN] Batch parameter index - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW, - int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH, - uint64_t batchIndex); - -/** - * @ingroup AscendCL - * @brief set paddingParams of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param paddingSwitch [IN] Padding switch - * @param paddingSizeTop [IN] Top padding size - * @param paddingSizeBottom [IN] Bottom padding size - * @param paddingSizeLeft [IN] Left padding size - * @param paddingSizeRight [IN] Right padding size - * @param batchIndex [IN] Batch parameter index - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, - int32_t paddingSizeTop, int32_t paddingSizeBottom, - int32_t paddingSizeLeft, int32_t paddingSizeRight, - uint64_t batchIndex); - -/** - * @ingroup AscendCL - * @brief set DtcPixelMean of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param dtcPixelMeanChn0 [IN] Mean value of channel 0 - * @param dtcPixelMeanChn1 [IN] Mean value of channel 1 - * @param dtcPixelMeanChn2 [IN] Mean value of channel 2 - * @param dtcPixelMeanChn3 [IN] Mean value of channel 3 - * @param batchIndex [IN] Batch parameter index - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0, - int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2, - int16_t dtcPixelMeanChn3, uint64_t batchIndex); - -/** - * @ingroup AscendCL - * @brief set DtcPixelMin of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param dtcPixelMinChn0 [IN] Min value of channel 0 - * @param dtcPixelMinChn1 [IN] Min value of channel 1 - * @param dtcPixelMinChn2 [IN] Min value of channel 2 - * @param dtcPixelMinChn3 [IN] Min value of channel 3 - * @param batchIndex [IN] Batch parameter index - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0, - float dtcPixelMinChn1, float dtcPixelMinChn2, - float dtcPixelMinChn3, uint64_t batchIndex); - -/** - * @ingroup AscendCL - * @brief set PixelVarReci of type aclmdlAIPP - * - * @param aippParmsSet [OUT] Pointer for aclmdlAIPP - * @param dtcPixelVarReciChn0 [IN] sfr_dtc_pixel_variance_reci_ch0 - * @param dtcPixelVarReciChn1 [IN] sfr_dtc_pixel_variance_reci_ch1 - * @param dtcPixelVarReciChn2 [IN] sfr_dtc_pixel_variance_reci_ch2 - * @param dtcPixelVarReciChn3 [IN] sfr_dtc_pixel_variance_reci_ch3 - * @param batchIndex [IN] Batch parameter index - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0, - float dtcPixelVarReciChn1, float dtcPixelVarReciChn2, - float dtcPixelVarReciChn3, uint64_t batchIndex); - -/** - * @ingroup AscendCL - * @brief set aipp parameters to model - * - * @param modelId [IN] model id - * @param dataset [IN] Pointer of dataset - * @param index [IN] index of input for aipp data(ACL_DYNAMIC_AIPP_NODE) - * @param aippParmsSet [IN] Pointer for aclmdlAIPP - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | - * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index, - const aclmdlAIPP *aippParmsSet); - -/** - * @ingroup AscendCL - * @brief set aipp parameters to model - * - * @param modelId [IN] model id - * @param dataset [IN] Pointer of dataset - * @param index [IN] index of input for data which linked dynamic aipp(ACL_DATA_WITH_DYNAMIC_AIPP) - * @param aippParmsSet [IN] Pointer for aclmdlAIPP - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | - * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index, - const aclmdlAIPP *aippParmsSet); - -/** - * @ingroup AscendCL - * @brief get input aipp type - * - * @param modelId [IN] model id - * @param index [IN] index of input - * @param type [OUT] aipp type for input.refrer to aclmdlInputAippType(enum) - * @param dynamicAttachedDataIndex [OUT] index for dynamic attached data(ACL_DYNAMIC_AIPP_NODE) - * valid when type is ACL_DATA_WITH_DYNAMIC_AIPP, invalid value is ACL_INVALID_NODE_INDEX - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | - * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type, - size_t *dynamicAttachedDataIndex); - -/** - * @ingroup AscendCL - * @brief get static aipp parameters from model - * - * @param modelId [IN] model id - * @param index [IN] index of tensor - * @param aippinfo [OUT] Pointer for static aipp info - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp - * @retval OtherValues Failure - * - * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | - * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); - -/** - * @ingroup AscendCL - * @brief get op description info - * - * @param deviceId [IN] device id - * @param streamId [IN] stream id - * @param taskId [IN] task id - * @param opName [OUT] pointer to op name - * @param opNameLen [IN] the length of op name - * @param inputDesc [OUT] pointer to input description - * @param numInputs [OUT] the number of input tensor - * @param outputDesc [OUT] pointer to output description - * @param numOutputs [OUT] the number of output tensor - * - * @retval ACL_SUCCESS The function is successfully executed - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId, - char *opName, size_t opNameLen, aclTensorDesc **inputDesc, - size_t *numInputs, aclTensorDesc **outputDesc, - size_t *numOutputs); - -/** - * @ingroup AscendCL - * @brief init dump - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); - -/** - * @ingroup AscendCL - * @brief set param of dump - * - * @param dumpCfgPath [IN] the path of dump config - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); - -/** - * @ingroup AscendCL - * @brief finalize dump. - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ diff --git a/inc/external/acl/acl_op.h b/inc/external/acl/acl_op.h deleted file mode 100644 index 882c6ae6..00000000 --- a/inc/external/acl/acl_op.h +++ /dev/null @@ -1,503 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef INC_EXTERNAL_ACL_ACL_OP_H_ -#define INC_EXTERNAL_ACL_ACL_OP_H_ - -#include "acl_base.h" -#include "acl_rt.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct aclopHandle aclopHandle; -typedef struct aclopAttr aclopAttr; -typedef struct aclopKernelDesc aclopKernelDesc; - -typedef void (*aclDataDeallocator)(void *data, size_t length); - -static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; - -typedef enum aclEngineType { - ACL_ENGINE_SYS, - ACL_ENGINE_AICORE, - ACL_ENGINE_VECTOR, -} aclopEngineType; - -/** - * @ingroup AscendCL - * @brief Set base directory that contains single op models - * - * @par Restriction - * The aclopSetModelDir interface can be called only once in a process. - * @param modelDir [IN] path of the directory - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir); - -/** - * @ingroup AscendCL - * @brief load single op models from memory - * - * @par Restriction - * The aclopLoad interface can be called more than one times in a process. - * @param model [IN] address of single op models - * @param modelSize [IN] size of single op models - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize); - -/** - * @ingroup AscendCL - * @brief create data of type aclopAttr - * - * @retval pointer to created instance. - * @retval nullptr if run out of memory - */ -ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr(); - -/** - * @ingroup AscendCL - * @brief destroy data of typ aclopAttr - * - * @param attr [IN] pointer to the instance of aclopAttr - */ -ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr); - -/** - * @ingroup AscendCL - * @brief set an attribute. the type of the attribute is bool - * - * @param attr [OUT] pointer to the instance of aclopAttr - * @param attrName [IN] attribute name - * @param attrValue [IN] attribute value - * false if attrValue is 0, true otherwise. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue); - -/** - * @ingroup AscendCL - * @brief set an attribute. the type of the attribute is int64_t - * - * @param attr [OUT] pointer to the instance of aclopAttr - * @param attrName [IN] attribute name - * @param attrValue [IN] attribute value - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue); - -/** - * @ingroup AscendCL - * @brief set an attribute. 
the type of the attribute is float - * - * @param attr [OUT] pointer to the instance of aclopAttr - * @param attrName [IN] attribute name - * @param attrValue [IN] attribute value - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue); - -/** - * @ingroup AscendCL - * @brief set an attribute. the type of the attribute is string - * - * @param attr [OUT] pointer to the instance of aclopAttr - * @param attrName [IN] attribute name - * @param attrValue [IN] attribute value - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue); - -/** - * @ingroup AscendCL - * @brief set an attribute. the type of the attribute is list of bools - * - * @param attr [OUT] pointer to the instance of aclopAttr - * @param attrName [IN] attribute name - * @param numValues [IN] number of values. false if attrValue is 0, true otherwise. - * @param values [IN] pointer to values - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, - const uint8_t *values); - -/** - * @ingroup AscendCL - * @brief set an attribute. the type of the attribute is list of ints - * - * @param attr [OUT] pointer to the instance of aclopAttr - * @param attrName [IN] attribute name - * @param numValues [IN] number of values - * @param values [IN] pointer to values - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, - const int64_t *values); - -/** - * @ingroup AscendCL - * @brief set an attribute. 
the type of the attribute is list of floats - * - * @param attr [OUT] pointer to the instance of aclopAttr - * @param attrName [IN] attribute name - * @param numValues [IN] number of values - * @param values [IN] pointer to values - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, - const float *values); - -/** - * @ingroup AscendCL - * @brief set an attribute. the type of the attribute is list of strings - * - * @param attr [OUT] pointer to the instance of aclopAttr - * @param attrName [IN] attribute name - * @param numValues [IN] number of values - * @param values [IN] pointer to values - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, - const char **values); - -/** - * @ingroup AscendCL - * @brief set an attribute. the type of the attribute is list of list of ints - * - * @param attr [OUT] pointer to the instance of aclopAttr - * @param attrName [IN] attribute name - * @param numLists [IN] number of lists - * @param numValues [IN] pointer to number of values of each list - * @param values [IN] pointer to values - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists, - const int *numValues, const int64_t *const values[]); - -/** - * @ingroup AscendCL - * @brief Load and execute the specified operator asynchronously - * - * @par Restriction - * @li The input and output organization of each operator is different, - * and the application needs to organize the operator strictly - * according to the operator input and output parameters when calling. 
- * @li When the user calls aclopExecute, - * the ACL finds the corresponding task according to the optype, - * the description of the input tesnsor, - * the description of the output tesnsor, and attr, and issues the execution. - * - * @param opType [IN] type of op - * @param numInputs [IN] number of inputs - * @param inputDesc [IN] pointer to array of input tensor descriptions - * @param inputs [IN] pointer to array of input buffers - * @param numOutputs [IN] number of outputs - * @param outputDesc [IN] pointer to array of output tensor descriptions - * @param outputs [OUT] pointer to array of output buffers - * @param attr [IN] pointer to instance of aclopAttr. - * may pass nullptr if the op has no attribute - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") -ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], - const aclDataBuffer *const inputs[], int numOutputs, - const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], - const aclopAttr *attr, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Load and execute the specified operator - * The difference with aclopExecute is that aclopExecuteV2 will refresh outputDesc - * - * @par Restriction - * @li The input and output organization of each operator is different, - * and the application needs to organize the operator strictly - * according to the operator input and output parameters when calling. - * @li When the user calls aclopExecuteV2, - * the ACL finds the corresponding task according to the optype, - * the description of the input tesnsor, - * the description of the output tesnsor, and attr, and issues the execution. 
- * - * @param opType [IN] type of op - * @param numInputs [IN] number of inputs - * @param inputDesc [IN] pointer to array of input tensor descriptions - * @param inputs [IN] pointer to array of input buffers - * @param numOutputs [IN] number of outputs - * @param outputDesc [IN|OUT] pointer to array of output tensor descriptions - * @param outputs [OUT] pointer to array of output buffers - * @param attr [IN] pointer to instance of aclopAttr. - * may pass nullptr if the op has no attribute - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], - aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], - aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create a instance of aclopHandle. - * - * @param opType [IN] type of op - * @param numInputs [IN] number of inputs - * @param inputDesc [IN] pointer to array of input tensor descriptions - * @param numOutputs [IN] number of outputs - * @param outputDesc [IN] pointer to array of output tensor descriptions - * @param opAttr [IN] pointer to instance of aclopAttr. - * may pass nullptr if the op has no attribute - * @param handle [OUT] pointer to the pointer to the handle - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs, - const aclTensorDesc *const inputDesc[], int numOutputs, - const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, - aclopHandle **handle); - -/** - * @ingroup AscendCL - * @brief destroy aclopHandle instance - * - * @param handle [IN] pointer to the instance of aclopHandle - */ -ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle); - -/** - * @ingroup AscendCL - * @brief execute an op with the handle. - * can save op model matching cost compared with aclopExecute - * - * @param handle [IN] pointer to the instance of aclopHandle. - * The aclopCreateHandle interface has been called - * in advance to create aclopHandle type data. - * @param numInputs [IN] number of inputs - * @param inputs [IN] pointer to array of input buffers. - * The aclCreateDataBuffer interface has been called - * in advance to create aclDataBuffer type data. - * @param numOutputs [IN] number of outputs - * @param outputs [OUT] pointer to array of output buffers - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclopCreateHandle | aclCreateDataBuffer - */ -ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs, - const aclDataBuffer *const inputs[], int numOutputs, - aclDataBuffer *const outputs[], aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief cast data type - * - * @param srcDesc [IN] source tensor desc - * @param srcBuffer [IN] source tensor buffer - * @param dstDesc [IN] destination tensor desc - * @param dstBuffer [OUT] destination tensor buffer - * @param truncate [IN] do not truncate if value is 0, truncate otherwise - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer, - const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate, - aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create a handle for casting datatype - * - * @param srcDesc [IN] source tensor desc - * @param dstDesc [IN] destination tensor desc - * @param truncate [IN] do not truncate if value is 0, truncate otherwise - * @param handle [OUT] pointer to the pointer to the handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate, - aclopHandle **handle); - -/** - * @ingroup AscendCL - * @brief create kernel - * - * @param opType [IN] op type - * @param kernelId [IN] kernel id - * @param kernelName [IN] kernel name - * @param binData [IN] kernel bin data - * @param binSize [IN] kernel bin size - * @param enginetype [IN] enigne type - * @param deallocator [IN] callback function for deallocating bin data, - * null if bin data to be deallocated by caller - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclopCompile - */ -ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName, - void *binData, int binSize, aclopEngineType enginetype, - aclDataDeallocator deallocator); - -/** - * @ingroup AscendCL - * @brief create kernel - * - * @param numInputs [IN] number of inputs - * @param inputDesc [IN] pointer to array of input tensor descriptions - * @param numOutputs [IN] number of outputs - * @param outputDesc [IN] pointer to array of output tensor descriptions - * @param opAttr [IN] pointer to instance of aclopAttr - * @param aclopKernelDesc [IN] pointer to instance of aclopKernelDesc - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs, - const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, - aclopKernelDesc *aclopKernelDesc); - -/** - * @ingroup AscendCL - * @brief register compile function - * - * @param opType [IN] op type - * @param func [IN] compile function - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclopUnregisterCompileFunc - */ -ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func); - -/** - * @ingroup AscendCL - * @brief unregister compile function - * - * @param opType [IN] op type - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType); - -/** - * @ingroup AscendCL - * @brief set kernel args - * - * @param kernelDesc [IN] pointer to instance of aclopKernelDesc - * @param kernelId [IN] kernel id - * @param blockDim [IN] block dim - * @param args [IN] args - * @param argSize [IN] size in bytes of args - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim, - const void *args, uint32_t argSize); - -/** - * @ingroup AscendCL - * @brief set workspace sizes - * - * @param kernelDesc [IN] pointer to instance of aclopKernelDesc - * @param numWorkspaces [IN] number of workspaces - * @param workspaceSizes [IN] pointer to array of sizes of workspaces - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces, - size_t *workspaceSizes); - -/** - * @ingroup AscendCL - * @brief compile op with dynamic shape - * - * @param opType [IN] op type - * @param numInputs [IN] number of inputs - * @param inputDesc [IN] pointer to array of input tensor descriptions - * @param numOutputs [IN] number of outputs - * @param outputDesc [IN] pointer to array of output tensor descriptions - * @param attr [IN] pointer to instance of aclopAttr. - * may pass nullptr if the op has no attribute - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs, - const aclTensorDesc *const inputDesc[], int numOutputs, - const aclTensorDesc *const outputDesc[], const aclopAttr *attr); - -/** - * @ingroup AscendCL - * @brief inferShape the specified operator synchronously - * - * @param opType [IN] type of op - * @param numInputs [IN] number of inputs - * @param inputDesc [IN] pointer to array of input tensor descriptions - * @param inputs [IN] pointer to array of input buffers - * @param numOutputs [IN] number of outputs - * @param outputDesc [OUT] pointer to array of output tensor descriptions - * @param attr [IN] pointer to instance of aclopAttr. - * may pass nullptr if the op has no attribute - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[], - aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], - aclopAttr *attr); - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_ACL_OP_H_ diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h deleted file mode 100644 index 9bf5adf0..00000000 --- a/inc/external/acl/acl_op_compiler.h +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ -#define INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ - -#include "acl_base.h" -#include "acl_op.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType; - -typedef enum { - ACL_PRECISION_MODE, - ACL_AICORE_NUM, - ACL_AUTO_TUNE_MODE, - ACL_OP_SELECT_IMPL_MODE, - ACL_OPTYPELIST_FOR_IMPLMODE, - ACL_OP_DEBUG_LEVEL, - ACL_DEBUG_DIR, - ACL_OP_COMPILER_CACHE_MODE, - ACL_OP_COMPILER_CACHE_DIR -} aclCompileOpt; - -/** - * @ingroup AscendCL - * @brief compile op - * - * @param opType [IN] op type - * @param numInputs [IN] number of inputs - * @param inputDesc [IN] pointer to array of input tensor descriptions - * @param numOutputs [IN] number of outputs - * @param outputDesc [IN] pointer to array of output tensor descriptions - * @param attr [IN] pointer to instance of aclopAttr. - * may pass nullptr if the op has no attribute - * @param engineType [IN] engine type - * @param compileFlag [IN] compile flag - * @param opPath [IN] path of op - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], - int numOutputs, const aclTensorDesc *const outputDesc[], - const aclopAttr *attr, aclopEngineType engineType, - aclopCompileType compileFlag, const char *opPath); - -/** - * @ingroup AscendCL - * @brief compile and execute op - * - * @param opType [IN] op type - * @param numInputs [IN] number of inputs - * @param inputDesc [IN] pointer to array of input tensor descriptions - * @param inputs [IN] pointer to array of input buffers - * @param numOutputs [IN] number of outputs - * @param outputDesc [IN] pointer to array of output tensor descriptions - * @param outputs [IN] pointer to array of outputs buffers - * @param attr [IN] pointer to instance of aclopAttr. 
- * may pass nullptr if the op has no attribute - * @param engineType [IN] engine type - * @param compileFlag [IN] compile flag - * @param opPath [IN] path of op - * @param stream [IN] stream handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( - const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], - int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, - aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief set compile option - * - * @param aclCompileOpt [IN] compile option - * @param value [IN] pointer for the option value - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value); - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h deleted file mode 100644 index bfb8a68b..00000000 --- a/inc/external/acl/acl_prof.h +++ /dev/null @@ -1,297 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_EXTERNAL_ACL_PROF_H_ -#define INC_EXTERNAL_ACL_PROF_H_ - -#include "acl_base.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define ACL_PROF_ACL_API 0x0001 -#define ACL_PROF_TASK_TIME 0x0002 -#define ACL_PROF_AICORE_METRICS 0x0004 -#define ACL_PROF_AICPU_TRACE 0x0008 - -#define ACL_PROF_MAX_OP_NAME_LEN 257 -#define ACL_PROF_MAX_OP_TYPE_LEN 65 - -typedef enum { - ACL_AICORE_ARITHMATIC_THROUGHPUT = 0, - ACL_AICORE_PIPELINE = 1, - ACL_AICORE_SYNCHRONIZATION = 2, - ACL_AICORE_MEMORY = 3, - ACL_AICORE_INTERNAL_MEMORY = 4, - ACL_AICORE_STALL = 5, - ACL_AICORE_NONE = 0xFF -} aclprofAicoreMetrics; - -typedef struct aclprofConfig aclprofConfig; -typedef struct aclprofStopConfig aclprofStopConfig; -typedef struct aclprofAicoreEvents aclprofAicoreEvents; -typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; - -/** - * @ingroup AscendCL - * @brief profiling initialize - * - * @param profilerResultPath [IN] path of profiling result - * @param length [IN] length of profilerResultPath - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclprofFinalize - */ -ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length); - -/** - * @ingroup AscendCL - * @brief profiling finalize - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclprofInit - */ -ACL_FUNC_VISIBILITY aclError aclprofFinalize(); - -/** - * @ingroup AscendCL - * @brief Start profiling modules by profilerConfig - * - * @param profilerConfig [IN] config of profiling - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclprofStop - */ -ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); - -/** - * @ingroup AscendCL - * @brief Create data of type aclprofConfig - * - * @param deviceIdList [IN] list of device id - * @param deviceNums [IN] number of devices - * @param aicoreMetrics [IN] type of aicore metrics - * @param aicoreEvents [IN] pointer to aicore events, only support NULL now - * @param dataTypeConfig [IN] config modules need profiling - * - * @retval the aclprofConfig pointer - * - * @see aclprofDestroyConfig - */ -ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, - aclprofAicoreMetrics aicoreMetrics, - aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); - -/** - * @ingroup AscendCL - * @brief Destroy data of type aclprofConfig - * - * @param profilerConfig [IN] config of profiling - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclprofCreateConfig - */ -ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig); - -/** - * @ingroup AscendCL - * @brief stop profiling modules by stopProfilingConfig - * - * @param profilerConfig [IN] pointer to stop config of profiling - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclprofStart - */ -ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); - -/** - * @ingroup AscendCL - * @brief subscribe profiling data of model - * - * @param modelId [IN] the model id subscribed - * @param profSubscribeConfig [IN] pointer to config of model subscribe - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclprofModelUnSubscribe - */ -ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); - -/** - * @ingroup AscendCL - * @brief unsubscribe profiling data of model - * - * @param modelId [IN] the model id unsubscribed - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclprofModelSubscribe - */ -ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); - -/** - * @ingroup AscendCL - * @brief create subscribe config - * - * @param timeInfoSwitch [IN] switch whether get time info from model - * @param aicoreMetrics [IN] aicore metrics - * @param fd [IN] pointer to write pipe - * - * @retval the aclprofSubscribeConfig pointer - * - * @see aclprofDestroySubscribeConfig - */ -ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, - aclprofAicoreMetrics aicoreMetrics, void *fd); - -/** - * @ingroup AscendCL - * @brief destroy subscribe config - * - * @param profSubscribeConfig [IN] subscribe config - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclprofCreateSubscribeConfig - */ -ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig); - -/** - * @ingroup AscendCL - * @brief create subscribe config - * - * @param opDescSize [OUT] size of op desc - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize); - -/** - * @ingroup AscendCL - * @brief get op number from subscription data - * - * @param opInfo [IN] pointer to subscription data - * @param opInfoLen [IN] memory size of subscription data - * @param opNumber [OUT] op number of subscription data - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber); - -/** - * @ingroup AscendCL - * @brief get op type from subscription data - * - * @param opInfo [IN] pointer to subscription data - * @param opInfoLen [IN] memory size of subscription data - * @param index [IN] index of op array in opInfo - * @param opType [OUT] obtained op type string - * @param opTypeLen [IN] obtained length of op type string - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, - size_t opTypeLen); - -/** - * @ingroup AscendCL - * @brief get op type from subscription data - * - * @param opInfo [IN] pointer to subscription data - * @param opInfoLen [IN] memory size of subscription data - * @param index [IN] index of op array in opInfo - * @param opName [OUT] obtained op name string - * @param opNameLen [IN] obtained length of op name string - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, - size_t opNameLen); - -/** - * @ingroup AscendCL - * @brief get start time of specified op from subscription data - * - * @param opInfo [IN] pointer to subscription data - * @param opInfoLen [IN] memory size of subscription data - * @param index [IN] index of op array in opInfo - * - * @retval start time(us) of specified op with timestamp - * @retval 0 for failed - */ -ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index); - -/** - * @ingroup AscendCL - * @brief get end time of specified op from subscription data - * - * @param opInfo [IN] pointer to subscription data - * @param opInfoLen [IN] memory size of subscription data - * @param index [IN] index of op array in opInfo - * - * @retval end time(us) of specified op with timestamp - * @retval 0 for failed - */ -ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index); - -/** - * @ingroup AscendCL - * @brief get excution time of specified op from subscription data - * - * @param opInfo [IN] pointer to subscription data - * @param opInfoLen [IN] memory size of subscription data - * @param index [IN] index of op array in opInfo - * - * @retval execution time(us) of specified op with timestamp - * @retval 0 for failed - */ -ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index); - -/** - * @ingroup AscendCL - * @brief get model id from subscription data - * - * @param opInfo [IN] pointer to subscription data - * @param opInfoLen [IN] memory size of subscription data - * - * @retval model id of subscription data - * @retval 0 for failed - */ -ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_PROF_H_ \ No newline at end 
of file diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h deleted file mode 100644 index eb6b4240..00000000 --- a/inc/external/acl/acl_rt.h +++ /dev/null @@ -1,932 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_EXTERNAL_ACL_ACL_RT_H_ -#define INC_EXTERNAL_ACL_ACL_RT_H_ - -#include -#include -#include "acl_base.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum aclrtRunMode { - ACL_DEVICE, - ACL_HOST, -} aclrtRunMode; - -typedef enum aclrtTsId { - ACL_TS_ID_AICORE = 0, - ACL_TS_ID_AIVECTOR = 1, - ACL_TS_ID_RESERVED = 2, -} aclrtTsId; - -typedef enum aclrtEventStatus { - ACL_EVENT_STATUS_COMPLETE = 0, - ACL_EVENT_STATUS_NOT_READY = 1, - ACL_EVENT_STATUS_RESERVED = 2, -} aclrtEventStatus; - -typedef enum aclrtCallbackBlockType { - ACL_CALLBACK_NO_BLOCK, - ACL_CALLBACK_BLOCK, -} aclrtCallbackBlockType; - -typedef enum aclrtMemcpyKind { - ACL_MEMCPY_HOST_TO_HOST, - ACL_MEMCPY_HOST_TO_DEVICE, - ACL_MEMCPY_DEVICE_TO_HOST, - ACL_MEMCPY_DEVICE_TO_DEVICE, -} aclrtMemcpyKind; - -typedef enum aclrtMemMallocPolicy { - ACL_MEM_MALLOC_HUGE_FIRST, - ACL_MEM_MALLOC_HUGE_ONLY, - ACL_MEM_MALLOC_NORMAL_ONLY, - ACL_MEM_MALLOC_HUGE_FIRST_P2P, - ACL_MEM_MALLOC_HUGE_ONLY_P2P, - ACL_MEM_MALLOC_NORMAL_ONLY_P2P, -} aclrtMemMallocPolicy; - -typedef enum aclrtMemAttr { - ACL_DDR_MEM, - ACL_HBM_MEM, - ACL_DDR_MEM_HUGE, - ACL_DDR_MEM_NORMAL, - ACL_HBM_MEM_HUGE, - 
ACL_HBM_MEM_NORMAL, - ACL_DDR_MEM_P2P_HUGE, - ACL_DDR_MEM_P2P_NORMAL, - ACL_HBM_MEM_P2P_HUGE, - ACL_HBM_MEM_P2P_NORMAL, -} aclrtMemAttr; - -typedef enum aclrtGroupAttr { - ACL_GROUP_AICORE_INT, - ACL_GROUP_AIV_INT, - ACL_GROUP_AIC_INT, - ACL_GROUP_SDMANUM_INT, - ACL_GROUP_ASQNUM_INT -} aclrtGroupAttr; - -typedef struct tagRtGroupInfo aclrtGroupInfo; - -typedef struct rtExceptionInfo aclrtExceptionInfo; - -typedef void (*aclrtCallback)(void *userData); - -typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo); - -/** - * @ingroup AscendCL - * @brief Set a callback function to handle exception information - * - * @param callback [IN] callback function to handle exception information - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback); - -/** - * @ingroup AscendCL - * @brief Get task id from exception information - * - * @param info [IN] pointer of exception information - * - * @retval The task id from exception information - * @retval 0xFFFFFFFF if info is null - */ -ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info); - -/** - * @ingroup AscendCL - * @brief Get stream id from exception information - * - * @param info [IN] pointer of exception information - * - * @retval The stream id from exception information - * @retval 0xFFFFFFFF if info is null - */ -ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info); - -/** - * @ingroup AscendCL - * @brief Get thread id from exception information - * - * @param info [IN] pointer of exception information - * - * @retval The thread id of fail task - * @retval 0xFFFFFFFF if info is null - */ -ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info); - -/** - * @ingroup AscendCL - * @brief Get device id from exception information - * - * 
@param info [IN] pointer of exception information - * - * @retval The thread id of fail task - * @retval 0xFFFFFFFF if info is null - */ -ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info); - -/** - * @ingroup AscendCL - * @brief The thread that handles the callback function on the Stream - * - * @param threadId [IN] thread ID - * @param stream [IN] stream handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Add a callback function to be executed on the host - * to the task queue of the Stream - * - * @param fn [IN] Specify the callback function to be added - * The function prototype of the callback function is: - * typedef void (*aclrtCallback)(void *userData); - * @param userData [IN] User data to be passed to the callback function - * @param blockType [IN] callback block type - * @param stream [IN] stream handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType, - aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief After waiting for a specified time, trigger callback processing - * - * @par Function - * The thread processing callback specified by - * the aclrtSubscribeReport interface - * - * @param timeout [IN] timeout value - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtSubscribeReport - */ -ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout); - -/** - * @ingroup AscendCL - * @brief Cancel thread registration, - * the callback function on the specified Stream - * is no longer processed by the specified thread - * - * @param threadId [IN] thread ID - * @param stream [IN] stream handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create context and associates it with the calling thread - * - * @par Function - * The following use cases are supported: - * @li If you don't call the aclrtCreateContext interface - * to explicitly create the context, - * the system will use the default context, which is implicitly created - * when the aclrtSetDevice interface is called. - * @li If multiple contexts are created in a process - * (there is no limit on the number of contexts), - * the current thread can only use one of them at the same time. - * It is recommended to explicitly specify the context of the current thread - * through the aclrtSetCurrentContext interface to increase. - * the maintainability of the program. - * - * @param context [OUT] point to the created context - * @param deviceId [IN] device to create context on - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtSetDevice | aclrtSetCurrentContext - */ -ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId); - -/** - * @ingroup AscendCL - * @brief destroy context instance - * - * @par Function - * Can only destroy context created through aclrtCreateContext interface - * - * @param context [IN] the context to destroy - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtCreateContext - */ -ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context); - -/** - * @ingroup AscendCL - * @brief set the context of the thread - * - * @par Function - * The following scenarios are supported: - * @li If the aclrtCreateContext interface is called in a thread to explicitly - * create a Context (for example: ctx1), the thread's Context can be specified - * without calling the aclrtSetCurrentContext interface. - * The system uses ctx1 as the context of thread1 by default. - * @li If the aclrtCreateContext interface is not explicitly created, - * the system uses the default context as the context of the thread. - * At this time, the aclrtDestroyContext interface cannot be used to release - * the default context. - * @li If the aclrtSetCurrentContext interface is called multiple times to - * set the thread's Context, the last one prevails. - * - * @par Restriction - * @li If the cevice corresponding to the context set for the thread - * has been reset, you cannot set the context as the context of the thread, - * otherwise a business exception will result. - * @li It is recommended to use the context created in a thread. - * If the aclrtCreateContext interface is called in thread A to create a context, - * and the context is used in thread B, - * the user must guarantee the execution order of tasks in the same stream - * under the same context in two threads. - * - * @param context [IN] the current context of the thread - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtCreateContext | aclrtDestroyContext - */ -ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context); - -/** - * @ingroup AscendCL - * @brief get the context of the thread - * - * @par Function - * If the user calls the aclrtSetCurrentContext interface - * multiple times to set the context of the current thread, - * then the last set context is obtained - * - * @param context [OUT] the current context of the thread - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtSetCurrentContext - */ -ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context); - -/** - * @ingroup AscendCL - * @brief Specify the device to use for the operation - * implicitly create the default context and the default stream - * - * @par Function - * The following use cases are supported: - * @li Device can be specified in the process or thread. - * If you call the aclrtSetDevice interface multiple - * times to specify the same device, - * you only need to call the aclrtResetDevice interface to reset the device. - * @li The same device can be specified for operation - * in different processes or threads. - * @li Device is specified in a process, - * and multiple threads in the process can share this device to explicitly - * create a Context (aclrtCreateContext interface). - * @li In multi-device scenarios, you can switch to other devices - * through the aclrtSetDevice interface in the process. - * - * @param deviceId [IN] the device id - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtResetDevice |aclrtCreateContext - */ -ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId); - -/** - * @ingroup AscendCL - * @brief Reset the current operating Device and free resources on the device, - * including the default context, the default stream, - * and all streams created under the default context, - * and synchronizes the interface. - * If the task under the default context or stream has not been completed, - * the system will wait for the task to complete before releasing it. - * - * @par Restriction - * @li The Context, Stream, and Event that are explicitly created - * on the device to be reset. Before resetting, - * it is recommended to follow the following interface calling sequence, - * otherwise business abnormalities may be caused. - * @li Interface calling sequence: - * call aclrtDestroyEvent interface to release Event or - * call aclrtDestroyStream interface to release explicitly created Stream-> - * call aclrtDestroyContext to release explicitly created Context-> - * call aclrtResetDevice interface - * - * @param deviceId [IN] the device id - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId); - -/** - * @ingroup AscendCL - * @brief get target device of current thread - * - * @param deviceId [OUT] the device id - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId); - -/** - * @ingroup AscendCL - * @brief get target side - * - * @param runMode [OUT] the run mode - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode); - -/** - * @ingroup AscendCL - * @brief Wait for compute device to finish - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void); - -/** - * @ingroup AscendCL - * @brief Set Scheduling TS - * - * @param tsId [IN] the ts id - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId); - -/** - * @ingroup AscendCL - * @brief get total device number. - * - * @param count [OUT] the device number - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count); - -/** - * @ingroup AscendCL - * @brief create event instance - * - * @param event [OUT] created event - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event); - -/** - * @ingroup AscendCL - * @brief destroy event instance - * - * @par Function - * Only events created through the aclrtCreateEvent interface can be - * destroyed, synchronous interfaces. When destroying an event, - * the user must ensure that the tasks involved in the aclrtSynchronizeEvent - * interface or the aclrtStreamWaitEvent interface are completed before - * they are destroyed. - * - * @param event [IN] event to destroy - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent - */ -ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event); - -/** - * @ingroup AscendCL - * @brief Record an Event in the Stream - * - * @param event [IN] event to record - * @param stream [IN] stream handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Reset an event - * - * @par Function - * Users need to make sure to wait for the tasks in the Stream - * to complete before resetting the Event - * - * @param event [IN] event to reset - * @param stream [IN] stream handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Queries an event's status - * - * @param event [IN] event to query - * @param status [OUT] event status - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); - -/** - * @ingroup AscendCL - * @brief Block Host Running, wait event to be complete - * - * @param event [IN] event to wait - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); - -/** - * @ingroup AscendCL - * @brief computes the elapsed time between events. - * - * @param ms [OUT] time between start and end in ms - * @param start [IN] starting event - * @param end [IN] ending event - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream - */ -ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); - -/** - * @ingroup AscendCL - * @brief alloc memory on device - * - * @par Function - * alloc for size linear memory on device - * and return a pointer to allocated memory by *devPtr - * - * @par Restriction - * @li The memory requested by the aclrtMalloc interface needs to be released - * through the aclrtFree interface. - * @li Before calling the media data processing interface, - * if you need to apply memory on the device to store input or output data, - * you need to call acldvppMalloc to apply for memory. - * - * @param devPtr [OUT] pointer to pointer to allocated memory on device - * @param size [IN] alloc memory size - * @param policy [IN] memory alloc policy - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtFree | acldvppMalloc | aclrtMallocCached - */ -ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); - -/** - * @ingroup AscendCL - * @brief allocate memory on device with cache - * - * @par Function - * alloc for size linear memory on device - * and return a pointer to allocated memory by *devPtr - * - * @par Restriction - * @li The memory requested by the aclrtMallocCached interface needs to be released - * through the aclrtFree interface. - * - * @param devPtr [OUT] pointer to pointer to allocated memory on device - * @param size [IN] alloc memory size - * @param policy [IN] memory alloc policy - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtFree | aclrtMalloc - */ -ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); - -/** - * @ingroup AscendCL - * @brief flush cache data to ddr - * - * @param devPtr [IN] the pointer that flush data to ddr - * @param size [IN] flush size - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size); - -/** - * @ingroup AscendCL - * @brief invalidate cache data - * - * @param devPtr [IN] pointer to invalidate cache data - * @param size [IN] invalidate size - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size); - -/** - * @ingroup AscendCL - * @brief free device memory - * - * @par Function - * can only free memory allocated through the aclrtMalloc interface - * - * @param devPtr [IN] Pointer to memory to be freed - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtMalloc - */ -ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr); - -/** - * @ingroup AscendCL - * @brief alloc memory on host - * - * @par Restriction - * @li The requested memory cannot be used in the Device - * and needs to be explicitly copied to the Device. - * @li The memory requested by the aclrtMallocHost interface - * needs to be released through the aclrtFreeHost interface. - * - * @param hostPtr [OUT] pointer to pointer to allocated memory on the host - * @param size [IN] alloc memory size - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtFreeHost - */ -ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size); - -/** - * @ingroup AscendCL - * @brief free host memory - * - * @par Function - * can only free memory allocated through the aclrtMallocHost interface - * - * @param hostPtr [IN] free memory pointer - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtMallocHost - */ -ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); - -/** - * @ingroup AscendCL - * @brief synchronous memory replication between host and device - * - * @param dst [IN] destination address pointer - * @param destMax [IN] Max length of the destination address memory - * @param src [IN] source address pointer - * @param count [IN] the length of byte to copy - * @param kind [IN] memcpy type - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, - aclrtMemcpyKind kind); - -/** - * @ingroup AscendCL - * @brief Initialize memory and set contents of memory to specified value - * - * @par Function - * The memory to be initialized is on the Host or device side, - * and the system determines whether - * it is host or device according to the address - * - * @param devPtr [IN] Starting address of memory - * @param maxCount [IN] Max length of destination address memory - * @param value [IN] Set value - * @param count [IN] The length of memory - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count); - -/** - * @ingroup AscendCL - * @brief Asynchronous memory replication between Host and Device - * - * @par Function - * After calling this interface, - * be sure to call the aclrtSynchronizeStream interface to ensure that - * the task of memory replication has been completed - * - * @par Restriction - * @li For on-chip Device-to-Device memory copy, - * both the source and destination addresses must be 64-byte aligned - * - * @param dst [IN] destination address pointer - * @param destMax [IN] Max length of destination address memory - * @param src [IN] source address pointer - * @param count [IN] the number of byte to copy - * @param kind [IN] memcpy type - * @param stream [IN] asynchronized task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtSynchronizeStream - */ -ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, - aclrtMemcpyKind kind, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Asynchronous initialize memory - * and set contents of memory to specified value async - * - * @par Function - * The memory to be initialized is on the Host or device side, - * and the system determines whether - * it is host or device according to the address - * - * @param devPtr [IN] destination address pointer - * @param maxCount [IN] Max length of destination address memory - * @param value [IN] set value - * @param count [IN] the number of byte to set - * @param stream [IN] asynchronized task stream - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtSynchronizeStream - */ -ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, - aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create stream instance - * - * @param stream [OUT] the created stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream); - -/** - * @ingroup AscendCL - * @brief destroy stream instance - * - * @par Function - * Can only destroy streams created through the aclrtCreateStream interface - * - * @par Restriction - * Before calling the aclrtDestroyStream interface to destroy - * the specified Stream, you need to call the aclrtSynchronizeStream interface - * to ensure that the tasks in the Stream have been completed. - * - * @param stream [IN] the stream to destroy - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtCreateStream | aclrtSynchronizeStream - */ -ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief block the host until all tasks - * in the specified stream have completed - * - * @param stream [IN] the stream to wait - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Blocks the operation of the specified Stream until - * the specified Event is completed. - * Support for multiple streams waiting for the same event. - * - * @param stream [IN] the wait stream If using thedefault Stream, set NULL - * @param event [IN] the event to wait - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event); - -/** - * @ingroup AscendCL - * @brief set group - * - * @par Function - * set the task to the corresponding group - * - * @param groupId [IN] group id - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail - */ -ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId); - -/** - * @ingroup AscendCL - * @brief get the number of group - * - * @par Function - * get the number of group. if the number of group is zero, - * it means that group is not supported or group is not created. - * - * @param count [OUT] the number of group - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - */ -ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count); - -/** - * @ingroup AscendCL - * @brief create group information - * - * @retval null for failed. - * @retval OtherValues success. - * - * @see aclrtDestroyGroupInfo - */ -ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo(); - -/** - * @ingroup AscendCL - * @brief destroy group information - * - * @param groupInfo [IN] pointer to group information - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtCreateGroupInfo - */ -ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo); - -/** - * @ingroup AscendCL - * @brief get all group information - * - * @param groupInfo [OUT] pointer to group information - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtGetGroupCount - */ -ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); - -/** - * @ingroup AscendCL - * @brief get detail information of group - * - * @param groupInfo [IN] pointer to group information - * @param groupId [IN] group index value - * @param attr [IN] group attribute - * @param attrValue [OUT] pointer to attribute value - * @param valueLen [IN] length of attribute value - * @param paramRetSize [OUT] pointer to real length of attribute value - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtGetGroupCount | aclrtGetAllGroupInfo - */ -ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId, - aclrtGroupAttr attr, void *attrValue, size_t valueLen, - size_t *paramRetSize); - -/** - * @ingroup AscendCL - * @brief checking whether current device and peer device support the p2p feature - * - * @param canAccessPeer [OUT] pointer to save the checking result - * @param deviceId [IN] current device id - * @param peerDeviceId [IN] peer device id - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess - */ -ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId); - -/** - * @ingroup AscendCL - * @brief enable the peer device to support the p2p feature - * - * @param peerDeviceId [IN] the peer device id - * @param flags [IN] reserved field, now it must be zero - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess - */ -ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags); - -/** - * @ingroup AscendCL - * @brief disable the peer device to support the p2p function - * - * @param peerDeviceId [IN] the peer device id - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess - */ -ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId); - -/** - * @ingroup AscendCL - * @brief Obtain the free memory and total memory of specified attribute. - * the specified memory include normal memory and huge memory. - * - * @param attr [IN] the memory attribute of specified device - * @param free [OUT] the free memory of specified device - * @param total [OUT] the total memory of specified device. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total); - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_ACL_RT_H_ diff --git a/inc/external/acl/acl_tdt.h b/inc/external/acl/acl_tdt.h deleted file mode 100644 index c357518d..00000000 --- a/inc/external/acl/acl_tdt.h +++ /dev/null @@ -1,276 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_ -#define INC_EXTERNAL_ACL_ACL_TDT_H_ - -#include "acl/acl_base.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum acltdtTensorType { - ACL_TENSOR_DATA_UNDEFINED = -1, - ACL_TENSOR_DATA_TENSOR, - ACL_TENSOR_DATA_END_OF_SEQUENCE, - ACL_TENSOR_DATA_ABNORMAL -}; - -typedef struct acltdtDataItem acltdtDataItem; -typedef struct acltdtDataset acltdtDataset; -typedef struct acltdtChannelHandle acltdtChannelHandle; - -/** - * @ingroup AscendCL - * @brief Get tensor type from item - * - * @param dataItem [IN] pointer to the data item - * - * @retval Tensor type. - * @retval ACL_DT_UNDEFINED if dataItem is null - */ -ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem); - -/** - * @ingroup AscendCL - * @brief Get data type from item - * - * @param dataItem [IN] pointer to the data item - * - * @retval Data type. 
- * @retval ACL_DT_UNDEFINED if dataItem is null - */ -ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem); - -/** - * @ingroup AscendCL - * @brief Get data address from item - * - * @param dataItem [IN] pointer to data item - * - * @retval null for failed - * @retval OtherValues success - */ -ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); - -/** - * @ingroup AscendCL - * @brief Get data size from item - * - * @param dataItem [IN] pointer to data item - * - * @retval 0 for failed - * @retval OtherValues success - */ -ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); - -/** - * @ingroup AscendCL - * @brief Get dim's number from item - * - * @param dataItem [IN] pointer to data item - * - * @retval 0 for failed - * @retval OtherValues success - */ -ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); - -/** - * @ingroup AscendCL - * @brief Get dims from item - * - * @param dataItem [IN] the struct of data item - * @param dims [IN|OUT] pointer to the dims of dataTtem - * @param dimNum [IN] the size of the dims - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum); - -/** - * @ingroup AscendCL - * @brief Create the struct of data item - * - * @param tdtType [IN] Tdt tensor type - * @param dims [IN] pointer of tdtDataItem's dims - * @param dimNum [IN] Dim number - * @param dataType [IN] Data type - * @param data [IN] Data pointer - * @param size [IN] Data size - * - * @retval null for failed - * @retval OtherValues success - * - * @see acltdtDestroyDataItem - */ -ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, - aclDataType dataType, void *data, size_t size); - -/** - * @ingroup AscendCL - * @brief Destroy the struct of data item - * - * @param dataItem [IN] pointer to the data item - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acltdtCreateDataItem - */ -ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem); - -/** - * @ingroup AscendCL - * @brief Create the tdt dataset - * - * @retval null for failed - * @retval OtherValues success - * - * @see acltdtDestroyDataset - */ -ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset(); - -/** - * @ingroup AscendCL - * @brief Destroy the tdt dataset - * - * @param dataset [IN] pointer to the dataset - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see acltdtCreateDataset - */ -ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset); - -/** - * @ingroup AscendCL - * @brief Get the data item - * - * @param dataset [IN] pointer to the dataset - * @param index [IN] index of the dataset - * - * @retval null for failed - * @retval OtherValues success - * - * @see acltdtAddDataItem - */ -ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index); - -/** - * @ingroup AscendCL - * @brief Get the data item - * - * @param dataset [OUT] pointer to the dataset - * @param dataItem [IN] pointer to the data item - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acltdtGetDataItem - */ -ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem); - -/** - * @ingroup AscendCL - * @brief Get the size of dataset - * - * @param dataset [IN] pointer to the dataset - * - * @retval 0 for failed - * @retval OtherValues success - */ -ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset); - -/** - * @ingroup AscendCL - * @brief Stop the channel - * - * @param handle [IN] pointer to the channel handle - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see acltdtCreateChannel | acltdtDestroyChannel - */ -ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); - -/** - * @ingroup AscendCL - * @brief Create the channel - * - * @param deviceId [IN] the device id - * @param name [IN] the channel's name - * - * @retval null for failed - * @retval OtherValues success - * - * @see acltdtStopChannel | acltdtDestroyChannel - */ -ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name); - -/** - * @ingroup AscendCL - * @brief Destroy the channel - * - * @param handle [IN] pointer to the channel handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acltdtCreateChannel | acltdtStopChannel - */ -ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); - -/** - * @ingroup AscendCL - * @brief Send tensor to device - * - * @param handle [IN] pointer to the channel handle - * @param dataset [IN] pointer to the dataset - * @param timeout [IN] to be reserved, now it must be -1 - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acltdtReceiveTensor - */ -ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, - int32_t timeout); - -/** - * @ingroup AscendCL - * @brief Receive tensor from device - * - * @param handle [IN] pointer to the channel handle - * @param dataset [OUT] pointer to the dataset - * @param timeout [IN] to be reserved, now it must be -1 - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see acltdtSendTensor - */ -ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, - int32_t timeout); - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_ACL_TDT_H_ diff --git a/inc/external/acl/error_codes/ge_error_codes.h b/inc/external/acl/error_codes/ge_error_codes.h deleted file mode 100644 index 6f82a897..00000000 --- a/inc/external/acl/error_codes/ge_error_codes.h +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ -#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif -static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; -static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID = 145004; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION = 145005; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; -static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PARTITION_NUM_INVALID = 145010; -static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; -static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; -static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; -static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; -static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; -static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; -static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; -static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; -static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; -static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; -static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; -static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; -static const uint32_t 
ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; -static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; -static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; -static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; -#ifdef __cplusplus -} // namespace ge -#endif -#endif // INC_EXTERNAL_GE_GE_ERROR_CODES_H_ diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h deleted file mode 100644 index 0ae5303d..00000000 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ -#define __INC_EXTERNEL_RT_ERROR_CODES_H__ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static const int32_t ACL_RT_SUCCESS = 0; // success - -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type - -static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000; // feature not support -static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error -static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error - -static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000; // runtime 
internel error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 
507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error - -static const int32_t ACL_ERROR_RT_DRV_INTERNEL_ERROR = 507899; // drv internel error - -#ifdef __cplusplus -} -#endif - -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/external/acl/ops/acl_cblas.h b/inc/external/acl/ops/acl_cblas.h deleted file mode 100644 index 571a1183..00000000 --- a/inc/external/acl/ops/acl_cblas.h +++ /dev/null @@ -1,333 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ -#define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ - -#include "acl/acl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; - -typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; - -/** - * @ingroup AscendCL - * @brief perform the matrix-vector multiplication - * - * @param transA [IN] transpose type of matrix A - * @param m [IN] number of rows of matrix A - * @param n [IN] number of columns of matrix A - * @param alpha [IN] pointer to scalar used for multiplication. - * of same type as dataTypeC - * @param a [IN] pointer to matrix A - * @param lda [IN] leading dimension used to store the matrix A - * @param dataTypeA [IN] datatype of matrix A - * @param x [IN] pointer to vector x - * @param incx [IN] stride between consecutive elements of vector x - * @param dataTypeX [IN] datatype of vector x - * @param beta [IN] pointer to scalar used for multiplication. - * of same type as dataTypeC If beta == 0, - * then y does not have to be a valid input - * @param y [IN|OUT] pointer to vector y - * @param incy [IN] stride between consecutive elements of vector y - * @param dataTypeY [IN] datatype of vector y - * @param type [IN] computation type - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, - aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, - const void *beta, void *y, int incy, aclDataType dataTypeY, - aclComputeType type, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create a handle for performing the matrix-vector multiplication - * - * @param transA [IN] transpose type of matrix A - * @param m [IN] number of rows of matrix A - * @param n [IN] number of columns of matrix A - * @param dataTypeA [IN] datatype of matrix A - * @param dataTypeX [IN] datatype of vector x - * @param dataTypeY [IN] datatype of vector y - * @param type [IN] computation type - * @param handle [OUT] pointer to the pointer to the handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, - aclDataType dataTypeX, aclDataType dataTypeY, - aclComputeType type, aclopHandle **handle); - -/** - * @ingroup AscendCL - * @brief perform the matrix-vector multiplication - * - * @param transA [IN] transpose type of matrix A - * @param m [IN] number of rows of matrix A - * @param n [IN] number of columns of matrix A - * @param alpha [IN] pointer to scalar used for multiplication - * @param a [IN] pointer to matrix A - * @param lda [IN] leading dimension used to store the matrix A - * @param x [IN] pointer to vector x - * @param incx [IN] stride between consecutive elements of vector x - * @param beta [IN] pointer to scalar used for multiplication. 
- * If beta value == 0, - * then y does not have to be a valid input - * @param y [IN|OUT] pointer to vector y - * @param incy [IN] stride between consecutive elements of vector y - * @param type [IN] computation type - * @param stream [IN] stream - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, - const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, - const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, - aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create a handle for performing the matrix-vector multiplication - * - * @param transA [IN] transpose type of matrix A - * @param m [IN] number of rows of matrix A - * @param n [IN] number of columns of matrix A - * @param type [IN] computation type - * @param handle [OUT] pointer to the pointer to the handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, - aclopHandle **handle); - -/** - * @ingroup AscendCL - * @brief perform the matrix-vector multiplication - * - * @param transA [IN] transpose type of matrix A - * @param m [IN] number of rows of matrix A - * @param n [IN] number of columns of matrix A - * @param alpha [IN] pointer to scalar used for multiplication - * @param a [IN] pointer to matrix A - * @param lda [IN] leading dimension used to store the matrix A - * @param x [IN] pointer to vector x - * @param incx [IN] stride between consecutive elements of vector x - * @param beta [IN] pointer to scalar used for multiplication. 
- * If beta value == 0, - * then y does not have to be a valid input - * @param y [IN|OUT] pointer to vector y - * @param incy [IN] stride between consecutive elements of vector y - * @param type [IN] computation type - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, - int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, - int incy, aclComputeType type, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create a handle for performing the matrix-vector multiplication - * - * @param transA [IN] transpose type of matrix A - * @param m [IN] number of rows of matrix A - * @param n [IN] number of columns of matrix A - * @param handle [OUT] pointer to the pointer to the handle - * @param type [IN] computation type - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type, - aclopHandle **handle); - -/** - * @ingroup AscendCL - * @brief perform the matrix-matrix multiplication - * - * @param transA [IN] transpose type of matrix A - * @param transB [IN] transpose type of matrix B - * @param transC [IN] transpose type of matrix C - * @param m [IN] number of rows of matrix A and matrix C - * @param n [IN] number of columns of matrix B and matrix C - * @param k [IN] number of columns of matrix A and rows of matrix B - * @param alpha [IN] pointer to scalar used for multiplication. 
of same type as dataTypeC - * @param matrixA [IN] pointer to matrix A - * @param lda [IN] leading dimension array used to store matrix A - * @param dataTypeA [IN] datatype of matrix A - * @param matrixB [IN] pointer to matrix B - * @param ldb [IN] leading dimension array used to store matrix B - * @param dataTypeB [IN] datatype of matrix B - * @param beta [IN] pointer to scalar used for multiplication. - * of same type as dataTypeC If beta == 0, - * then matrixC does not have to be a valid input - * @param matrixC [IN|OUT] pointer to matrix C - * @param ldc [IN] leading dimension array used to store matrix C - * @param dataTypeC [IN] datatype of matrix C - * @param type [IN] computation type - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, - int k, const void *alpha, const void *matrixA, int lda, - aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB, - const void *beta, void *matrixC, int ldc, aclDataType dataTypeC, - aclComputeType type, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create a handle for performing the matrix-matrix multiplication - * - * @param transA [IN] transpose type of matrix A - * @param transB [IN] transpose type of matrix B - * @param transC [IN] transpose type of matrix C - * @param m [IN] number of rows of matrix A and matrix C - * @param n [IN] number of columns of matrix B and matrix C - * @param k [IN] number of columns of matrix A and rows of matrix B - * @param dataTypeA [IN] datatype of matrix A - * @param dataTypeB [IN] datatype of matrix B - * @param dataTypeC [IN] datatype of matrix C - * @param type [IN] computation type - * @param handle [OUT] pointer to the pointer to the handle - * @param type [IN] computation type - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, - int m, int n, int k, aclDataType dataTypeA, - aclDataType dataTypeB, aclDataType dataTypeC, - aclComputeType type, aclopHandle **handle); - -/** - * @ingroup AscendCL - * @brief perform the matrix-matrix multiplication - * - * @param transA [IN] transpose type of matrix A - * @param transB [IN] transpose type of matrix B - * @param transC [IN] transpose type of matrix C - * @param m [IN] number of rows of matrix A and matrix C - * @param n [IN] number of columns of matrix B and matrix C - * @param k [IN] number of columns of matrix A and rows of matrix B - * @param alpha [IN] pointer to scalar used for multiplication - * @param matrixA [IN] pointer to matrix A - * @param lda [IN] leading dimension used to store the matrix A - * @param matrixB [IN] pointer to matrix B - * @param ldb [IN] leading dimension used to store the matrix B - * @param beta [IN] pointer to scalar used for multiplication. - * If beta value == 0, - * then matrixC does not have to be a valid input - * @param matrixC [IN|OUT] pointer to matrix C - * @param ldc [IN] leading dimension used to store the matrix C - * @param type [IN] computation type - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, - int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda, - const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta, - aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create a handle for performing the matrix-matrix multiplication - * - * @param transA [IN] transpose type of matrix A - * @param transB [IN] transpose type of matrix B - * @param transC [IN] transpose type of matrix C - * @param m [IN] number of rows of matrix A and matrix C - * @param n [IN] number of columns of matrix B and matrix C - * @param k [IN] number of columns of matrix A and rows of matrix B - * @param type [IN] computation type - * @param handle [OUT] pointer to the pointer to the handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, - int m, int n, int k, aclComputeType type, - aclopHandle **handle); - -/** - * @ingroup AscendCL - * @brief perform the matrix-matrix multiplication - * - * @param transA [IN] transpose type of matrix A - * @param transB [IN] transpose type of matrix B - * @param transC [IN] transpose type of matrix C - * @param m [IN] number of rows of matrix A and matrix C - * @param n [IN] number of columns of matrix B and matrix C - * @param k [IN] number of columns of matrix A and rows of matrix B - * @param alpha [IN] pointer to scalar used for multiplication - * @param matrixA [IN] pointer to matrix A - * @param lda [IN] leading dimension used to store the matrix A - * @param matrixB [IN] pointer to matrix B - * @param ldb [IN] leading dimension used to store the matrix B - * @param beta [IN] pointer to scalar used for multiplication. 
- * If beta value == 0, - * then matrixC does not have to be a valid input - * @param matrixC [IN|OUT] pointer to matrix C - * @param ldc [IN] leading dimension used to store the matrix C - * @param type [IN] computation type - * @param stream [IN] stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, - int k, const int32_t *alpha, const int8_t *matrixA, int lda, - const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, - int ldc, aclComputeType type, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief create a handle for performing the matrix-matrix multiplication - * - * @param transA [IN] transpose type of matrix A - * @param transB [IN] transpose type of matrix B - * @param transC [IN] transpose type of matrix C - * @param m [IN] number of rows of matrix A and matrix C - * @param n [IN] number of columns of matrix B and matrix C - * @param k [IN] number of columns of matrix A and rows of matrix B - * @param type [IN] computation type - * @param handle [OUT] pointer to the pointer to the handle - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, - int m, int n, int k, aclComputeType type, - aclopHandle **handle); - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h deleted file mode 100644 index 32a21e91..00000000 --- a/inc/external/acl/ops/acl_dvpp.h +++ /dev/null @@ -1,2340 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if !defined(ENABLE_DVPP_INTERFACE) -#if defined(_MSC_VER) -#error message("if you want to use dvpp funtions ,please use the macro definition (ENABLE_DVPP_INTERFACE).") -#else -#error "if you want to use dvpp funtions ,please use the macro definition (ENABLE_DVPP_INTERFACE)." -#endif -#endif - -#ifndef INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ -#define INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ - -#include -#include -#include "acl/acl.h" -#include "acl/acl_base.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct acldvppPicDesc acldvppPicDesc; -typedef struct acldvppBatchPicDesc acldvppBatchPicDesc; -typedef struct acldvppRoiConfig acldvppRoiConfig; -typedef struct acldvppResizeConfig acldvppResizeConfig; -typedef struct acldvppBorderConfig acldvppBorderConfig; -typedef struct acldvppLutMap acldvppLutMap; -typedef struct acldvppChannelDesc acldvppChannelDesc; -typedef struct acldvppJpegeConfig acldvppJpegeConfig; -typedef struct aclvdecChannelDesc aclvdecChannelDesc; -typedef struct acldvppStreamDesc acldvppStreamDesc; -typedef struct aclvdecFrameConfig aclvdecFrameConfig; -typedef struct aclvencChannelDesc aclvencChannelDesc; -typedef struct aclvencFrameConfig aclvencFrameConfig; -typedef struct acldvppHist acldvppHist; -typedef void (*aclvdecCallback)(acldvppStreamDesc *input, acldvppPicDesc *output, void *userData); -typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output, void *userdata); - -// Supported Pixel Format -enum acldvppPixelFormat { - PIXEL_FORMAT_YUV_400 = 0, // 0 - PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 
- PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 - PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 - PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 - PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 - PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 - PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 - PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 - PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 - PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 - PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 - PIXEL_FORMAT_RGB_888 = 12, // 12 - PIXEL_FORMAT_BGR_888 = 13, // 13 - PIXEL_FORMAT_ARGB_8888 = 14, // 14 - PIXEL_FORMAT_ABGR_8888 = 15, // 15 - PIXEL_FORMAT_RGBA_8888 = 16, // 16 - PIXEL_FORMAT_BGRA_8888 = 17, // 17 - PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 - PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 - PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 - PIXEL_FORMAT_YVU_PLANAR_422, - PIXEL_FORMAT_YVU_PLANAR_444, - PIXEL_FORMAT_RGB_444 = 23, - PIXEL_FORMAT_BGR_444, - PIXEL_FORMAT_ARGB_4444, - PIXEL_FORMAT_ABGR_4444, - PIXEL_FORMAT_RGBA_4444, - PIXEL_FORMAT_BGRA_4444, - PIXEL_FORMAT_RGB_555, - PIXEL_FORMAT_BGR_555, - PIXEL_FORMAT_RGB_565, - PIXEL_FORMAT_BGR_565, - PIXEL_FORMAT_ARGB_1555, - PIXEL_FORMAT_ABGR_1555, - PIXEL_FORMAT_RGBA_1555, - PIXEL_FORMAT_BGRA_1555, - PIXEL_FORMAT_ARGB_8565, - PIXEL_FORMAT_ABGR_8565, - PIXEL_FORMAT_RGBA_8565, - PIXEL_FORMAT_BGRA_8565, - PIXEL_FORMAT_RGB_BAYER_8BPP = 50, - PIXEL_FORMAT_RGB_BAYER_10BPP, - PIXEL_FORMAT_RGB_BAYER_12BPP, - PIXEL_FORMAT_RGB_BAYER_14BPP, - PIXEL_FORMAT_RGB_BAYER_16BPP, - PIXEL_FORMAT_BGR_888_PLANAR = 70, - PIXEL_FORMAT_HSV_888_PACKAGE, - PIXEL_FORMAT_HSV_888_PLANAR, - PIXEL_FORMAT_LAB_888_PACKAGE, - PIXEL_FORMAT_LAB_888_PLANAR, - PIXEL_FORMAT_S8C1, - PIXEL_FORMAT_S8C2_PACKAGE, - PIXEL_FORMAT_S8C2_PLANAR, - PIXEL_FORMAT_S16C1, - PIXEL_FORMAT_U8C1, - PIXEL_FORMAT_U16C1, - PIXEL_FORMAT_S32C1, - PIXEL_FORMAT_U32C1, - PIXEL_FORMAT_U64C1, - PIXEL_FORMAT_S64C1, - PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, - PIXEL_FORMAT_YVU_SEMIPLANAR_440, - PIXEL_FORMAT_FLOAT32, - PIXEL_FORMAT_BUTT, - 
PIXEL_FORMAT_UNKNOWN = 10000 -}; - -// Stream Format -enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; - -// Supported Channel Mode -enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; - -// Supported Border Type -enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; - -/** - * @ingroup AscendCL - * @brief alloc device memory for dvpp. - * - * @par Function - * @li It's mainly used for allocating memory to device media data processing. - * The requested memory meets the data processing requirements. - * After calling this interface to request memory, - * you must release the memory using the acldvppFree interface. - * @li When calling the acldvppMalloc interface to apply for memory, - * the size entered by the user is aligned upwards to 32 integer multiples, - * and an additional 32 bytes are applied. - * - * @par Restriction - * If the user uses the acldvppMalloc interface to apply for a large block of - * memory and divide and manage the memory by himself, - * when applying for memory, the user needs to align up to 32 integer - * times + 32 bytes (ALIGN_UP [len] +32 words) according to - * the actual data size of each picture Section) to manage memory. - * - * @param devPtr [OUT] memory pointer. - * @param size [IN] memory size. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppFree - */ -ACL_FUNC_VISIBILITY aclError acldvppMalloc(void **devPtr, size_t size); - -/** - * @ingroup AscendCL - * @brief free device memory for dvpp. - * - * @par Function - * Free the memory requested through the acldvppMalloc interface - * @param devPtr [IN] memory pointer to free. - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see acldvppMalloc - */ -ACL_FUNC_VISIBILITY aclError acldvppFree(void *devPtr); - -/** - * @ingroup AscendCL - * @brief create DvppChannelDesc. - * - * @par Function - * Create a channel for image data processing. - * The same channel can be reused - * and is no longer available after destruction - * - * @retval null for failed. - * @retval OtherValues success. - */ -ACL_FUNC_VISIBILITY acldvppChannelDesc *acldvppCreateChannelDesc(); - -/** - * @ingroup AscendCL - * @brief destroy dvppChannelDesc. - * - * @par Function - * Can only destroy channels created by the acldvppCreateChannel interface - * @param channelDesc [IN] the channel description. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannelDesc | acldvppDestroyChannel - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyChannelDesc(acldvppChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get dvpp channel Id. - * - * @par Restriction - * Interface calling sequence: - * acldvppCreateChannelDesc --> acldvppCreateChannel --> - * acldvppGetChannelDescChannelId - * - * @param channelDesc [IN] the channel description. - * - * @retval channel id. - * - * @see acldvppCreateChannelDesc | acldvppCreateChannel - */ -ACL_FUNC_VISIBILITY uint64_t acldvppGetChannelDescChannelId(const acldvppChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Create dvpp picture description. - * - * @retval null for failed. - * @retval OtherValues success. - */ -ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppCreatePicDesc(); - -/** - * @ingroup AscendCL - * @brief Destroy dvpp picture description. - * - * @par Function - * Can only destroy picture description information created - * through acldvppCreatePicDesc interface. - * @param picDesc [IN] dvpp picture description. - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see acldvppCreatePicDesc - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyPicDesc(acldvppPicDesc *picDesc); - -/** - * @ingroup AscendCL - * @brief Set dvpp picture description's data. - * - * @param picDesc [OUT] dvpp picture description. - * @param dataDev [IN] dvpp picture dataDev.Must be the memory - * requested using the acldvppMalloc interface. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppMalloc - */ -ACL_FUNC_VISIBILITY aclError acldvppSetPicDescData(acldvppPicDesc *picDesc, void *dataDev); - -/** - * @ingroup AscendCL - * @brief Set dvpp picture description's size. - * - * @param picDesc [OUT] dvpp picture description. - * @param size dvpp [IN] picture size. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetPicDescSize(acldvppPicDesc *picDesc, uint32_t size); - -/** - * @ingroup AscendCL - * @brief Set dvpp picture description's format. - * - * @param picDesc [OUT] dvpp picture description. - * @param format [IN] dvpp picture format. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetPicDescFormat(acldvppPicDesc *picDesc, acldvppPixelFormat format); - -/** - * @ingroup AscendCL - * @brief Set dvpp picture description's width. - * - * @param picDesc [OUT] dvpp picture description. - * @param width [IN] dvpp picture width. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidth(acldvppPicDesc *picDesc, uint32_t width); - -/** - * @ingroup AscendCL - * @brief Set dvpp picture description's height. - * - * @param picDesc [OUT] dvpp picture description. - * @param height [IN] dvpp picture height. - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeight(acldvppPicDesc *picDesc, uint32_t height); - -/** - * @ingroup AscendCL - * @brief Set dvpp picture description's widthStride. - * - * @par Restriction - * Width alignment requirements: - * @li The minimum stride is 32 and the maximum is 4096 * 4 - * (that is, an image in argb format with a width of 4096); - * @li For 8K scaling, widthStride is required to be aligned to 2; - * @li For non 8K scaling, the calculation formula for widthStride - * is different for different image formats: - * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 - * @li yuv422packed: input image width * 2 and then align to 16 - * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 - * @li xrgb8888: input image width * 4, align to 16 - * @li HFBC:input image width - * - * @param picDesc [OUT] dvpp picture description. - * @param widthStride [IN] dvpp picture widthStride. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidthStride(acldvppPicDesc *picDesc, uint32_t widthStride); - -/** - * @ingroup AscendCL - * @brief Set dvpp picture description's heightStride. - * - * @par Restriction - * Height alignment requirements: - * @li The height of the input image is aligned to 2. - * High stride minimum 6 and maximum 4096. - * - * @param picDesc [OUT] dvpp picture description. - * @param heightStride [IN] dvpp picture heightStride. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeightStride(acldvppPicDesc *picDesc, uint32_t heightStride); - -/** - * @ingroup AscendCL - * @brief Set dvpp picture description's retcode. - * - * @param picDesc [OUT] dvpp picture description. - * @param retCode [IN] dvpp picture retcode. 
- * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetPicDescRetCode(acldvppPicDesc *picDesc, uint32_t retCode); - -/** - * @ingroup AscendCL - * @brief Get picture data. - * - * @param picDesc [IN] dvpp picture description. - * - * @retval picture data addr. - * @retval default nullptr. - */ -ACL_FUNC_VISIBILITY void *acldvppGetPicDescData(const acldvppPicDesc *picDesc); - -/** - * @ingroup AscendCL - * @brief Get picture data size. - * - * @param picDesc [IN] dvpp picture description. - * - * @retval picture data size. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescSize(const acldvppPicDesc *picDesc); - -/** - * @ingroup AscendCL - * @brief Get dvpp picture desc's format. - * - * @param picDesc [IN] dvpp picture description. - * - * @retval format - * @retval default PIXEL_FORMAT_YUV_400. - */ -ACL_FUNC_VISIBILITY acldvppPixelFormat acldvppGetPicDescFormat(const acldvppPicDesc *picDesc); - -/** - * @ingroup AscendCL - * @brief Get dvpp picture desc's width. - * - * @param picDesc [IN] dvpp picture description. - * - * @retval width. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidth(const acldvppPicDesc *picDesc); - -/** - * @ingroup AscendCL - * @brief Get dvpp picture desc's height. - * - * @param picDesc [IN] dvpp picture description. - * - * @retval height. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeight(const acldvppPicDesc *picDesc); - -/** - * @ingroup AscendCL - * @brief Get dvpp picture desc's widthStride. 
- * - * @par Restriction - * Width alignment requirements: - * @li The minimum stride is 32 and the maximum is 4096 * 4 - * (that is, an image in argb format with a width of 4096); - * @li For 8K scaling, widthStride is required to be aligned to 2; - * @li For non 8K scaling, the calculation formula for widthStride - * is different for different image formats: - * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 - * @li yuv422packed: input image width * 2 and then align to 16 - * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 - * @li xrgb8888: input image width * 4, align to 16 - * @li HFBC:input image width - * - * @param picDesc [IN] dvpp picture description. - * - * @retval stride width. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidthStride(const acldvppPicDesc *picDesc); - -/** - * @ingroup AscendCL - * @brief Get dvpp picture desc's heightStride. - * - * @par Restriction - * Height alignment requirements: - * @li The height of the input image is aligned to 2. - * High stride minimum 6 and maximum 4096. - * - * @param picDesc [IN] dvpp picture description. - * - * @retval stride height. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeightStride(const acldvppPicDesc *picDesc); - -/** - * @ingroup AscendCL - * @brief Get dvpp picture desc's retcode. - * - * @param picDesc [IN] dvpp picture description. - * - * @retval ret code. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picDesc); - -/** - * @ingroup AscendCL - * @brief Create dvpp roi config. - * - * @param left [IN] the left offset, must be even - * @param right [IN] the right offset, must be odd - * @param top [IN] the top offset, must be even - * @param bottom [IN] the bottom offset, must be odd - * - * @retval null for failed. 
- * @retval other success - */ -ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, - uint32_t bottom); - -/** - * @ingroup AscendCL - * @brief Destroy dvpp roi config. - * - * @par Function - * Destroys data created through the acldvppCreateRoiConfig interface - * @param roiConfig [IN] dvpp roi config. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateRoiConfig - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyRoiConfig(acldvppRoiConfig *roiConfig); - -/** - * @ingroup AscendCL - * @brief Set left of RoiConfig. - * - * @param config [OUT] RoiConfig - * @param left [IN] left offset - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigLeft(acldvppRoiConfig *config, uint32_t left); - -/** - * @ingroup AscendCL - * @brief Set right of RoiConfig. - * - * @param config [OUT] RoiConfig - * @param right [IN] right offset - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigRight(acldvppRoiConfig *config, uint32_t right); - -/** - * @ingroup AscendCL - * @brief Set top of RoiConfig. - * - * @param config [OUT] RoiConfig - * @param top [IN] top offset - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigTop(acldvppRoiConfig *config, uint32_t top); - -/** - * @ingroup AscendCL - * @brief Set bottom of RoiConfig. - * - * @param config [OUT] RoiConfig - * @param bottom [IN] bottom offset - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, uint32_t bottom); - -/** - * @ingroup AscendCL - * @brief Set RoiConfig. 
- * - * @param config [OUT] RoiConfig - * @param left [IN] left offset - * @param right [IN] right offset - * @param top [IN] top offset - * @param bottom [IN] bottom offset - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top, - uint32_t bottom); - -/** - * @ingroup AscendCL - * @brief Create dvpp resize config. - * The specified scaling algorithm is not supported. - * The default scaling algorithm is "nearest neighbor interpolation". - * - * @retval null for failed. - * @retval other success. - */ -ACL_FUNC_VISIBILITY acldvppResizeConfig *acldvppCreateResizeConfig(); - -/** - * @ingroup AscendCL - * @brief Destroy dvpp resize config. - * - * @par Function - * Destroys the scaling configuration data created by - * the acldvppCreateResizeConfig interface - * - * @param resizeConfig [IN] resize config. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateResizeConfig - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyResizeConfig(acldvppResizeConfig *resizeConfig); - -/** - * @ingroup AscendCL - * @brief Create jpege config. - * - * @retval null for failed. - * @retval other success. - */ -ACL_FUNC_VISIBILITY acldvppJpegeConfig *acldvppCreateJpegeConfig(); - -/** - * @ingroup AscendCL - * @brief Destroy jpege config. - * - * @par Function - * Destroys the encoding configuration data created by - * the acldvppCreateJpegeConfig interface - * @param jpegeConfig [IN] config pointer to destroy. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateJpegeConfig - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyJpegeConfig(acldvppJpegeConfig *jpegeConfig); - -/** - * @ingroup AscendCL - * @brief Set jpege config's level. 
- * - * @param jpegeConfig [OUT] Call the acldvppCreateJpegeConfig - * interface to create acldvppJpegeConfig data - * @param level [IN] Encoding quality range [0, 100], - * where level 0 encoding quality is similar to level 100, - * and the smaller the value in [1, 100], - * the worse the quality of the output picture. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetJpegeConfigLevel(acldvppJpegeConfig *jpegeConfig, uint32_t level); - -/** - * @ingroup AscendCL - * @brief Get jpege config's level. - * - * @param jpegeConfig [IN] jpege config. - * - * @retval compression level. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetJpegeConfigLevel(const acldvppJpegeConfig *jpegeConfig); - -/** - * @ingroup AscendCL - * @brief create vdecChannelDesc.Channel description information - * when creating a video data processing channel. - * - * @retval null for failed. - * @retval other success - */ -ACL_FUNC_VISIBILITY aclvdecChannelDesc *aclvdecCreateChannelDesc(); - -/** - * @ingroup AscendCL - * @brief destroy vdecChannelDesc. - * - * @par Function - * Can only destroy aclvdecChannelDesc type created - * through aclvdecCreateChannelDesc interface - * @param channelDesc [IN] channel description. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - - * @see aclvdecCreateChannelDesc - */ -ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannelDesc(aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Set vdec channel description's channel id. - * - * @param channelDesc [OUT] vdec channel description. - * @param channelId [IN] decoding channel id: 0~15. - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescChannelId(aclvdecChannelDesc *channelDesc, uint32_t channelId); - -/** - * @ingroup AscendCL - * @brief Set vdec channel description's thread id. - * - * @param channelDesc [OUT] vdec channel description. - * @param threadId [IN] thread id. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescThreadId(aclvdecChannelDesc *channelDesc, uint64_t threadId); - -/** - * @ingroup AscendCL - * @brief Set vdec channel description's callback function. - * - * @param channelDesc [OUT] vdec channel description. - * @param callback [IN] function callback.Function prototype: - * void (* aclvdecCallback) - * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata) - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclvdecCallback - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescCallback(aclvdecChannelDesc *channelDesc, aclvdecCallback callback); - -/** - * @ingroup AscendCL - * @brief Set vdec channel description's video encoding type. - * - * @param channelDesc [OUT] vdec channel description. - * @param enType [IN] video encoding type. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescEnType(aclvdecChannelDesc *channelDesc, acldvppStreamFormat enType); - -/** - * @ingroup AscendCL - * @brief Set vdec channel description's out picture format. - * - * @param channelDesc [OUT] vdec channel description. - * @param outPicFormat [IN] out picture format (acldvppPixelFormat). - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicFormat(aclvdecChannelDesc *channelDesc, - acldvppPixelFormat outPicFormat); - -/** - * @ingroup AscendCL - * @brief Set vdec channel description's out picture width. - * - * @param channelDesc [OUT] vdec channel description. - * @param outPicWidth [IN] out picture width. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicWidth(aclvdecChannelDesc *channelDesc, uint32_t outPicWidth); - -/** - * @ingroup AscendCL - * @brief Set vdec channel description's out picture height. - * - * @param channelDesc [OUT] vdec channel description. - * @param outPicHeight [IN] out picture height. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicHeight(aclvdecChannelDesc *channelDesc, uint32_t outPicHeight); - -/** - * @ingroup AscendCL - * @brief Set vdec channel description's reference frame num. - * - * @param channelDesc [OUT] vdec channel description. - * @param refFrameNum [IN] reference frame num. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescRefFrameNum(aclvdecChannelDesc *channelDesc, uint32_t refFrameNum); - -/** - * @ingroup AscendCL - * @brief Set vdec channel description's bit depth. - * - * @param channelDesc [OUT] vdec channel description. - * @param bitDepth [IN] bit depth. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescBitDepth(aclvdecChannelDesc *channelDesc, uint32_t bitDepth); - -/** - * @ingroup AscendCL - * @brief Get vdec channel description's channel id. - * - * @param channelDesc [IN] vdec channel description. 
- * - * @retval decoding channel id: 0~15. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescChannelId(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get vdec channel description's thread id. - * - * @param channelDesc [IN] vdec channel description. - * - * @retval thread id. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint64_t aclvdecGetChannelDescThreadId(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get vdec channel description's callback function. - * - * @param channelDesc [IN] vdec channel description. - * - * @retval function callback.Function prototype: - * void (* aclvdecCallback) - * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata) - * @retval default null. - * - * @see aclvdecCallback - */ -ACL_FUNC_VISIBILITY aclvdecCallback aclvdecGetChannelDescCallback(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get vdec channel description's video encoding type. - * - * @param channelDesc [IN] vdec channel description. - * - * @retval video encoding type. - * @retval default H265_MAIN_LEVEL. - */ -ACL_FUNC_VISIBILITY acldvppStreamFormat aclvdecGetChannelDescEnType(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get vdec channel description's out picture format. - * - * @param channelDesc [IN] vdec channel description. - * - * @retval out picture format. - * @retval default DVPP_OUTPUT_YUV420SP_UV. - */ -ACL_FUNC_VISIBILITY acldvppPixelFormat aclvdecGetChannelDescOutPicFormat(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get vdec channel description's out picture width. - * - * @param channelDesc [IN] vdec channel description. - * - * @retval out picture width. - * @retval default 0. 
- */ -ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicWidth(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get vdec channel description's out picture height. - * - * @param channelDesc [IN] vdec channel description. - * - * @retval out picture height (for vdec malloc memory). - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicHeight(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get vdec channel description's bit depth. - * - * @param channelDesc [IN] vdec channel description. - * - * @retval bit depth. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescBitDepth(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get vdec channel description's reference frame num. - * - * @param channelDesc [IN] vdec channel description. - * - * @retval reference frame num. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescRefFrameNum(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief create vencChannelDesc. - * - * @retval null for failed, other success - */ -ACL_FUNC_VISIBILITY aclvencChannelDesc *aclvencCreateChannelDesc(); - -/** - * @ingroup AscendCL - * @brief destroy vencChannelDesc. - * - * @param channelDesc [IN] channel desc. - * - * @retval ACL_SUCCESS:success, other:failed - */ -ACL_FUNC_VISIBILITY aclError aclvencDestroyChannelDesc(aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Set decoding thread id for venc channel desc. - * - * @param channelDesc [OUT] venc channel desc - * @param threadId [IN] thread id - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescThreadId(aclvencChannelDesc *channelDesc, uint64_t threadId); - -/** - * @ingroup AscendCL - * @brief Set func callback for venc channel desc. 
- * - * @param channelDesc [OUT] venc channel desc - * @param callback [IN] func callback - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescCallback(aclvencChannelDesc *channelDesc, aclvencCallback callback); - -/** - * @ingroup AscendCL - * @brief Set video encoding type for venc channel desc. - * - * @param channelDesc [OUT] venc channel desc - * @param enType [IN] video encoding type - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescEnType(aclvencChannelDesc *channelDesc, acldvppStreamFormat enType); - -/** - * @ingroup AscendCL - * @brief Set pic format for venc channel desc. - * - * @param channelDesc [OUT] venc channel desc - * @param picFormat [IN] pic format - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicFormat(aclvencChannelDesc *channelDesc, - acldvppPixelFormat picFormat); - -/** - * @ingroup AscendCL - * @brief Set out pic width for venc channel desc. - * - * @param channelDesc [OUT] venc channel desc - * @param picWidth [IN] pic width - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicWidth(aclvencChannelDesc *channelDesc, uint32_t picWidth); - -/** - * @ingroup AscendCL - * @brief Set pic height for venc channel desc. - * - * @param channelDesc [OUT] venc channel desc - * @param picHeight [IN] pic height - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicHeight(aclvencChannelDesc *channelDesc, uint32_t picHeight); - -/** - * @ingroup AscendCL - * @brief Set key frame interval for venc channel desc. 
- * - * @param channelDesc [OUT] venc channel desc - * @param keyFrameInterval [IN] Interval of key frame - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescKeyFrameInterval(aclvencChannelDesc *channelDesc, - uint32_t keyFrameInterval); - -/** - * @ingroup AscendCL - * @brief Set output buffer address for venc channel desc. - * - * @param channelDesc [OUT] venc channel desc - * @param bufAddr [IN] output buffer address - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufAddr(aclvencChannelDesc *channelDesc, void *bufAddr); - -/** - * @ingroup AscendCL - * @brief Set output buffer size for venc channel desc. - * - * @param channelDesc [OUT] venc channel desc - * @param bufSize [IN] output buffer size - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufSize(aclvencChannelDesc *channelDesc, uint32_t bufSize); - -/** - * @ingroup AscendCL - * @brief Set rc model for venc channel desc. - * - * @param channelDesc [OUT] venc channel desc - * @param rcMode [IN] venc rc mode(VBR=1, CBR=2) - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescRcMode(aclvencChannelDesc *channelDesc, uint32_t rcMode); - -/** - * @ingroup AscendCL - * @brief Set source rate for venc channel desc. - * - * @param channelDesc [OUT] venc channel desc - * @param srcRate [IN] source rate - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescSrcRate(aclvencChannelDesc *channelDesc, uint32_t srcRate); - -/** - * @ingroup AscendCL - * @brief Set max bit rate for venc channel desc. 
- * - * @param channelDesc [OUT] venc channel desc - * @param maxBitRate [IN] max bit rate - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc *channelDesc, uint32_t maxBitRate); - -/** - * @ingroup AscendCL - * @brief Get output buffer address for venc channel desc. - * - * @param channelDesc[IN] venc channel desc - * - * @retval output buffer address - */ -ACL_FUNC_VISIBILITY void *aclvencGetChannelDescBufAddr(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get output buffer size for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval output buffer size - */ -ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescBufSize(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get decoding channel id for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval decoding channel id: 0~15, default 0 - */ -ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescChannelId(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get decoding thread id for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval thread id, default 0 - */ -ACL_FUNC_VISIBILITY uint64_t aclvencGetChannelDescThreadId(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get func callback for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval func callback, default null - */ -ACL_FUNC_VISIBILITY aclvencCallback aclvencGetChannelDescCallback(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get video encoding type for venc channel desc. 
- * - * @param channelDesc [IN] venc channel desc - * - * @retval video encoding type, default H265_MAIN_LEVEL - */ -ACL_FUNC_VISIBILITY acldvppStreamFormat aclvencGetChannelDescEnType(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get pic format for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval pic format - */ -ACL_FUNC_VISIBILITY acldvppPixelFormat aclvencGetChannelDescPicFormat(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get pic width for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval pic width, default 0 - */ -ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicWidth(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get pic height for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval pic height, default 0 - */ -ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicHeight(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Get interval of key frame for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval interval of key frame, default 0 - */ -ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescKeyFrameInterval(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * - * @brief Get rc mode for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval rc mode, default 0 - */ -ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescRcMode(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * - * @brief Get source rate for venc channel desc. - * - * @param channelDesc [IN] venc channel desc - * - * @retval source rate, default 0 - */ -ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescSrcRate(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * - * @brief Get max bit rate for venc channel desc. 
- * - * @param channelDesc [IN] venc channel desc - * - * @retval max bit rate, default 0 - */ -ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief get forced restart of I-frame interval from config - * - * @param config [IN] venc frame config - * - * @retval 0: Not forced; 1: Forced restart of I-frame -1: error - */ -ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigForceIFrame(const aclvencFrameConfig *config); - -/** - * @ingroup AscendCL - * @brief get forced restart of I-frame interval from config - * - * @param config [IN] venc frame config - * - * @retval Whether it is the end frame: 0: no; 1: end frame - */ -ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigEos(const aclvencFrameConfig *config); - -/** - * @ingroup AscendCL - * @brief set single frame encoding configuration parameters - * - * @param config [OUT] venc frame config - * @param forceFrame [IN] forced restart of I-frame interval: 0: Not forced; 1: Forced restart of I-frame - * - * @retval ACL_SUCCESS for ok, others for fail - */ -ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigForceIFrame(aclvencFrameConfig *config, uint8_t forceIFrame); - -/** - * @ingroup AscendCL - * @brief set single frame encoding configuration parameters - * - * @param config [OUT] venc frame config - * @param eos [IN] Whether it is the end frame: 0: no; 1: end frame - * - * @retval ACL_SUCCESS for ok, others for fail - */ -ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigEos(aclvencFrameConfig *config, uint8_t eos); - -/** - * @ingroup AscendCL - * @brief dvpp venc destroy frame config - * - * @param config [IN] venc frame config - * - * @retval ACL_SUCCESS for ok, others for fail - */ -ACL_FUNC_VISIBILITY aclError aclvencDestroyFrameConfig(aclvencFrameConfig *config); - -/** - * @ingroup AscendCL - * @brief Create dvpp venc frame config. 
- * - * @retval null for failed, other aclvencFrameConfig ptr - */ -ACL_FUNC_VISIBILITY aclvencFrameConfig *aclvencCreateFrameConfig(); - -/** - * @ingroup AscendCL - * @brief Create dvpp venc channel. - * - * @param channelDesc [IN|OUT] venc channel desc - * - * @retval ACL_SUCCESS for ok, others for fail - */ -ACL_FUNC_VISIBILITY aclError aclvencCreateChannel(aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Destroy dvpp venc channel. - * - * @param channelDesc [IN] venc channel desc - * - * @retval ACL_SUCCESS for ok, others for fail - */ -ACL_FUNC_VISIBILITY aclError aclvencDestroyChannel(aclvencChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief dvpp venc launch send frame task. - * - * @param channelDesc [IN] venc channel desc - * @param input [IN] input picture desc - * @param reserve [IN] reserve parameter - * @param config [IN] dvpp frame config - * @param userdata [IN] user callback function - * - * @retval ACL_SUCCESS for ok, others for fail - */ -ACL_FUNC_VISIBILITY aclError aclvencSendFrame(aclvencChannelDesc *channelDesc, acldvppPicDesc *input, void *reserve, - aclvencFrameConfig *config, void *userdata); - -/** - * @ingroup AscendCL - * @brief Create dvpp stream description. - * - * @retval null for failed. - * @retval other success. - */ -ACL_FUNC_VISIBILITY acldvppStreamDesc *acldvppCreateStreamDesc(); - -/** - * @ingroup AscendCL - * @brief Destroy dvpp stream description. - * - * @par Function - * Can only destroy acldvppStreamDesc type created through - * acldvppCreateStreamDesc interface. - * - * @param streamDesc [IN] dvpp stream description. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateStreamDesc - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyStreamDesc(acldvppStreamDesc *streamDesc); - -/** - * @ingroup AscendCL - * @brief Set stream description's data addr. - * - * @param streamDesc [OUT] dvpp stream description. 
- * @param dataDev [IN] data addr. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescData(acldvppStreamDesc *streamDesc, void *dataDev); - -/** - * @ingroup AscendCL - * @brief Set stream description's data size. - * - * @param streamDesc [OUT] dvpp stream description. - * @param size [IN] data size. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescSize(acldvppStreamDesc *streamDesc, uint32_t size); - -/** - * @ingroup AscendCL - * @brief Set stream description's format. - * - * @param streamDesc [OUT] dvpp stream description. - * @param format [IN] stream format. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescFormat(acldvppStreamDesc *streamDesc, acldvppStreamFormat format); - -/** - * @ingroup AscendCL - * @brief Set stream description's timestamp. - * - * @param streamDesc [OUT] dvpp stream description. - * @param timestamp [IN] current timestamp. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescTimestamp(acldvppStreamDesc *streamDesc, uint64_t timestamp); - -/** - * @ingroup AscendCL - * @brief Set stream description's ret code. - * - * @param streamDesc [OUT] dvpp stream description. - * @param retCode [IN] result code. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescRetCode(acldvppStreamDesc *streamDesc, uint32_t retCode); - -/** - * @ingroup AscendCL - * @brief Set stream description's eos. - * - * @param streamDesc [OUT] dvpp stream description. - * @param eos [IN] end flag of sequence. 
- * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescEos(acldvppStreamDesc *streamDesc, uint8_t eos); - -/** - * @ingroup AscendCL - * @brief Get stream description's data addr. - * - * @param streamDesc [IN] dvpp stream description. - * - * @retval data addr. - * @retval deault nullptr. - */ -ACL_FUNC_VISIBILITY void *acldvppGetStreamDescData(const acldvppStreamDesc *streamDesc); - -/** - * @ingroup AscendCL - * @brief Get stream description's data size. - * - * @param streamDesc [IN] dvpp stream description. - * - * @retval data size. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescSize(const acldvppStreamDesc *streamDesc); - -/** - * @ingroup AscendCL - * @brief Get stream description's format. - * - * @param streamDesc [IN] dvpp stream description. - * - * @retval stream format. - * @retval default ACL_DVPP_STREAM_H264. - */ -ACL_FUNC_VISIBILITY acldvppStreamFormat acldvppGetStreamDescFormat(const acldvppStreamDesc *streamDesc); - -/** - * @ingroup AscendCL - * @brief Get stream description's timestamp. - * - * @param streamDesc [IN] dvpp stream description. - * - * @retval current timestamp. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint64_t acldvppGetStreamDescTimestamp(const acldvppStreamDesc *streamDesc); - -/** - * @ingroup AscendCL - * @brief Get stream description's retCode. - * - * @param streamDesc [IN] dvpp stream description. - * - * @retval result code. - * @retval default 0. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescRetCode(const acldvppStreamDesc *streamDesc); - -/** - * @ingroup AscendCL - * @brief Get stream description's eos. - * - * @param streamDesc [IN] dvpp stream description. - * - * @retval end flag of sequence. - * @retval default 0(false). 
- */ -ACL_FUNC_VISIBILITY uint8_t acldvppGetStreamDescEos(const acldvppStreamDesc *streamDesc); - -/** - * @ingroup AscendCL - * @brief Create vdec frame config. - * - * @retval null for failed. - * @retval other success. - */ -ACL_FUNC_VISIBILITY aclvdecFrameConfig *aclvdecCreateFrameConfig(); - -/** - * @ingroup AscendCL - * @brief Destroy vdec frame config. - * - * @par Function - * Can only destroy aclvdecFrameConfig type created through - * aclvdecCreateFrameConfig interface - * - * @param vdecFrameConfig [IN] vdec frame config. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclvdecCreateFrameConfig - */ -ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecFrameConfig); - -/** - * @ingroup AscendCL - * @brief Get image width and height of jpeg. - * - * @param data [IN] image data in host memory - * @param size [IN] the size of image data - * @param width [OUT] the width of image from image header - * @param height [OUT] the height of image from image header - * @param components [OUT] the components of image from image header - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height, - int32_t *components); - -/** - * @ingroup AscendCL - * @brief Predict encode size of jpeg image. - * - * @param inputDesc [IN] dvpp image desc - * @param config [IN] jpeg encode config - * @param size [OUT] the size predicted of image - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, - const acldvppJpegeConfig *config, uint32_t *size); - -/** - * @ingroup AscendCL - * @brief Predict decode size of jpeg image. 
- * - * @param data [IN] origin image data in host memory - * @param dataSize [IN] the size of origin image data - * @param outputPixelFormat [IN] the pixel format jpeg decode - * @param decSize [OUT] the size predicted for decode image - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize, - acldvppPixelFormat outputPixelFormat, uint32_t *decSize); - -/** - * @ingroup AscendCL - * @brief Get image width and height of png. - * - * @param data [IN] image data in host memory - * @param size [IN] the size of image data - * @param width [OUT] the width of image from image header - * @param height [OUT] the height of image from image header - * @param components [OUT] the components of image from image header - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width, - uint32_t *height, int32_t *components); - -/** - * @ingroup AscendCL - * @brief Predict decode size of png image. - * - * @param data [IN] origin image data in host memory - * @param dataSize [IN] the size of origin image data - * @param outputPixelFormat [IN] the pixel format jpeg decode - * @param decSize [OUT] the size predicted for decode image - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize, - acldvppPixelFormat outputPixelFormat, uint32_t *decSize); - -/** - * @ingroup AscendCL - * @brief Create dvpp channel, the same channel can be reused - * and is no longer available after destruction. - * - * @param channelDesc [IN|OUT] the channel destruction - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see acldvppCreateChannelDesc - */ -ACL_FUNC_VISIBILITY aclError acldvppCreateChannel(acldvppChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Destroy dvpp channel. - * - * @par Restriction - * Can only destroy channel created through the acldvppCreateChannel interface - * - * @param channelDesc [IN] the channel destruction - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief dvpp vpc resize. - * - * @par Restriction - * Width alignment requirements: - * @li The minimum stride is 32 and the maximum is 4096 * 4 - * (that is, an image in argb format with a width of 4096); - * @li For 8K scaling, widthStride is required to be aligned to 2; - * @li For non 8K scaling, the calculation formula for widthStride - * is different for different image formats: - * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 - * @li yuv422packed: input image width * 2 and then align to 16 - * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 - * @li xrgb8888: input image width * 4, align to 16 - * @li HFBC:input image width - * Height alignment requirements: - * @li The height of the input image is aligned to 2. - * High stride minimum 6 and maximum 4096. - * - * @param channelDesc [IN] the channel destruction - * @param inputDesc [IN] resize input picture destruction - * @param outputDesc [IN|OUT] resize output picture destruction - * @param resizeConfig [IN] resize config - * @param stream [IN] resize task stream - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreatePicDesc - * | acldvppCreateResizeConfig - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig, - aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief dvpp vpc crop. - * - * @par Function - * crop the input picture according to the specified area, - * and then store the picture in the output memory as the output picture - * - * @par Restriction - * Width alignment requirements: - * @li The minimum stride is 32 and the maximum is 4096 * 4 - * (that is, an image in argb format with a width of 4096); - * @li For 8K scaling, widthStride is required to be aligned to 2; - * @li For non 8K scaling, the calculation formula for widthStride - * is different for different image formats: - * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 - * @li yuv422packed: input image width * 2 and then align to 16 - * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 - * @li xrgb8888: input image width * 4, align to 16 - * @li HFBC:input image width - * Height alignment requirements: - * @li The height of the input image is aligned to 2. - * High stride minimum 6 and maximum 4096. - * - * @param channelDesc [IN] the channel destruction - * @param inputDesc [IN] crop input picture destruction - * @param outputDesc [IN|OUT] crop output picture destruction - * @param cropArea [IN] crop area config - * @param stream [IN] crop task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, - aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief dvpp vpc batch crop. 
- * - * @par Function - * crop the input batch picture according to the specified area - * as the output batch pictures - * - * @param channelDesc [IN] the channel destruction - * @param srcBatchPicDescs [IN] crop input batch picture destruction - * @param roiNums [IN] roi config numbers - * @param size [IN] roiNum size - * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction - * @param cropAreas [IN] crop area configs - * @param stream [IN] crop batch task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, - uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, - acldvppRoiConfig *cropAreas[], aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief dvpp vpc crop and paste. - * - * @par Function - * crop the input picture according to the specified area, - * and paste the picture to the specified position of the target picture - * as the output picture - * - * @param channelDesc [IN] thechannel destruction - * @param inputDesc [IN] crop and paste input picture destruction - * @param outputDesc [IN|OUT] crop and paste output picture destruction - * @param cropArea [IN] crop area config - * @param pasteArea [IN] paste area config - * @param stream [IN] crop and paste task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, - acldvppRoiConfig *pasteArea, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief dvpp vpc batch crop and paste. 
- * - * @par Function - * crop the input batch picture according to the specified area, - * and paste the pictures to the specified position of the target pictures - * as the output batch pictures - * - * @param channelDesc [IN] the channel destruction - * @param srcBatchPicDescs [IN] crop input batch picture destruction - * @param roiNums [IN] roi config numbers - * @param size [IN] roiNum size - * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction - * @param cropAreas [IN] crop area configs - * @param pasteAreas [IN] paste area configs - * @param stream [IN] crop batch task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, - uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, - acldvppRoiConfig *cropAreas[], - acldvppRoiConfig *pasteAreas[], aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief dvpp vpc jpeg decode. - * - * @par Function - * For different source picture formats, after decoding, - * output pictures in the following format: - * @li jpeg(444) -> YUV444SP:V is front U is back, - * YUV420 SP V is front U is back, YUV420SP U is front V is back; - * @li jpeg(422) -> YUV422SP:V is in front U is behind, - * YUV420SP V is in front U is behind, YUV420SP U is in front V is behind; - * @li jpeg(420) -> YUV420SP: - * V is front U is back, YUV420SP U is front V is back; - * @li jpeg(400) -> YUV420SP:UV data is filled with 0 x 80. 
- * - * @param channelDesc [IN] the channel destruction - * @param data [IN] decode input picture destruction's data - * @param size [IN] decode input picture destruction's size - * @param outputDesc [IN|OUT] decode output picture destruction - * @param stream [IN] decode task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreatePicDesc - */ -ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, - acldvppPicDesc *outputDesc, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief dvpp vpc jpeg encode. - * - * @param channelDesc [IN] the channel destruction - * @param inputDesc [IN] encode input picture destruction - * @param data [OUT] encode output picture destruction's data - * @param size [IN|OUT] encode output picture destruction's size - * @param config [IN] jpeg encode config - * @param stream [IN] encode task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreateJpegeConfig - */ -ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - const void *data, uint32_t *size, acldvppJpegeConfig *config, - aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief dvpp vpc png decode. - * - * @param channelDesc [IN] the channel destruction - * @param data [IN] decode input picture destruction's data - * @param size [IN] decode input picture destruction's size - * @param outputDesc [IN|OUT] decode output picture destruction - * @param stream [IN] decode task stream - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreatePicDesc - */ -ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, - acldvppPicDesc *outputDesc, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Create vdec channel. - * - * @par Function - * Create a channel for video data processing, - * the same channel can be reused, - * and is no longer available after destruction - * - * @param channelDesc [IN|OUT] the channel destruction - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclvdecCreateChannelDesc - */ -ACL_FUNC_VISIBILITY aclError aclvdecCreateChannel(aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Destroy vdec channel. - * - * @par Function - * Can only destroy channels created by the aclvdecCreateChannel interface - * - * @param channelDesc [IN] the channel destruction - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclvdecCreateChannel - */ -ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief dvpp vdec send frame. - * - * @par Function - * Pass the input memory to be decoded - * and the decoded output memory to the decoder for decoding - * - * @param channelDesc [IN] vdec channel destruction - * @param input [IN] input stream destruction - * @param output [IN|OUT] output picture destruction - * @param config [IN] vdec frame config - * @param userData [IN] user data for callback function - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc - */ -ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, - acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); - -/** - * @ingroup AscendCL - * @brief dvpp vdec send skipped frame. - * - * @par Function - * Pass video frame to decoder - * - * @param channelDesc [IN] vdec channel destruction - * @param input [IN] input stream destruction - * @param config [IN] vdec frame config - * @param userData [IN] user data for callback function - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame - */ -ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, - aclvdecFrameConfig *config, void *userData); - -/** - * @ingroup AscendCL - * @brief dvpp vpc convert color. - * - * @par Restriction - * @li outputDesc:Width height stride, No changes are allowed. Just configure 0 - * @par Function - * Convert color gamut - * - * @param channelDesc [IN] the channel destruction - * @param inputDesc [IN] convert color input picture destruction - * @param outputDesc [IN|OUT] convert color output picture destruction - * @param stream [IN] convert color task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreatePicDesc - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief dvpp vpc pyramid down. 
- * - * @par Restriction - * @li outputDesc:format only supported YUV400 - * @par Function - * Image pyramid down - * - * @param channelDesc [IN] the channel destruction - * @param inputDesc [IN] pyr down input picture destruction - * @param outputDesc [IN|OUT] pyr down output picture destruction - * @param reserve [IN] reserved param , must be nullptr - * @param stream [IN] pyr down task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreatePicDesc - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Set dvpp channel mode. - * - * @param channelDesc [OUT] the channel destruction - * @param mode [IN] channel mode - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode); - -/** - * @ingroup AscendCL - * @brief Set resize config interpolation. - * - * @param resizeConfig [OUT] the resize config - * @param interpolation [IN] interpolation - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetResizeConfigInterpolation(acldvppResizeConfig *resizeConfig, - uint32_t interpolation); - -/** - * @ingroup AscendCL - * @brief Get resize config interpolation. - * - * @param resizeConfig [IN] the resize config - * - * @retval Interpolation of resize config. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppResizeConfig *resizeConfig); - -/** - * @ingroup AscendCL - * @brief Set vdec channel out mode. 
- * - * @param channelDesc [OUT] the channel destruction - * @param outMode [IN] channel out mode - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode); - -/** - * @ingroup AscendCL - * @brief Get vdec channel out mode. - * - * @param channelDesc [IN] the channel destruction - * - * @retval Out mode of channel destruction - * @retval default 0 - */ -ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutMode(const aclvdecChannelDesc *channelDesc); - -/** - * @ingroup AscendCL - * @brief Create dvpp batch picture description. - * - * @param batchSize [IN] batch size - * - * @retval null for failed. - * @retval OtherValues success. - */ -ACL_FUNC_VISIBILITY acldvppBatchPicDesc *acldvppCreateBatchPicDesc(uint32_t batchSize); - -/** - * @ingroup AscendCL - * @brief Get dvpp picture description. - * - * @param batchPicDesc [IN] dvpp batch picture description. - * @param index [IN] index of batch - * - * @retval null for failed. - * @retval OtherValues Failure - * - * @see acldvppCreateBatchPicDesc - */ -ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppGetPicDesc(acldvppBatchPicDesc *batchPicDesc, uint32_t index); - -/** - * @ingroup AscendCL - * @brief Destroy dvpp batch picture description. - * - * @par Function - * Can only destroy batch picture description information created - * through acldvppCreateBatchPicDesc interface. - * - * @param batchPicDesc [IN] dvpp batch picture description. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateBatchPicDesc - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyBatchPicDesc(acldvppBatchPicDesc *batchPicDesc); - -/** - * @ingroup AscendCL - * @brief Create dvpp lut map. - * - * @retval null for failed. - * @retval OtherValues success. 
- */ -ACL_FUNC_VISIBILITY acldvppLutMap *acldvppCreateLutMap(); - -/** - * @ingroup AscendCL - * @brief Destroy lut map. - * - * @param lutMap [IN] lut map - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyLutMap(acldvppLutMap *lutMap); - -/** - * @ingroup AscendCL - * @brief Get lut map dims. - * - * @param lutMap [IN] lut map - * - * @retval 0 for failed. - * @retval OtherValues success. - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); - -/** - * @ingroup AscendCL - * @brief Get lut map data. - * - * @param lutMap [IN] lut map - * @param dim [IN] input dim of map - * @param data [OUT] the dim of lut map's data - * @param len [OUT] the dim of lut map's length - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data, - uint32_t *len); -/** - * @ingroup AscendCL - * @brief Vpc equalize hist. - * - * @param channelDesc [IN] channel desc - * @param inputDesc [IN] input desc - * @param outputDesc [IN|OUT] output desc - * @param lutMap [IN] lut map param - * @param stream [IN] runtime stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, - const acldvppLutMap *lutMap, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Create dvpp border config. - * - * @retval null for failed. - * @retval OtherValues success. - */ -ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); - -/** - * @ingroup AscendCL - * @brief Set value of border config. 
- * - * @param borderConfig [OUT] border config - * @param index [IN] index of value array - * @param value [IN] value - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index, - double value); - -/** - * @ingroup AscendCL - * @brief Set border type of border config. - * - * @param borderConfig [OUT] border config - * @param borderType [IN] border type - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBorderType(acldvppBorderConfig *borderConfig, - acldvppBorderType borderType); - -/** - * @ingroup AscendCL - * @brief Set top of border config. - * - * @param borderConfig [OUT] border config - * @param top [IN] top of border - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigTop(acldvppBorderConfig *borderConfig, uint32_t top); - -/** - * @ingroup AscendCL - * @brief Set bottom of border config. - * - * @param borderConfig [OUT] border config - * @param bottom [IN] bottom of border - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBottom(acldvppBorderConfig *borderConfig, uint32_t bottom); - -/** - * @ingroup AscendCL - * @brief Set left of border config. - * - * @param borderConfig [OUT] border config - * @param left [IN] left of border - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigLeft(acldvppBorderConfig *borderConfig, uint32_t left); - -/** - * @ingroup AscendCL - * @brief Set right of border config. 
- * - * @param borderConfig [OUT] border config - * @param right [IN] right of border - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigRight(acldvppBorderConfig *borderConfig, uint32_t right); - -/** - * @ingroup AscendCL - * @brief Get value of border config. - * - * @param borderConfig [IN] border config - * @param index[IN] index of value array - * - * @retval invalid value is < 0, normal Value is >= 0 - */ -ACL_FUNC_VISIBILITY double acldvppGetBorderConfigValue(const acldvppBorderConfig *borderConfig, uint32_t index); - -/** - * @ingroup AscendCL - * @brief Get border type of border config. - * - * @param borderConfig [IN] border config - * @retval border type of border config - */ -ACL_FUNC_VISIBILITY acldvppBorderType acldvppGetBorderConfigBorderType(const acldvppBorderConfig *borderConfig); - -/** - * @ingroup AscendCL - * @brief Get right of border config. - * - * @param borderConfig [IN] border config - * - * @retval default 0, top value of border config - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigTop(const acldvppBorderConfig *borderConfig); - -/** - * @ingroup AscendCL - * @brief Get Bottom of border config. - * - * @param borderConfig [IN] border config - * - * @retval default 0, top value of border config - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigBottom(const acldvppBorderConfig *borderConfig); - -/** - * @ingroup AscendCL - * @brief Get left of border config. - * - * @param borderConfig [IN] border config - * - * @retval default 0, top value of border config - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigLeft(const acldvppBorderConfig *borderConfig); - -/** - * @ingroup AscendCL - * @brief Get right of border config. 
- * - * @param borderConfig [IN] border config - * - * @retval default 0, right value of border config - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigRight(const acldvppBorderConfig *borderConfig); - -/** - * @ingroup AscendCL - * @brief Destroy border config. - * - * @param borderConfig [IN] border config - * - * @retval ACL_SUCCESS for success, other for failure - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *borderConfig); - -/** - * @ingroup AscendCL - * @brief Vpc make border. - * - * @param channelDesc [IN] channel desc - * @param inputDesc [IN] input desc - * @param outputDesc [IN|OUT] output desc - * @param borderConfig [IN] border config param - * @param stream [IN] runtime stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, - const acldvppBorderConfig *borderConfig, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Dvpp vpc calc hist. - * - * @param channelDesc [IN] the channel destruction - * @param srcPicDesc [IN] pyr down input picture destruction - * @param hist [IN|OUT] pyr down output picture destruction - * @param reserve [IN] reserved param, must be nullptr - * @param stream [IN] task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist - */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc, - acldvppHist *hist, void *reserve, aclrtStream stream); - -/** - * @ingroup AscendCL - * @brief Create vpc hist description. - * - * @retval null for failed. - * @retval OtherValues success. 
- */ -ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist(); - -/** - * @ingroup AscendCL - * @brief Destroy vpc hist description. - * - * @par Function - * Can only destroy hist description information created - * through acldvppCreateHist interface. - * - * @param hist [IN] vpc hist description. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateHist - */ -ACL_FUNC_VISIBILITY aclError acldvppDestroyHist(acldvppHist *hist); - -/** - * @ingroup AscendCL - * @brief Get dims of vpc hist description. - * - * @param hist [IN] vpc hist description. - * - * @retval dims of vpc hist description. - * - * @see acldvppCreateHist | acldvppVpcCalcHistAsync - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetHistDims(acldvppHist *hist); - -/** - * @ingroup AscendCL - * @brief Get data from vpc hist description by dim. - * - * @param hist [IN] vpc hist description. - * @param dim [IN] which dim to get data. - * @param data [OUT] address of output hist data. - * @param len [OUT] len of output hist data. - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see acldvppCreateHist | acldvppVpcCalcHistAsync - */ -ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, uint32_t **data, uint16_t *len); - -/** - * @ingroup AscendCL - * @brief Get dvpp calc hist process return code. - * - * @param hist [IN] vpc hist description. - * - * @retval Dvpp calc hist process return code. - * - * @see acldvppCreateHist | acldvppVpcCalcHistAsync - */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); - -/** - * @ingroup AscendCL - * @brief Set vpc hist description to 0. - * - * @par Function - * Can only clear hist description information created - * through acldvppCreateHist interface. - * - * @param hist [IN] vpc hist description. - * - * @retval ACL_SUCCESS The function is successfully executed. 
- * @retval OtherValues Failure - * - * @see acldvppCreateHist - */ -ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); - -#ifdef __cplusplus -} -#endif - -#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h deleted file mode 100644 index 46d934e6..00000000 --- a/inc/external/hccl/hccl.h +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hccl.h - * @brief HCCL API - */ - -#ifndef HCCL_H_ -#define HCCL_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/** - * @brief Initialize HCCL. - * - * @param clusterInfo A string identifying the cluster info file path, include file name. - * @param rank A integer identifying the identify for the rank. - * @param comm A pointer identifying the initialized communication resource. - * @return HcclResult - * @see HcclCommDestroy() - */ -extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); - -/** - * @brief Get hccl root info. - * - * @param rootInfo A pointer identifying the hccl root info. - * @return HcclResult - */ -extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); - -/** - * @brief Initialize HCCL with root info. - * - * @param nRanks A integer identifying the rank size of the cluster. - * @param rootInfo A struct identifying the hccl root info. 
- * @param rank A integer identifying the identify for the rank. - * @param comm A pointer identifying the initialized communication resource. - * @return HcclResult - * @see HcclCommDestroy() - */ -extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); - -/** - * @brief AllReduce operator. - * - * @param sendBuf A pointer identifying the input data address of the operator. - * @param recvBuf A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the output data. - * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, - * float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param comm A pointer identifying the communication resource based on. - * @param stream A pointer identifying the stream information. - * @return HcclResult - */ -extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, - HcclComm comm, aclrtStream stream); - -/** - * @brief Broadcast operator. - * - * @param buf A pointer identifying the data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param root An integer(u32) identifying the the root rank in the operator. - * @param comm A pointer identifying the communication resource based on - * @param stream A pointer identifying the stream information. - * @return HcclResult - */ -extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, - aclrtStream stream); - -/** - * @brief ReduceScatter operator. - * - * @param sendBuf A pointer identifying the input data address of the operator. 
- * @param recvBuf A pointer identifying the output data address of the operator. - * @param recvCount An integer(u64) identifying the number of the output data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param comm A pointer identifying the communication resource based on. - * @param stream A pointer identifying the stream information. - * @return HcclResult - */ -extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, - HcclReduceOp op, HcclComm comm, aclrtStream stream); - -/** - * @brief AllGather operator. - * - * @param sendBuf A pointer identifying the input data address of the operator. - * @param recvBuf A pointer identifying the output data address of the operator. - * @param sendCount An integer(u64) identifying the number of the input data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param comm A pointer identifying the communication resource based on. - * @param stream A pointer identifying the stream information. 
- * @return HcclResult - */ -extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, - aclrtStream stream); - -/** - * @brief Destroy HCCL comm - * - * @param comm A pointer identifying the communication resource targetting - * @return HcclResult - * @see HcclCommInitClusterInfo() - */ -extern HcclResult HcclCommDestroy(HcclComm comm); - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // HCCL_H_ diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h deleted file mode 100644 index 0e832396..00000000 --- a/inc/external/hccl/hccl_types.h +++ /dev/null @@ -1,101 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file hccl_types.h - * @brief HCCL data type definition - * - */ - -#ifndef HCCL_TYPES_H_ -#define HCCL_TYPES_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/** - * @brief HCCL functions return value definition - */ -typedef enum { - HCCL_SUCCESS = 0, /**< success */ - HCCL_E_PARA = 1, /**< parameter error */ - HCCL_E_PTR = 2, /**< empty pointer */ - HCCL_E_MEMORY = 3, /**< memory error */ - HCCL_E_INTERNAL = 4, /**< internal error */ - HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ - HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ - HCCL_E_UNAVAIL = 7, /**< resource unavailable */ - HCCL_E_SYSCALL = 8, /**< call system interface error */ - HCCL_E_TIMEOUT = 9, /**< timeout */ - HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ - HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ - HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ - HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ - HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ - HCCL_E_RUNTIME = 15, /**< call runtime api fail */ - HCCL_E_DRV = 16, /**< call driver api fail */ - HCCL_E_PROFILING = 17, /**< call profiling api fail */ - HCCL_E_CCE = 18, /**< call cce api fail */ - HCCL_E_NETWORK = 19, /**< call network api fail */ - HCCL_E_RESERVED /**< reserved */ -} HcclResult; - -/** - * @brief handle to HCCL communicator - */ -typedef void *HcclComm; - -/** - * @brief HCCL Reduction opperation - */ -typedef enum { - HCCL_REDUCE_SUM = 0, /**< sum */ - HCCL_REDUCE_PROD = 1, /**< prod */ - HCCL_REDUCE_MAX = 2, /**< max */ - HCCL_REDUCE_MIN = 3, /**< min */ - HCCL_REDUCE_RESERVED /**< reserved */ -} HcclReduceOp; - -/** - * @brief HCCL data type - */ -typedef enum { - HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ - HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ - HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ - HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ - HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ - HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ - HCCL_DATA_TYPE_UINT64 = 6, /**< 
uint64 */ - HCCL_DATA_TYPE_RESERVED /**< reserved */ -} HcclDataType; - -const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length - -/** - * @brief HCCL root info - */ -typedef struct HcclRootInfoDef { - char internal[HCCL_ROOT_INFO_BYTES]; -} HcclRootInfo; - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // HCCL_TYPES_H_ diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h deleted file mode 100644 index 0ae5303d..00000000 --- a/inc/external/runtime/rt_error_codes.h +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ -#define __INC_EXTERNEL_RT_ERROR_CODES_H__ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static const int32_t ACL_RT_SUCCESS = 0; // success - -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type - -static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000; // feature not support -static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error -static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error - -static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000; // runtime 
internel error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 
507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error - -static const int32_t ACL_ERROR_RT_DRV_INTERNEL_ERROR = 507899; // drv internel error - -#ifdef __cplusplus -} -#endif - -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ From 335e9e892cda6e8a4451f72c37b9bb1a32e64a87 Mon Sep 17 00:00:00 2001 From: yskhhh Date: Wed, 2 Dec 2020 12:56:46 +0800 Subject: [PATCH 006/127] test atc dynamic shape --- ge/common/CMakeLists.txt | 1 + ge/common/auth/file_saver.cc | 59 +++ ge/common/auth/file_saver.h | 7 + ge/common/ge_common.mk | 1 + ge/common/helper/model_helper.cc | 421 +++++++++++++++++-- ge/common/helper/om_file_helper.cc | 193 +++++++++ ge/common/types.cc | 2 +- ge/generator/ge_generator.cc | 54 ++- ge/model/ge_root_model.h | 7 +- inc/framework/common/helper/model_helper.h | 62 ++- inc/framework/common/helper/om_file_helper.h | 24 +- inc/framework/common/types.h | 7 +- 12 files changed, 769 insertions(+), 69 deletions(-) diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index 9c588396..f95056a2 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -24,6 +24,7 @@ set(SRC_LIST "helper/om_file_helper.cc" "helper/model_helper.cc" "../model/ge_model.cc" + "../model/ge_root_model.cc" 
"auth/file_saver.cc" "fp16_t.cc" "math/fp16_math.cc" diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc index 7b41397a..91fae074 100755 --- a/ge/common/auth/file_saver.cc +++ b/ge/common/auth/file_saver.cc @@ -258,6 +258,65 @@ FileSaver::SaveToFile(const string &file_path, ModelFileHeader &file_header, Mod return SUCCESS; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status +FileSaver::SaveToFile(const string &file_path, ModelFileHeader &file_header, + vector &model_partition_tables, + const vector> &all_partition_datas) { + file_header.is_encrypt = ModelEncryptType::UNENCRYPTED; + + const Status ret = SaveWithFileHeader(file_path, file_header, model_partition_tables, all_partition_datas); + GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, FAILED, "save file failed, file_path:%s, file header len:%u.", + file_path.c_str(), file_header.length); + return SUCCESS; +} + +Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFileHeader &file_header, + vector &model_partition_tables, + const vector> &all_partition_datas) { + + GE_CHK_BOOL_EXEC(model_partition_tables.size() == all_partition_datas.size(), + return PARAM_INVALID, + "model table size %zu does not match partition size %zu", + model_partition_tables.size(), all_partition_datas.size()) + for (size_t index = 0; index < model_partition_tables.size(); ++index) { + auto &cur_partiton_data = all_partition_datas[index]; + auto &cur_model_partition_table = *model_partition_tables[index]; + GE_CHK_BOOL_RET_STATUS(!cur_partiton_data.empty() && cur_model_partition_table.num != 0 + && cur_model_partition_table.num == cur_partiton_data.size(), FAILED, + "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", + cur_model_partition_table.num, cur_partiton_data.size()); + } + + // Open file + int32_t fd = 0; + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(OpenFile(fd, file_path) != SUCCESS, return FAILED); + Status ret = SUCCESS; + do { + // Write file header + 
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + WriteData(static_cast(&file_header), sizeof(ModelFileHeader), fd) != SUCCESS, ret = FAILED; + break); + for (size_t index = 0; index < model_partition_tables.size(); ++index) { + // Write model partition table + auto &cur_tabel = *model_partition_tables[index]; + uint32_t table_size = static_cast(SIZE_OF_MODEL_PARTITION_TABLE(cur_tabel)); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + WriteData(static_cast(&cur_tabel), table_size, fd) != SUCCESS, ret = FAILED; break); + // Write partition data + auto &cur_partition_datas = all_partition_datas[index]; + for (const auto &partition_data : cur_partition_datas) { + GELOGI("GC:size[%zu]", partition_data.size); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + WriteData(static_cast(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED; + break); + } + } + } while (0); + // Close file + GE_CHK_BOOL_RET_STATUS(mmClose(fd) == EN_OK, FAILED, "Close file failed."); + return ret; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status FileSaver::SaveToFile(const string &file_path, const void *data, int len) { if (data == nullptr || len <= 0) { diff --git a/ge/common/auth/file_saver.h b/ge/common/auth/file_saver.h index 79e2126e..97fbaae5 100644 --- a/ge/common/auth/file_saver.h +++ b/ge/common/auth/file_saver.h @@ -74,6 +74,10 @@ class FileSaver { ModelPartitionTable &model_partition_table, const std::vector &partition_datas); + static Status SaveToFile(const string &file_path, ModelFileHeader &file_header, + vector &model_partition_tables, + const vector> &all_partition_datas); + static Status SaveToBuffWithFileHeader(const ModelFileHeader &file_header, ModelPartitionTable &model_partition_table, const std::vector &partitionDatas, @@ -108,6 +112,9 @@ class FileSaver { static Status SaveWithFileHeader(const std::string &file_path, const ModelFileHeader &file_header, ModelPartitionTable &model_partition_table, const std::vector &partition_datas); + static Status SaveWithFileHeader(const 
std::string &file_path, const ModelFileHeader &file_header, + vector &model_partition_tables, + const vector> &all_partition_datas); }; } // namespace ge #endif // GE_COMMON_AUTH_FILE_SAVER_H_ diff --git a/ge/common/ge_common.mk b/ge/common/ge_common.mk index 3fffd203..e28090ad 100755 --- a/ge/common/ge_common.mk +++ b/ge/common/ge_common.mk @@ -7,6 +7,7 @@ GE_COMMON_LOCAL_SRC_FILES := \ helper/om_file_helper.cc \ helper/model_helper.cc \ ../model/ge_model.cc \ + ../model/ge_root_model.cc \ auth/file_saver.cc \ fp16_t.cc \ math/fp16_math.cc \ diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 6f201461..67c4a80e 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -32,6 +32,7 @@ using domi::ModelTaskDef; namespace { const int64_t kOriginalOmPartitionNum = 1; +const uint32_t kStatiOmFileModelNum = 1; } @@ -39,7 +40,7 @@ namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelHelper::~ModelHelper() { (void)ReleaseLocalModelData(); } Status ModelHelper::SaveModelPartition(std::shared_ptr &om_file_save_helper, ModelPartitionType type, - const uint8_t *data, size_t size) { + const uint8_t *data, size_t size, size_t model_index) { if (size < 1 || size > UINT32_MAX) { GELOGE(PARAM_INVALID, "Add model partition failed, partition size %zu invalid", size); if (size > UINT32_MAX) { @@ -68,25 +69,16 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr &om_fil partition_model.data = const_cast(data); partition_model.size = static_cast(size); partition_model.type = type; - if (om_file_save_helper->AddPartition(partition_model) != SUCCESS) { + if (om_file_save_helper->AddPartition(partition_model, model_index) != SUCCESS) { GELOGE(PARAM_INVALID, "Add model partition failed, partition size %zu", size); return PARAM_INVALID; } return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmModel(const GeModelPtr &ge_model, - const SaveParam &save_param, - const 
std::string &output_file, - ModelBufferData& model) { - if (output_file.empty()) { - GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix"); - return FAILED; - } - GE_IF_BOOL_EXEC(ge_model == nullptr, GELOGE(FAILED, "Ge_model is nullptr"); return FAILED); - std::shared_ptr om_file_save_helper = ge::MakeShared(); - GE_CHECK_NOTNULL(om_file_save_helper); +Status ModelHelper::SaveModelDef(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, ge::Buffer &model_buffer, size_t model_index) { ModelPtr model_tmp = ge::MakeShared(ge_model->GetName(), ge_model->GetPlatformVersion()); if (model_tmp == nullptr) { GELOGE(FAILED, "Create Model %s Ptr failed", ge_model->GetName().c_str()); @@ -96,16 +88,21 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod model_tmp->SetVersion(ge_model->GetVersion()); model_tmp->SetAttr(ge_model->MutableAttrMap()); - ge::Buffer model_buffer; + (void)model_tmp->Save(model_buffer); GELOGD("MODEL_DEF size is %zu", model_buffer.GetSize()); if (model_buffer.GetSize() > 0) { if (SaveModelPartition(om_file_save_helper, ModelPartitionType::MODEL_DEF, model_buffer.GetData(), - model_buffer.GetSize()) != SUCCESS) { + model_buffer.GetSize(), model_index) != SUCCESS) { GELOGE(PARAM_INVALID, "Add model graph partition failed"); return PARAM_INVALID; } } + return SUCCESS; +} + +Status ModelHelper::SaveModelWeights(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, size_t model_index) { auto ge_model_weight = ge_model->GetWeight(); GELOGD("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); // weight is not necessary @@ -113,31 +110,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, ge_model_weight.GetData(), - ge_model_weight.GetSize()), "Add weight partition failed"); + ge_model_weight.GetSize(), model_index), "Add 
weight partition failed"); } + return SUCCESS; +} +Status ModelHelper::SaveModelTbeKernel(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, size_t model_index) { TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); GELOGD("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize()); if (tbe_kernel_store.DataSize() > 0) { GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, - tbe_kernel_store.Data(), - tbe_kernel_store.DataSize()), "Add tbe kernel partition failed"); + ge_model->GetTBEKernelStore().Data(), + ge_model->GetTBEKernelStore().DataSize(), model_index), + "Add tbe kernel partition failed"); } - // no need to check value, DATA->NetOutput (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); + return SUCCESS; +} + +Status ModelHelper::SaveModelCustAICPU(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, size_t model_index) { CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore(); GELOGD("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize()); if (cust_aicpu_kernel_store.DataSize() > 0) { GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::CUST_AICPU_KERNELS, - cust_aicpu_kernel_store.Data(), - cust_aicpu_kernel_store.DataSize()), + ge_model->GetCustAICPUKernelStore().Data(), + cust_aicpu_kernel_store.DataSize(), model_index), "Add cust aicpu kernel partition failed"); } + return SUCCESS; +} +Status ModelHelper::SaveModelTaskDef(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, ge::Buffer &task_buffer, size_t model_index) { std::shared_ptr model_task_def = ge_model->GetModelTaskDefPtr(); if (model_task_def == nullptr) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed"); @@ -146,9 +156,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod size_t partition_task_size = model_task_def->ByteSizeLong(); 
GE_IF_BOOL_EXEC(partition_task_size == 0 || partition_task_size > INT_MAX, GELOGE(FAILED, "Model_def's byte size (%zu) is invalid!", partition_task_size); - return FAILED); + return FAILED); - ge::Buffer task_buffer(partition_task_size); + task_buffer = ge::Buffer(partition_task_size); if (task_buffer.GetSize() == 0) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc model task def buffer failed"); return ACL_ERROR_GE_MEMORY_ALLOCATION; @@ -159,14 +169,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod GELOGD("TASK_INFO size is %zu", partition_task_size); if (SaveModelPartition(om_file_save_helper, ModelPartitionType::TASK_INFO, task_buffer.GetData(), - partition_task_size) != SUCCESS) { + partition_task_size, model_index) != SUCCESS) { GELOGE(PARAM_INVALID, "Add model task def partition failed"); return PARAM_INVALID; } + return SUCCESS; +} + +Status ModelHelper::SaveModelHeader(std::shared_ptr &om_file_save_helper, + const GeModelPtr &ge_model, size_t model_num) { // Save target/version to model_header ModelFileHeader &model_header = om_file_save_helper->GetModelFileHeader(); model_header.platform_type = ge_model->GetPlatformType(); model_header.om_ir_version = ge_model->GetVersion(); + model_header.model_num = model_num; std::string platform_version = ge_model->GetPlatformVersion(); errno_t err; @@ -188,8 +204,142 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod } string model_name = reinterpret_cast(model_header.name); GELOGD("Model name save:%s", model_name.c_str()); + return SUCCESS; +} + +Status ModelHelper::SaveAllModelPartiton(std::shared_ptr& om_file_save_helper, + const GeModelPtr &ge_model, ge::Buffer &model_buffer, + ge::Buffer &task_buffer, size_t model_index) { + if (SaveModelDef(om_file_save_helper, ge_model, model_buffer, model_index) != SUCCESS) { + GELOGE(FAILED, "save model def failed"); + return FAILED; + } + + if (SaveModelWeights(om_file_save_helper, ge_model, model_index) != 
SUCCESS) { + GELOGE(FAILED, "save model weights failed"); + return FAILED; + } + + if (SaveModelTbeKernel(om_file_save_helper, ge_model, model_index) != SUCCESS) { + GELOGE(FAILED, "save model tbe kernel failed"); + return FAILED; + } + + if (SaveModelCustAICPU(om_file_save_helper, ge_model, model_index) != SUCCESS) { + GELOGE(FAILED, "save model cust ai cpu failed"); + return FAILED; + } + + + if (SaveModelTaskDef(om_file_save_helper, ge_model, task_buffer, model_index) != SUCCESS) { + GELOGE(FAILED, "save task def failed"); + return FAILED; + } + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmModel(const GeModelPtr &ge_model, + const SaveParam &save_param, + const std::string &output_file, + ModelBufferData& model) { + if (output_file.empty()) { + GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix"); + return FAILED; + } - Status ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_); + GE_IF_BOOL_EXEC(ge_model == nullptr, GELOGE(FAILED, "Ge_model is nullptr"); return FAILED); + std::shared_ptr om_file_save_helper = ge::MakeShared(); + GE_CHECK_NOTNULL(om_file_save_helper); + ge::Buffer model_buffer; + ge::Buffer task_buffer; + + auto ret = SaveAllModelPartiton(om_file_save_helper, ge_model, model_buffer, task_buffer); + if (ret != SUCCESS) { + GELOGE(ret, "save all model partition failed"); + return ret; + } + + ret = SaveModelHeader(om_file_save_helper, ge_model); + if (ret != SUCCESS) { + GELOGE(ret, "save model header failed"); + return ret; + } + + ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_); + if (ret != SUCCESS) { + GELOGE(FAILED, "OmFileSaveHelper SaveModel return fail."); + return ret; + } + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmRootModel( + const GeRootModelPtr &ge_root_model, + const SaveParam &save_param, + const std::string &output_file, 
+ ModelBufferData& model, + bool is_unknown_shape) { + + GE_CHECK_NOTNULL(ge_root_model); + GE_IF_BOOL_EXEC(ge_root_model == nullptr, GELOGE(FAILED, "Ge_root_model is nullptr"); return FAILED); + + auto &name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); + GE_IF_BOOL_EXEC(name_to_ge_model.empty(), GELOGE(FAILED, "Ge_root_model has no sub model"); return FAILED); + GE_IF_BOOL_EXEC(output_file.empty(), + GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix"); + return FAILED); + + if (!is_unknown_shape) { + auto &model_root = name_to_ge_model.begin()->second; + return SaveToOmModel(model_root, save_param, output_file, model); + } + + std::shared_ptr om_file_save_helper = ge::MakeShared(); + GE_CHECK_NOTNULL(om_file_save_helper); + + auto &first_ge_model = name_to_ge_model.at(ge_root_model->GetRootGraph()->GetName()); + + // ge root model must be the first to be loaded + vector model_names{ge_root_model->GetRootGraph()->GetName()}; + for (auto &item : name_to_ge_model) { + if (item.first != model_names.front()) { + model_names.emplace_back(item.first); + } + } + + vector model_buffers(model_names.size()); + vector task_buffers(model_names.size()); + + size_t cur_index = 0; + + if (model_names.size() > 1) { + GELOGD("only save first model MODEL_DEF"); + if (SaveModelDef(om_file_save_helper, first_ge_model, model_buffers[cur_index], cur_index) != SUCCESS) { + GELOGE(FAILED, "save model def failed"); + return FAILED; + } + ++cur_index; + } + + for (; cur_index < model_names.size(); ++cur_index) { + auto model_name = model_names[cur_index]; + GELOGD("cur model %s index is %zu", model_name.c_str(), cur_index); + const GeModelPtr &ge_model = name_to_ge_model.at(model_name); + auto ret = SaveAllModelPartiton(om_file_save_helper, ge_model, model_buffers[cur_index], + task_buffers[cur_index], cur_index); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Save model %s failed", model_name.c_str()); + return INTERNAL_ERROR; + } + } + + auto 
ret = SaveModelHeader(om_file_save_helper, first_ge_model, model_names.size()); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Save model %s header failed", first_ge_model->GetName().c_str()); + return INTERNAL_ERROR; + } + + ret = om_file_save_helper->SaveRootModel(save_param, output_file.c_str(), model, is_offline_); if (ret != SUCCESS) { GELOGE(FAILED, "OmFileSaveHelper SaveModel return fail."); return FAILED; @@ -288,7 +438,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c } file_header_ = reinterpret_cast(model_data.model_data); - OmFileLoadHelper om_load_helper; status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_); if (status != SUCCESS) { @@ -310,7 +459,61 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c GELOGE(status, "GenerateGeModel failed"); return status; } + GELOGD("in ModelHelper::LoadModel, is_assign_model_ is setted to true!"); + is_assign_model_ = true; + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootModel(const ge::ModelData &model_data) { + if (model_data.model_data == nullptr || model_data.model_len == 0) { + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0"); + return GE_EXEC_MODEL_DATA_SIZE_INVALID; + } + + if (is_assign_model_) { + GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); + return GE_EXEC_LOAD_MODEL_REPEATED; + } + + if (ReleaseLocalModelData() != SUCCESS) { + GELOGE(INTERNAL_ERROR, "ReleaseLocalModelData failed."); + return INTERNAL_ERROR; + } + + Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); + if (status != SUCCESS) { + GELOGE(status, "Parse model content failed!"); + return status; + } + + file_header_ = reinterpret_cast(model_data.model_data); + + //model verison 1.0 file header does not have model_num member + is_unknown_shape_model_ = file_header_->version >= ge::MODEL_VERSION 
&& + file_header_->model_num > kStatiOmFileModelNum; + GELOGD("cur om model is ge root model or no %d, model version %zu", is_unknown_shape_model_, file_header_->version); + OmFileLoadHelper om_load_helper; + if (is_unknown_shape_model_) { + auto model_num = file_header_->model_num; + status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_, model_num); + } else { + status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_); + } + if (status != SUCCESS) { + GELOGE(status, "Om_load_helper init failed"); + model_addr_tmp_ = nullptr; + return status; + } + // Encrypt model need to del temp model/no encrypt model don't need to del model + model_addr_tmp_ = nullptr; + + status = GenerateGeRootModel(om_load_helper); + if (status != SUCCESS) { + GELOGE(status, "GenerateGeRootModel failed"); + return status; + } + GELOGD("in ModelHelper::LoadRootModel, is_assign_model_ is setted to true!"); is_assign_model_ = true; return SUCCESS; } @@ -341,6 +544,61 @@ Status ModelHelper::GenerateGeModel(OmFileLoadHelper &om_load_helper) { return SUCCESS; } +Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { + GELOGD("Begin to generate ge root model"); + root_model_ = ge::MakeShared(); + GE_CHECK_NOTNULL(root_model_); + if (!is_unknown_shape_model_) { + if (GenerateGeModel(om_load_helper) != SUCCESS) { + GELOGE(FAILED, "GenerateGeModel failed"); + return FAILED; + } + GE_CHECK_NOTNULL(model_); + root_model_->SetRootGraph(GraphUtils::GetComputeGraph(model_->GetGraph())); + return SUCCESS; + } + + bool is_first_model = true; + for (size_t mode_index = 0; mode_index < file_header_->model_num; ++mode_index) { + GeModelPtr cur_model = ge::MakeShared(); + Status ret = LoadModelData(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_MODEL_PARTITION_FAILED; + } + + if (is_first_model) { + is_first_model = false; + root_model_->SetRootGraph(GraphUtils::GetComputeGraph(cur_model->GetGraph())); + 
root_model_->SetModelId(cur_model->GetModelId()); + model_ = cur_model; + continue; + } + + ret = LoadWeights(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; + } + + ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; + } + + ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; + } + + ret = LoadTask(om_load_helper, cur_model, mode_index); + if (ret != SUCCESS) { + return GE_EXEC_LOAD_TASK_PARTITION_FAILED; + } + root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model); + } + + return SUCCESS; +} + Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper) { ModelPartition partition_model_def; // no need to check value, DATA->NetOutput @@ -366,6 +624,28 @@ void ModelHelper::SetModelToGeModel(ge::Model &model) { model_->SetAttr(model.MutableAttrMap()); } +Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index) { + ModelPartition partition_model_def; + // no need to check value, DATA->NetOutput + om_load_helper.GetModelPartition(ModelPartitionType::MODEL_DEF, partition_model_def, mode_index); + GELOGD("Model_def partition addr:%p,size:%u", partition_model_def.data, partition_model_def.size); + + ge::Model model; + if (ge::Model::Load(partition_model_def.data, partition_model_def.size, model) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Load model failed."); + return INTERNAL_ERROR; + } + + cur_model->SetGraph(model.GetGraph()); + cur_model->SetName(model.GetName()); + cur_model->SetVersion(model.GetVersion()); + cur_model->SetPlatformVersion(model.GetPlatformVersion()); + cur_model->SetAttr(model.MutableAttrMap()); + + return SUCCESS; +} + + Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper) { ModelPartition partition; if 
(om_load_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition) != SUCCESS) { @@ -379,6 +659,19 @@ Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper) { return SUCCESS; } +Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index) { + ModelPartition partition; + if (om_load_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition, mode_index) != SUCCESS) { + GELOGE(FAILED, "Get weight model partition failed."); + return FAILED; + } + ge::Buffer weight = ge::Buffer::CopyFrom(partition.data, partition.size); + cur_model->SetWeight(weight); + + GELOGD("GetWeight size:%u", partition.size); + return SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(OmFileLoadHelper &om_load_helper) { ModelPartition task_partition; if (om_load_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) { @@ -398,6 +691,27 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om return SUCCESS; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(OmFileLoadHelper &om_load_helper, + GeModelPtr &cur_model, + size_t mode_index) { + ModelPartition task_partition; + if (om_load_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition, mode_index) != SUCCESS) { + GELOGE(FAILED, "Get task model partition failed."); + return FAILED; + } + std::shared_ptr task = ge::MakeShared(); + GE_CHECK_NOTNULL(task); + if (task_partition.size != 0) { + if (!ReadProtoFromArray(task_partition.data, task_partition.size, task.get())) { + GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed."); + return INTERNAL_ERROR; + } + GELOGD("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num()); + } + cur_model->SetModelTaskDef(task); + return SUCCESS; +} + Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) { // Load tbe kernels ModelPartition 
partition_kernel_def; @@ -414,6 +728,23 @@ Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) { return SUCCESS; } +Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index) { + // Load tbe kernels + ModelPartition partition_kernel_def; + TBEKernelStore kernel_store; + if (om_load_helper.GetModelPartition(ModelPartitionType::TBE_KERNELS, partition_kernel_def, mode_index) == + SUCCESS) { + GELOGD("Kernels partition size:%u", partition_kernel_def.size); + if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { + GELOGD("Load tbe kernels success"); + } else { + GELOGW("Load tbe kernels failed"); + } + } + cur_model->SetTBEKernelStore(kernel_store); + return SUCCESS; +} + Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) { // Load cust aicpu kernels ModelPartition partition_kernel_def; @@ -421,19 +752,39 @@ Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) { if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def) == SUCCESS) { GELOGD("Kernels partition size:%u", partition_kernel_def.size); if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { - GELOGI("Load cust aicpu kernels success"); + GELOGD("Load cust aicpu kernels success"); + } else { + GELOGW("Load cust aicpu kernels failed"); } } model_->SetCustAICPUKernelStore(kernel_store); return SUCCESS; } +Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, + GeModelPtr &cur_model, size_t mode_index) { + // Load cust aicpu kernels + ModelPartition partition_kernel_def; + CustAICPUKernelStore kernel_store; + if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def, mode_index) + == SUCCESS) { + GELOGD("Kernels partition size:%u", partition_kernel_def.size); + if (kernel_store.Load(partition_kernel_def.data, 
partition_kernel_def.size)) { + GELOGD("Load cust aicpu kernels success"); + } else { + GELOGW("Load cust aicpu kernels failed"); + } + } + cur_model->SetCustAICPUKernelStore(kernel_store); + return SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeModelPtr ModelHelper::GetGeModel() { if (model_ != nullptr) { return model_; } - GELOGI("Model has not been loaded!"); + GELOGD("Model has not been loaded!"); std::shared_ptr out_model = ge::MakeShared(); if (out_model == nullptr) { return nullptr; @@ -441,6 +792,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeModelPtr ModelHelper::GetGeMo return out_model; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeRootModelPtr ModelHelper::GetGeRootModel() { + if (root_model_ != nullptr) { + return root_model_; + } + + GELOGD("Model has not been loaded!"); + std::shared_ptr out_model = ge::MakeShared(); + if (out_model == nullptr) { + return nullptr; + } + return out_model; +} + + Status ModelHelper::ReleaseLocalModelData() noexcept { Status result = SUCCESS; if (model_addr_tmp_ != nullptr) { diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc index ce88cd08..36217ca1 100644 --- a/ge/common/helper/om_file_helper.cc +++ b/ge/common/helper/om_file_helper.cc @@ -52,6 +52,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(u return SUCCESS; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(uint8_t *model_data, + uint32_t model_data_size, + uint32_t model_num) { + Status status = LoadModelPartitionTable(model_data, model_data_size, model_num); + if (status != SUCCESS) { + return status; + } + is_inited_ = true; + return SUCCESS; +} + // Use both FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type, ModelPartition &partition) { @@ -79,6 +90,37 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod return SUCCESS; } 
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type, + ModelPartition &partition, + size_t model_index) { + if (!is_inited_) { + GELOGE(PARAM_INVALID, "OmFileLoadHelper has not been initialized!"); + return PARAM_INVALID; + } + if (model_index >= model_contexts_.size()) { + GELOGE(PARAM_INVALID, "cur index : %zu, model_contexts size:%zu", model_index, model_contexts_.size()); + return PARAM_INVALID; + } + auto &cur_ctx = model_contexts_[model_index]; + bool found = false; + for (ModelPartition &part : cur_ctx.partition_datas_) { + if (part.type == type) { + partition = part; + found = true; + break; + } + } + + if (!found) { + if (type != ModelPartitionType::TBE_KERNELS && type != ModelPartitionType::WEIGHTS_DATA && + type != ModelPartitionType::CUST_AICPU_KERNELS) { + GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast(type)); + return FAILED; + } + } + return SUCCESS; +} + Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { // Parameter validity check if (model.model_data == nullptr) { @@ -148,6 +190,61 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint return SUCCESS; } +Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t model_data_size, uint32_t model_num) { + if (model_data == nullptr) { + GELOGE(PARAM_INVALID, "Param model_data must not be null!"); + return PARAM_INVALID; + } + + uint32_t cur_offset = 0; + for (uint32_t index = 0; index < model_num; ++index) { + // Init partition table + auto partition_table = reinterpret_cast(model_data + cur_offset); + size_t partition_table_size = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); + cur_offset += partition_table_size; + GELOGD("Cur model index %zu: ModelPartitionTable num :%u, " + "ModelFileHeader length :%zu, ModelPartitionTable length :%zu", + index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); + if 
(model_data_size <= cur_offset) { + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", + partition_table->num, model_data_size); + return GE_EXEC_MODEL_DATA_SIZE_INVALID; + } + + for (uint32_t i = 0; i < partition_table->num; i++) { + ModelPartition partition; + partition.size = partition_table->partition[i].mem_size; + partition.data = model_data + cur_offset; + partition.type = partition_table->partition[i].type; + if (index >= model_contexts_.size()) { + if (index != model_contexts_.size()) { + GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", index); + return FAILED; + } + + OmFileContext tmp_ctx; + tmp_ctx.partition_datas_.push_back(partition); + model_contexts_.push_back(tmp_ctx); + } else { + model_contexts_[index].partition_datas_.push_back(partition); + } + + if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) { + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", + partition.size + cur_offset, model_data_size); + return GE_EXEC_MODEL_DATA_SIZE_INVALID; + } + cur_offset += partition.size; + GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast(partition.type), partition.size, index); + } + } + if (cur_offset != model_data_size) { + GELOGE(FAILED, "do not get the complete model, read end offset:%zu, all size:%zu", cur_offset, model_data_size); + return FAILED; + } + return SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::vector &OmFileSaveHelper::GetModelPartitions() const { return context_.partition_datas_; @@ -172,6 +269,28 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelPartitionTable *OmFileSave return partition_table; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelPartitionTable *OmFileSaveHelper::GetPartitionTable( + size_t cur_ctx_index) { + auto &cur_ctx = model_contexts_[cur_ctx_index]; + auto partition_size = 
static_cast(cur_ctx.partition_datas_.size()); + // Build ModelPartitionTable, flex array + cur_ctx.partition_table_.clear(); + cur_ctx.partition_table_.resize(sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * partition_size, 0); + + auto partition_table = reinterpret_cast(cur_ctx.partition_table_.data()); + partition_table->num = partition_size; + + uint32_t mem_offset = 0; + for (uint32_t i = 0; i < partition_size; i++) { + ModelPartition partition = cur_ctx.partition_datas_[i]; + partition_table->partition[i] = {partition.type, mem_offset, partition.size}; + mem_offset += partition.size; + GELOGD("Partition, type:%d, size:%u", static_cast(partition.type), partition.size); + } + return partition_table; +} + + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileSaveHelper::AddPartition(ModelPartition &partition) { if (ge::CheckUint32AddOverflow(context_.model_data_len_, partition.size) != SUCCESS) { GELOGE(FAILED, "UINT32 %u and %u addition can result in overflow!", context_.model_data_len_, partition.size); @@ -182,6 +301,27 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileSaveHelper::AddPar return SUCCESS; } +Status OmFileSaveHelper::AddPartition(ModelPartition &partition, size_t cur_index) { + if (ge::CheckUint32AddOverflow(context_.model_data_len_, partition.size) != SUCCESS) { + GELOGE(FAILED, "UINT32 %u and %u addition can result in overflow!", context_.model_data_len_, partition.size); + return FAILED; + } + if (cur_index >= model_contexts_.size()) { + if (cur_index != model_contexts_.size()) { + GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", cur_index); + return FAILED; + } + OmFileContext tmp_ctx; + tmp_ctx.model_data_len_ += partition.size; + tmp_ctx.partition_datas_.push_back(partition); + model_contexts_.push_back(tmp_ctx); + } else { + model_contexts_[cur_index].model_data_len_ += partition.size; + model_contexts_[cur_index].partition_datas_.push_back(partition); + } + return SUCCESS; +} + Status 
OmFileSaveHelper::SaveModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model, bool is_offline) { (void)save_param.cert_file; @@ -198,6 +338,10 @@ Status OmFileSaveHelper::SaveModel(const SaveParam &save_param, const char *outp Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferData &model, bool is_offline) { #if !defined(NONSUPPORT_SAVE_TO_FILE) + if (context_.partition_datas_.empty()) { + GE_CHK_BOOL_EXEC(!model_contexts_.empty(), return FAILED, "mode contexts empty"); + context_ = model_contexts_.front(); + } uint32_t model_data_len = context_.model_data_len_; if (model_data_len == 0) { GELOGE(domi::PARAM_INVALID, "Model data len error! should not be 0"); @@ -231,4 +375,53 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat return SUCCESS; #endif } + +Status OmFileSaveHelper::SaveRootModel(const SaveParam &save_param, const char *output_file, + ModelBufferData &model, bool is_offline) { + (void)save_param.cert_file; + (void)save_param.ek_file; + (void)save_param.encode_mode; + (void)save_param.hw_key_file; + (void)save_param.pri_key_file; + +#if !defined(NONSUPPORT_SAVE_TO_FILE) + vector model_partition_tabels; + vector> all_model_partitions; + for (size_t ctx_index = 0; ctx_index < model_contexts_.size(); ++ctx_index) { + auto &cur_ctx = model_contexts_[ctx_index]; + uint32_t cur_model_data_len = cur_ctx.model_data_len_; + if (cur_model_data_len == 0) { + GELOGE(domi::PARAM_INVALID, "Model data len error! 
should not be 0"); + return domi::PARAM_INVALID; + } + + auto tmp_table = GetPartitionTable(ctx_index); + if (tmp_table == nullptr) { + GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile execute failed: partition_table is NULL."); + return ge::GE_GRAPH_SAVE_FAILED; + } + uint32_t size_of_table = SIZE_OF_MODEL_PARTITION_TABLE(*tmp_table); + FMK_UINT32_ADDCHECK(size_of_table, cur_model_data_len) + FMK_UINT32_ADDCHECK(size_of_table + cur_model_data_len, model_header_.length) + model_header_.length += size_of_table + cur_model_data_len; + model_partition_tabels.push_back(tmp_table); + all_model_partitions.push_back(cur_ctx.partition_datas_); + GELOGD("sizeof(ModelPartitionTable):%u, cur_model_data_len:%u, cur_context_index:%zu", + size_of_table, cur_model_data_len, ctx_index); + } + Status ret; + if (is_offline) { + ret = FileSaver::SaveToFile(output_file, model_header_, model_partition_tabels, all_model_partitions); + } else { + GELOGW("do not support save ge root model to buff now"); + return FAILED; + } + if (ret == SUCCESS) { + GELOGD("Save model success without encrypt."); + } + return ret; +#else + return SUCCESS; +#endif +} } // namespace ge diff --git a/ge/common/types.cc b/ge/common/types.cc index 54dc769f..1cc70347 100644 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -801,7 +801,7 @@ const uint32_t XRGB_CHN_NUM = 4; /// const bool DEFAULT_GLOBAL_POOLING = false; -const uint32_t MODEL_VERSION = 0x10000000; ///< Model version 1.0/// +const uint32_t MODEL_VERSION = 0x20000000; ///< Model version 2.0/// // Eltwise's input size const int ELTWISE_MIN_INPUT_SIZE = 2; diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 16d63f6b..dc64aac1 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -240,6 +240,8 @@ class GeGenerator::Impl { Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); + Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, 
ModelBufferData &model_buff); + Status SaveParams(GeModelPtr &ge_model, const string &type, const map &attrs, const vector &inputs, const vector &outputs); @@ -505,19 +507,7 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr GE_CHECK_NOTNULL(ge_root_model); GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); - ModelHelper model_helper; - string model_name = ""; - Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name); - if (name_ret != SUCCESS) { - ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); - GELOGE(FAILED, "Get model_name failed. Param --output is invalid"); - return PARAM_INVALID; - } - map name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); - GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; - GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model can not be null"); - ge_model->SetName(model_name); - ret = impl_->SaveModel(file_name_prefix, ge_model, model); + ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model); if (ret != SUCCESS) { GELOGE(ret, "Save model failed"); if (impl_->graph_manager_.Finalize() != SUCCESS) { @@ -712,6 +702,44 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr & return SUCCESS; } +Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootModelPtr &ge_root_model, + ModelBufferData &model_buff) { + bool is_unknown_shape = false; + auto ret = ge_root_model->CheckIsUnknownShape(is_unknown_shape); + if (ret != SUCCESS) { + GELOGE(FAILED, "Check root model is unkonwn shape failed"); + return FAILED; + } + GELOGD("begin save root model, cur model is unkonwn shape model ? 
: %d", is_unknown_shape); + GE_CHK_BOOL_EXEC(!ge_root_model->GetSubgraphInstanceNameToModel().empty(), return FAILED, + "ge root model has no sub model") + GeModelPtr model_root = nullptr; + if (is_unknown_shape) { + model_root = make_shared(); + model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph())); + ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root); + model_root->SetName(ge_root_model->GetRootGraph()->GetName()); + } else { + model_root = ge_root_model->GetSubgraphInstanceNameToModel().begin()->second; + } + // set atc version + if (!SetAtcVersionInfo(*(model_root.get()))) { + GELOGW("SetPackageVersionInfo of atc failed!"); + } + // set opp version + if (!SetOppVersionInfo(*(model_root.get()))) { + GELOGW("SetPackageVersionInfo of ops failed!"); + } + ModelHelper model_helper; + model_helper.SetSaveMode(is_offline_); + ret = model_helper.SaveToOmRootModel(ge_root_model, save_param_, file_name_prefix, model_buff, is_unknown_shape); + if (ret != SUCCESS) { + GELOGE(ret, "Save to om model failed"); + return ret; + } + return SUCCESS; +} + Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector &inputs, GeRootModelPtr &ge_root_model) { static std::atomic atomic_graph_id(0); diff --git a/ge/model/ge_root_model.h b/ge/model/ge_root_model.h index 53174064..aa5a4d47 100755 --- a/ge/model/ge_root_model.h +++ b/ge/model/ge_root_model.h @@ -23,6 +23,7 @@ namespace ge { class GeRootModel { public: + GeRootModel() = default; explicit GeRootModel(ComputeGraphPtr &root_graph) : root_graph_(root_graph), model_id_(INVALID_MODEL_ID) {}; ~GeRootModel() = default; @@ -35,11 +36,11 @@ class GeRootModel { void SetModelId(uint32_t model_id) { model_id_ = model_id; } uint32_t GetModelId() const { return model_id_; } Status CheckIsUnknownShape(bool &is_dynamic_shape); - + void SetRootGraph(ComputeGraphPtr graph) { root_graph_ = graph; } private: - ComputeGraphPtr root_graph_; + 
ComputeGraphPtr root_graph_ = nullptr; std::map subgraph_instance_name_to_model_; - uint32_t model_id_; + uint32_t model_id_ = 0; }; } // namespace ge using GeRootModelPtr = std::shared_ptr; diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index 949d8b4c..7867e63d 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -25,6 +25,7 @@ #include "common/types.h" #include "graph/model.h" #include "model/ge_model.h" +#include "model/ge_root_model.h" namespace ge { class ModelHelper { @@ -32,17 +33,22 @@ class ModelHelper { ModelHelper() = default; ~ModelHelper(); - Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, - const std::string &output_file, ge::ModelBufferData &model); - Status SaveOriginalGraphToOmModel(const ge::Graph& graph, const std::string& output_file); + Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, + ge::ModelBufferData &model); + Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, const string &output_file, + ModelBufferData &model, bool is_unknown_shape); + Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file); Status LoadModel(const ge::ModelData &model_data); - Status GetModelBufferData(ge::ModelBufferData& model); + Status LoadRootModel(const ge::ModelData &model_data); + Status GetModelBufferData(ge::ModelBufferData &model); - const ModelFileHeader* GetFileHeader() const { return file_header_; } + const ModelFileHeader *GetFileHeader() const { return file_header_; } GeModelPtr GetGeModel(); + GeRootModelPtr GetGeRootModel(); void SetSaveMode(bool val) { is_offline_ = val; } bool GetSaveMode(void) const { return is_offline_; } + bool GetModelType() const { return is_unknown_shape_model_; }; Status GetBaseNameFromFileName(const std::string &file_name, std::string &base_name); Status 
GetModelNameFromMergedGraphName(const std::string &graph_name, std::string &model_name); @@ -50,24 +56,46 @@ class ModelHelper { private: bool is_assign_model_ = false; bool is_offline_ = true; - ModelFileHeader* file_header_ = nullptr; + bool is_unknown_shape_model_ = false; + ModelFileHeader *file_header_ = nullptr; // Encrypted model need delete temp model and unencrypted model need not delete model uint8_t *model_addr_tmp_ = nullptr; uint32_t model_len_tmp_ = 0; GeModelPtr model_; + GeRootModelPtr root_model_; - ModelHelper(const ModelHelper&); - ModelHelper& operator=(const ModelHelper&); - Status GenerateGeModel(OmFileLoadHelper& om_load_helper); - Status LoadModelData(OmFileLoadHelper& om_load_helper); - void SetModelToGeModel(ge::Model& model); - Status LoadWeights(OmFileLoadHelper& om_load_helper); - Status LoadTask(OmFileLoadHelper& om_load_helper); - Status LoadTBEKernelStore(OmFileLoadHelper& om_load_helper); - Status LoadCustAICPUKernelStore(OmFileLoadHelper& om_load_helper); + ModelHelper(const ModelHelper &); + ModelHelper &operator=(const ModelHelper &); + Status GenerateGeModel(OmFileLoadHelper &om_load_helper); + Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); + Status LoadModelData(OmFileLoadHelper &om_load_helper); + void SetModelToGeModel(ge::Model &model); + Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadWeights(OmFileLoadHelper &om_load_helper); + Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadTask(OmFileLoadHelper &om_load_helper); + Status LoadTask(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper); + Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); + Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper); + Status 
LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index); Status ReleaseLocalModelData() noexcept; - Status SaveModelPartition(std::shared_ptr& om_file_save_helper, - ModelPartitionType type, const uint8_t* data, size_t size); + Status SaveModelPartition(std::shared_ptr &om_file_save_helper, ModelPartitionType type, + const uint8_t *data, size_t size, size_t model_index); + Status SaveModelDef(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + Buffer &model_buffer, size_t model_index = 0); + Status SaveModelWeights(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + size_t model_index = 0); + Status SaveModelTbeKernel(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + size_t model_index = 0); + Status SaveModelCustAICPU(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + size_t model_index = 0); + Status SaveModelTaskDef(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + Buffer &task_buffer, size_t model_index = 0); + Status SaveModelHeader(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + size_t model_num = 1); + Status SaveAllModelPartiton(shared_ptr &om_file_save_helper, const GeModelPtr &ge_model, + Buffer &model_buffer, Buffer &task_buffer, size_t model_index = 0); }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_MODEL_HELPER_H_ diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h index 4ca54b50..98ad55d7 100644 --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -32,14 +32,14 @@ using std::vector; namespace ge { struct ModelPartition { ModelPartitionType type; - uint8_t* data = 0; + uint8_t *data = 0; uint32_t size = 0; }; struct OmFileContext { std::vector partition_datas_; std::vector partition_table_; - uint32_t model_data_len_; + uint32_t model_data_len_ = 0; }; struct SaveParam { @@ -57,15 +57,23 @@ class OmFileLoadHelper { 
Status Init(uint8_t *model_data, const uint32_t model_data_size); + Status Init(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); + Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); + Status GetModelPartition(ModelPartitionType type, ModelPartition &partition, size_t model_index); + OmFileContext context_; + vector model_contexts_; + private: Status CheckModelValid(const ge::ModelData &model) const; Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); + Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); + bool is_inited_{false}; }; @@ -79,15 +87,23 @@ class OmFileSaveHelper { Status AddPartition(ModelPartition &partition); + Status AddPartition(ModelPartition &partition, size_t cur_index); + const std::vector &GetModelPartitions() const; - Status SaveModel(const SaveParam &save_param, const char *target_file, - ge::ModelBufferData& model, bool is_offline = true); + Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model, + bool is_offline = true); Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true); + vector model_contexts_; + ModelFileHeader model_header_; OmFileContext context_; + + ModelPartitionTable *GetPartitionTable(size_t cur_ctx_index); + + Status SaveRootModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model, bool is_offline); }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 441d0757..99c2ea03 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -529,7 +529,7 @@ REGISTER_OPTYPE_DECLARE(HVDWAIT, "HorovodWait"); // aicpu op for online_infer dynamic_dims REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims"); -enum InputMode { INPUT = 0, CONST_INPUT}; +enum InputMode { 
INPUT = 0, CONST_INPUT }; // Definition of the processing status enum of the process module enum ModelProcessState { @@ -605,7 +605,7 @@ static constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64; /// /// @brief length of the reserved field in the model file header /// -static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 79; +static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75; /// /// @ingroup domi_omg @@ -843,9 +843,10 @@ struct ModelFileHeader { uint32_t ops = 0; // Computing power (Kops) uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0}; // User-defined information. The value contains 32 characters uint32_t om_ir_version = 0; + uint32_t model_num = 0; uint8_t platform_version[PLATFORM_VERSION_LEN] = {0}; uint8_t platform_type = {0}; - uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 79 + uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 75 }; static constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0; From 55fd4c4c14146ab4ddc8550a22ac62f5ef2e8f88 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 4 Dec 2020 10:07:57 +0800 Subject: [PATCH 007/127] decouple cce in graphengine --- .../load/new_model_manager/davinci_model.cc | 1 - .../task_info/kernel_task_info.cc | 22 +-- .../task_info/kernel_task_info.h | 8 +- .../new_model_manager/task_info/task_info.h | 4 +- .../aicore/aicore_node_executor.cc | 2 +- .../node_executor/aicore/aicore_op_task.cc | 6 +- .../aicpu/aicpu_node_executor.cc | 10 +- ge/single_op/single_op_model.cc | 12 +- .../task/aicpu_kernel_task_builder.cc | 6 +- inc/external/ge/ge_api_types.h | 2 + inc/framework/common/op/ge_op_utils.h | 155 +----------------- inc/framework/common/op/op_parser_util.h | 6 - inc/framework/common/taskdown_common.h | 73 +++++++++ 13 files changed, 112 insertions(+), 195 deletions(-) create mode 100644 inc/framework/common/taskdown_common.h diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 
81d47b3b..3db9b5ca 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -16,7 +16,6 @@ #include "graph/load/new_model_manager/davinci_model.h" -#include #include #include #include diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 04607c02..034f88eb 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -66,7 +66,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci // get opcontext stored in model const domi::KernelContext &context = kernel_def.context(); // get kernel_type - kernel_type_ = static_cast(context.kernel_type()); + kernel_type_ = static_cast(context.kernel_type()); // get opdesc op_desc_ = davinci_model_->GetOpByIndex(context.op_index()); GE_CHECK_NOTNULL(op_desc_); @@ -88,13 +88,13 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci // get bin_file_key const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); // new aicpu kernel(rtCpuKernelLaunch) no need to check function - if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) { + if (kernel_type_ == ccKernelType::CCE_AI_CORE) { rtError_t rt_ret; rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. 
stub_func: %s", kernel_def.stub_func().c_str()); return RT_ERROR_TO_GE_STATUS(rt_ret);); - } else if (kernel_type_ == cce::ccKernelType::TE) { + } else if (kernel_type_ == ccKernelType::TE) { rtError_t rt_ret; rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, @@ -111,7 +111,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci ctx_.opIndex2[i] = context.origin_op_index(i); } ctx_.opCount = context.origin_op_index_size(); - if (kernel_type_ == cce::ccKernelType::TE) { + if (kernel_type_ == ccKernelType::TE) { ctx_.opIndex = context.op_index(); uint16_t *args_offset_tmp = reinterpret_cast(const_cast(context.args_offset().data())); if (context.args_offset().size() / sizeof(uint16_t) < 1) { @@ -120,9 +120,9 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci } ret = InitTVMTask(args_offset_tmp[0], kernel_def); - } else if (kernel_type_ == cce::ccKernelType::CUSTOMIZED) { + } else if (kernel_type_ == ccKernelType::CUSTOMIZED) { ret = InitAICPUCustomTask(context.op_index(), kernel_def); - } else if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { + } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { ret = InitAicpuTask(context.op_index(), kernel_def); } else { if (kernel_def.args().empty() || args_size_ == 0) { @@ -373,7 +373,7 @@ Status KernelTaskInfo::Distribute() { INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); int64_t env_flag = (res == EN_OK) ? 
strtol(skt_enable_env, nullptr, 10) : 0; bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_); - if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { + if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); // blockDim is reserved parameter, set to 1 rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name_.c_str()), @@ -874,7 +874,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k return INTERNAL_ERROR; } - if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { + if (kernel_type_ == ccKernelType::CUST_AI_CPU) { GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); } @@ -946,7 +946,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k GELOGI("Op debug is open in aicpu task info"); dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); } - if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { + if (kernel_type_ == ccKernelType::CUST_AI_CPU) { dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; } @@ -1076,7 +1076,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d Status KernelTaskInfo::SetContext(const domi::KernelDef &kernel_def) { const domi::KernelContext &context = kernel_def.context(); - ctx_.kernelType = static_cast(context.kernel_type()); + ctx_.kernelType = static_cast(context.kernel_type()); ctx_.opId = context.op_id(); ctx_.kernelFuncId = context.kernel_func_id(); ctx_.isFlowtable = context.is_flowtable(); @@ -1163,7 +1163,7 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u } cce::ccStatus_t cc_ret; std::string update_kernel_args = "ccUpdateKernelArgs"; - auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, + auto cceUpdateKernelArgs = 
(cce::ccStatus_t(*)(ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, void *))mmDlsym(handle, const_cast(update_kernel_args.c_str())); if (cceUpdateKernelArgs == nullptr) { GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index f2945b0b..1f90ede1 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -43,7 +43,7 @@ class KernelTaskInfo : public TaskInfo { stream_id_(0), so_name_(""), kernel_name_(""), - kernel_type_(cce::ccKernelType::CCE_AI_CORE), + kernel_type_(ccKernelType::CCE_AI_CORE), dump_flag_(RT_KERNEL_DEFAULT), dump_args_(nullptr), op_desc_(nullptr), @@ -75,7 +75,7 @@ class KernelTaskInfo : public TaskInfo { Status Release() override; - cce::ccOpContext *GetCtx() override { return &ctx_; } + ccOpContext *GetCtx() override { return &ctx_; } FusionOpInfo *GetFusionOpInfo() override { return &fusion_op_info_; } @@ -92,7 +92,7 @@ class KernelTaskInfo : public TaskInfo { bool CallSaveDumpInfo() override { return call_save_dump_; }; - cce::ccOpContext ctx_; + ccOpContext ctx_; FusionOpInfo fusion_op_info_; private: @@ -153,7 +153,7 @@ class KernelTaskInfo : public TaskInfo { uint32_t stream_id_; std::string so_name_; std::string kernel_name_; - cce::ccKernelType kernel_type_; + ccKernelType kernel_type_; uint32_t dump_flag_; void *dump_args_; OpDescPtr op_desc_; diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/new_model_manager/task_info/task_info.h index d296d29e..fe9c8c37 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/new_model_manager/task_info/task_info.h @@ -20,7 +20,7 @@ #include #include "cce/customize.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" #include 
"framework/common/ge_inner_error_codes.h" #include "graph/load/new_model_manager/ts_mem_mall.h" #include "graph/load/new_model_manager/task_info/task_info_factory.h" @@ -87,7 +87,7 @@ class TaskInfo { virtual Status Release() { return SUCCESS; } - virtual cce::ccOpContext *GetCtx() { return nullptr; } + virtual ccOpContext *GetCtx() { return nullptr; } virtual uint32_t GetTaskID() { return 0xFFFFFFFF; } diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 3b87c8b8..407210cf 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -15,7 +15,7 @@ */ #include "aicore_node_executor.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" #include "hybrid/executor/hybrid_execution_context.h" namespace ge { diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 7ed14309..7687df29 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -15,7 +15,7 @@ */ #include "hybrid/node_executor/aicore/aicore_op_task.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" #include "framework/common/debug/log.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" @@ -151,8 +151,8 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { const domi::KernelDef &kernel_def = task_def.kernel(); const domi::KernelContext &context = kernel_def.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type != cce::ccKernelType::TE) { + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type != ccKernelType::TE) { GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast(kernel_type)); return INTERNAL_ERROR; } diff --git 
a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 1a47e525..f2bd43d3 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -15,7 +15,7 @@ */ #include "hybrid/node_executor/aicpu/aicpu_node_executor.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" #include "common/formats/formats.h" #include "aicpu/common/aicpu_task_struct.h" #include "graph/load/new_model_manager/model_manager.h" @@ -642,8 +642,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) { const std::string &so_name = kernel_def.so_name(); const OpDescPtr op_desc = node_item_->GetOpDesc(); const auto &context = kernel_def.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::CUST_AI_CPU) { GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed."); GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); } @@ -736,9 +736,9 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { const auto &so_name = task_def_.kernel().so_name(); const auto &kernel_name = task_def_.kernel().kernel_name(); const auto &kcontext = task_def_.kernel().context(); - auto kernel_type = static_cast(kcontext.kernel_type()); + auto kernel_type = static_cast(kcontext.kernel_type()); uint32_t flag = RT_KERNEL_DEFAULT; - if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { + if (kernel_type == ccKernelType::CUST_AI_CPU) { flag |= static_cast(RT_KERNEL_CUSTOM_AICPU); } auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name.c_str()), diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 49968f4f..f6fe558a 100755 --- a/ge/single_op/single_op_model.cc +++ 
b/ge/single_op/single_op_model.cc @@ -237,8 +237,8 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { if (task_type == RT_MODEL_TASK_KERNEL) { const domi::KernelDef &kernel_def = task_def.kernel(); const auto &context = kernel_def.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type == cce::ccKernelType::TE) { + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::TE) { GELOGD("Building TBE task"); TbeOpTask *tbe_task = nullptr; auto ret = BuildKernelTask(task_def.kernel(), &tbe_task); @@ -249,7 +249,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); ParseArgTable(tbe_task, single_op); single_op.tasks_.emplace_back(tbe_task); - } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { + } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); OpTask *task = nullptr; uint64_t singleop_kernel_id = aicpu_kernel_id++; @@ -388,13 +388,13 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { const domi::KernelDef &kernel_def = task_def.kernel(); const auto &context = kernel_def.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type == cce::ccKernelType::TE) { + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::TE) { GELOGD("Building TBE task"); TbeOpTask *tbe_task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); single_op.op_task_.reset(tbe_task); - } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { + } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { 
GELOGD("Building AICPU_CC task"); OpTask *task = nullptr; uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 26f6a166..600c9c29 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -15,7 +15,7 @@ */ #include "single_op/task/aicpu_kernel_task_builder.h" -#include "cce/taskdown_common.hpp" +#include "framework/common/taskdown_common.h" #include "graph/load/new_model_manager/model_manager.h" namespace ge { @@ -58,8 +58,8 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { task.op_desc_ = op_desc_; const auto &context = kernel_def_.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == ccKernelType::CUST_AI_CPU) { task.is_custom_ = true; task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 374a816a..cce17f93 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -369,6 +369,7 @@ static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); // for interface: aclgrphBuildModel +#ifdef __GNUC__ const std::set ir_builder_suppported_options = {INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, @@ -424,6 +425,7 @@ const std::set global_options = {CORE_TYPE, DEBUG_DIR, OP_COMPILER_CACHE_DIR, OP_COMPILER_CACHE_MODE}; +#endif } // namespace ir_option } // namespace ge diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index 4718b180..aa50c8a1 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ 
b/inc/framework/common/op/ge_op_utils.h @@ -17,7 +17,6 @@ #ifndef INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ #define INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ -#include #include #include @@ -32,7 +31,6 @@ #include "proto/insert_op.pb.h" namespace ge { -using namespace cce; using domi::Status; // Add Sub Mul @@ -76,18 +74,7 @@ class OpUtils { static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) { return check_value < min_enum_value ? false : (check_value >= max_enum_value ? false : true); } - /// - /// @ingroup domi_omg - /// @brief Convert the dimension of array according to different format - /// @param [in] src_format src_shape format - /// @param [in] src Dimension array to be converted - /// @param [in] dst_format Target format after conversion - /// @param [out] dst Dimension array after conversion - /// @return SUCCESS success - /// @return FAILED fail - /// - static bool ConvertDim(ccTensorFormat_t src_format, const std::vector &src, ccTensorFormat_t dst_format, - std::vector &dst); + /// /// @ingroup domi_omg /// @brief Determine whether to manually calculate the tensor size based on the values of format and dim @@ -97,73 +84,6 @@ class OpUtils { /// @return false skip /// static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt); - /// - /// @ingroup domi_ome - /// @brief Initialize the tensor description, which is used for input and output. - /// @param [in] model_tensor Tensor information defined by the offline model - /// @param [out] cc_tensor Tensor definition used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccTensorDescriptor_t &cc_tensor); - /// - /// @ingroup domi_ome - /// @brief Initialize the tensor description, which is used for input and output. 
- /// @param [in] model_tensor Tensor information defined by the offline model - /// @param [in] dst_data_type data_type of the target cc_tensor - /// @param [out] cc_tensor Tensor definition used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, int32_t dst_data_type, - ccTensorDescriptor_t &cc_tensor); - /// - /// @ingroup domi_ome - /// @brief Initialize the tensor description for bias. - /// @param [in] model_tensor Tensor information defined by the offline model - /// @param [out] cc_tensor Tensor definition used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - /// - static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, ccTensorDescriptor_t &cc_tensor); - /// - /// @ingroup domi_ome - /// @brief Initialize the tensor description for bias. - /// @param [in] model_tensor Tensor information defined by the offline model - /// @param [in] dst_data_type data_type of the target cc_tensor - /// @param [out] cc_tensor Tensor definition used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, int32_t dst_data_type, - ccTensorDescriptor_t &cc_tensor); - - static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector &dim, - ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt = 4); - /// - /// @ingroup domi_ome - /// @brief Destroys a tensor - /// @param [inout] cc_tensor Tensor definition used by CC - /// - static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor) noexcept; - - /// - /// @ingroup domi_ome - /// @brief Destroys a tensor - /// @param [inout] cc_filter cc_filter Definition of the filter used by CC - /// - static void DestroyFilterDescriptor(ccFilterDescriptor_t &cc_filter); - - /// - /// @ingroup domi_ome - /// @brief Initializing Filter Description - /// @param [in] model_filter Filter information defined 
in the offline model - /// @param [out] cc_filter Definition of the filter used by CC - /// @return SUCCESS success - /// @return FAILED fail - /// - static Status InitFilterDescriptor(const ge::GeTensor &model_filter, ccFilterDescriptor_t &cc_filter); /// /// @brief Extract AIPP parameters from AttrDefMap and splice them @@ -209,16 +129,7 @@ class OpUtils { /// @param [out] output Data pointer after conversion. The format is HWCK /// static void TransDataKCHW2HWCK(const void *input, int64_t K, int64_t C, int64_t H, int64_t W, void *output); - /// - /// @ingroup domi_omg - /// @brief Initialize the input and output description of the data node which is applied to filter weight in the - /// training network - /// @param [in] model_tensor input and output tensor information - /// @param [out] cc_tensor Tensor in CCE format after conversion - /// - static Status InitFilterTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccFilterDescriptor_t &cc_tensor); - - static void SetTensorDescriptorAllOffsetQuantizeInfo(const GeTensorDesc &tensor, ccTensorDescriptor_t cc_tensor); + static vector GetWeights(const ge::Node &node); static vector GetWeights(ge::ConstNodePtr node); static vector MutableWeights(const ge::Node &node); @@ -228,69 +139,7 @@ class OpUtils { static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType type, std::vector &dims); private: - friend class CceTensorDescriptor; static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc); }; - -class CceTensorDescriptor; - -using CceTensorDescriptorPtr = std::shared_ptr; - -class CceTensorDescriptor { - public: - explicit CceTensorDescriptor(ccTensorDescriptor_t cc_tensor); - CceTensorDescriptor(const CceTensorDescriptor &) = delete; - CceTensorDescriptor &operator=(const CceTensorDescriptor &) = delete; - - ~CceTensorDescriptor(); - - ccTensorDescriptor_t GetPtr() { return cc_tensor_; } - - /// - /// @brief Initializes the tensor based on shape information. 
- /// @param[in] format data permutation format - /// @param[in] data_type Data Type - /// @param[in] dim dim information - /// @return return code - /// - Status InitTensor(int32_t format, int32_t data_type, const std::vector &dims); - - Status InitTensor(int32_t format, int32_t data_type, const ge::GeShape &shape); - - /// - /// @brief get format of tensor - /// @param[out] format format of the tensor - /// @return return code - /// - Status GetFormat(ccTensorFormat_t *format); - - /// - /// @brief Obtains the size of the tensor. - /// @param[out] size size of Tensor - /// @return return code - /// - Status GetTensorSizeInBytes(uint32_t *size); - - /// - /// @brief transform tensor between 4d(NCHW) and 5d(NC1HWC0) - /// @param [in] xDesc descriptor of input tensor - /// @param [in] x point to input data in host memory - /// @param [in] dataTypeTransmode mode of data type transform - /// @param [in] yDesc descriptor of output tensor - /// @param [in|out] y point to output data in host memory - /// @param [in] ySizeInBytes size of outputData - /// @return return code - /// - static Status TransTensor(const ccTensorDescriptor_t xDesc, const void *x, const CceTensorDescriptorPtr &yDesc, - void *y, uint32_t ySizeInBytes); - - /// - /// @brief CceTensorDescriptor Static Constructor - /// @return CceTensorDescriptor smart pointer - /// - static CceTensorDescriptorPtr Create(); - - ccTensorDescriptor_t cc_tensor_ = nullptr; -}; } // namespace ge #endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ diff --git a/inc/framework/common/op/op_parser_util.h b/inc/framework/common/op/op_parser_util.h index 49b4350a..43254ca9 100644 --- a/inc/framework/common/op/op_parser_util.h +++ b/inc/framework/common/op/op_parser_util.h @@ -17,7 +17,6 @@ #ifndef INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_ #define INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_ -#include #include #include #include @@ -31,10 +30,7 @@ const uint32_t NORMAL_OUTPUT_NUM = 1; const uint32_t NORMAL_WORKSPACE_NUM = 0; const 
int32_t NORMAL_1D_DIM_NUM = 1; const int32_t NORMAL_SCALE_DIM_NUM = 0; -const int NORMAL_TENSOR_FORMAT = static_cast(cce::CC_TENSOR_NC1HWC0); const int NORMAL_TENSOR_SIZE = 4; -const int NORMAL_DEVICE_DATA_TYPE = static_cast(cce::CC_DATA_HALF); -const int DEFAULT_POOLING_MODE = static_cast(cce::CC_POOLING_MAX); const uint32_t DEFAULT_REAL_DIM_CNT = 4; // const @@ -183,7 +179,6 @@ const int32_t SSD_DETECTIONOUTPUT_BACKGROUND_LABEL_ID_DEFAULT_VALUE = 0; const float SSD_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; const int32_t SSD_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200; const float SSD_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0; -const int SSD_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast(cce::CC_BOX_CENTER_SIZE); const int32_t SSD_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200; const bool SSD_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false; const float SSD_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1; @@ -200,7 +195,6 @@ const float REFINEDET_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; const int32_t REFINEDET_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200; const float REFINEDET_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0; const bool REFINEDET_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false; -const int REFINEDET_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast(cce::CC_BOX_CENTER_SIZE); const int32_t REFINEDET_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200; const float REFINEDET_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1; const float REFINEDET_DETECTIONOUTPUT_OBJECTNESS_SCORE_DEFAULT_VALUE = 0; diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h new file mode 100644 index 00000000..b1364d16 --- /dev/null +++ b/inc/framework/common/taskdown_common.h @@ -0,0 +1,73 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ +#define INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ + +#include "runtime/rt.h" + +using namespace std; + +namespace ge { + +#define CC_FUSION_OP_MAX 32 + +typedef enum tagCcStatus { + CC_STATUS_SUCCESS = 0, /**< succ */ + CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ + CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ + CC_STATUS_BAD_PARAM = 3, /**< para check failed */ + CC_STATUS_INTERNAL_ERROR = 4, /**< internal error */ + CC_STATUS_KERNEL_ERROR = 5, /**< kernel error */ + CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ + CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ + CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ + CC_STATUS_RESERVED /**< just for check */ +} ccStatus_t; + +typedef enum tagccKernelType { + CCE_AI_CORE = 0, /* cce aicore */ + CCE_AI_CPU = 1, /* cce aicpu */ + TE = 2, /* te operator*/ + CUSTOMIZED = 3, /* customized operator */ + TE_AI_CORE = 4, /* te aicore operator*/ + TE_AI_CPU = 5, /* te aicpu operator */ + AI_CPU = 6, /* aicpu */ + CUST_AI_CPU = 7, /* custom aicpu*/ + INVALID = 8, /* unknown kernel type */ +} ccKernelType; + +typedef struct tagOpContext { + ccKernelType kernelType; + uint32_t opId; + uint32_t kernelFuncId; + uint32_t opIndex; + uint32_t opCount; + uint32_t opIndex2[CC_FUSION_OP_MAX]; + bool isFlowtable; + uint16_t *argsOffset; + uint32_t argsCount; + uint64_t genDataBaseAddr; + uint64_t genDataBaseSize; + uint64_t genWeightBaseAddr; + uint64_t genWeightBaseSize; + uint64_t genVariableBaseAddr; + uint64_t 
genVariableBaseSize; + uint64_t l2ctrlSize; +} ccOpContext; +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ From 7d8aa2e94af4f24d87e95da8b48afddee32ce33b Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 4 Dec 2020 11:22:36 +0800 Subject: [PATCH 008/127] graphengine support windows new changes --- ge/ge_local_engine/engine/host_cpu_engine.cc | 1 - ge/graph/manager/host_mem_manager.cc | 2 +- ge/host_kernels/ssd_prior_box_kernel.cc | 2 +- ge/host_kernels/ssd_prior_box_kernel.h | 2 +- ge/hybrid/executor/hybrid_profiler.h | 2 +- ge/hybrid/model/hybrid_model_builder.cc | 2 +- ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 4 ++-- 7 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index f1e152f4..b14cbb3d 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -14,7 +14,6 @@ * limitations under the License. 
*/ #include "host_cpu_engine.h" -#include #include "graph/common/omg_util.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_adapter.h" diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index d4aceddd..c99c9e87 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -106,7 +106,7 @@ Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_add GELOGE(INTERNAL_ERROR, "Find host base base_addr failed,node name:%s!", op_name.c_str()); return INTERNAL_ERROR; } - base_addr = reinterpret_cast(reinterpret_cast(var_memory_base_map_[op_name].device_address)); + base_addr = static_cast(reinterpret_cast(var_memory_base_map_[op_name].device_address)); data_size = var_memory_base_map_[op_name].mem_size; return SUCCESS; } diff --git a/ge/host_kernels/ssd_prior_box_kernel.cc b/ge/host_kernels/ssd_prior_box_kernel.cc index b3a0fc3e..b93a4047 100644 --- a/ge/host_kernels/ssd_prior_box_kernel.cc +++ b/ge/host_kernels/ssd_prior_box_kernel.cc @@ -180,7 +180,7 @@ Status SsdPriorboxKernel::SetVariance(const vector &variance, const int d return SUCCESS; } -Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint aspect_ratios_size, uint min_sizes_size, uint max_sizes_size, +Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, int layer_width, int layer_height, int &num_priors, int &dim_size) const { if (ge::CheckUint32MulOverflow(min_sizes_size, aspect_ratios_size) != SUCCESS) { diff --git a/ge/host_kernels/ssd_prior_box_kernel.h b/ge/host_kernels/ssd_prior_box_kernel.h index 0ebf221d..3894bad9 100755 --- a/ge/host_kernels/ssd_prior_box_kernel.h +++ b/ge/host_kernels/ssd_prior_box_kernel.h @@ -100,7 +100,7 @@ class SsdPriorboxKernel : public Kernel { * @return OTHERS: Execution failed * @author */ - Status GetNumPriorAndDimSize(uint aspect_ratios_size, uint min_sizes_size, uint max_sizes_size, int 
layer_width, + Status GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, int layer_width, int layer_height, int &num_priors, int &dim_size) const; void DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y, vector &result); std::unique_ptr BoundaryCalulate(int dim_size, int layer_width, int layer_height, float step_width, diff --git a/ge/hybrid/executor/hybrid_profiler.h b/ge/hybrid/executor/hybrid_profiler.h index 62ef9c73..f6027a0b 100644 --- a/ge/hybrid/executor/hybrid_profiler.h +++ b/ge/hybrid/executor/hybrid_profiler.h @@ -33,7 +33,7 @@ class HybridProfiler { SHAPE_INFERENCE, COMPILE, EXECUTION, - CALLBACK, + CALLBACK }; struct Event { diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index f4da3dcf..a40a7b17 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -793,7 +793,7 @@ Status HybridModelBuilder::HandleDtString(const GeTensor &tensor, void *var_addr "Shape size is invalid"); auto offset = static_cast(elem_num * kBytes); auto hbm_raw_data_base_addr = - reinterpret_cast(reinterpret_cast(var_addr) + offset); + static_cast(reinterpret_cast(var_addr) + offset); for (int64_t i = elem_num - 1; i >= 0; --i) { buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); } diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index f2bd43d3..db3c91cd 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -723,9 +723,9 @@ Status AicpuNodeTask::UpdateIoAddr(TaskContext &context) { auto io_addr = args_.get() + sizeof(aicpu::AicpuParamHead); // if has input and output, need copy to ioaddr - error_t cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), + int cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), &io_addrs[0], 
sizeof(uint64_t) * io_addrs.size()); - GE_CHK_BOOL_RET_STATUS(cpy_ret == EOK, INTERNAL_ERROR, + GE_CHK_BOOL_RET_STATUS(cpy_ret == 0, INTERNAL_ERROR, "Node[%s] memcpy io addr to AicpuParamHead failed, ret=%d, args_size=%u, io nums=%zu.", node_name_.c_str(), cpy_ret, args_size_, io_addrs.size()); return SUCCESS; From c59a698bcef91ace757f2969191a71f3acfd34a1 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 4 Dec 2020 12:57:25 +0800 Subject: [PATCH 009/127] update ccStatus --- ge/graph/load/new_model_manager/task_info/kernel_task_info.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 034f88eb..329f8e54 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -1161,9 +1161,9 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! 
", error); return FAILED; } - cce::ccStatus_t cc_ret; + ccStatus_t cc_ret; std::string update_kernel_args = "ccUpdateKernelArgs"; - auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, + auto cceUpdateKernelArgs = (ccStatus_t(*)(ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, void *))mmDlsym(handle, const_cast(update_kernel_args.c_str())); if (cceUpdateKernelArgs == nullptr) { GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); From 102f0517f881edaceec23624b7388a4ad0156f96 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 4 Dec 2020 13:06:20 +0800 Subject: [PATCH 010/127] update cce depend --- ge/graph/load/new_model_manager/task_info/kernel_task_info.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 329f8e54..3e3a715d 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -1189,7 +1189,7 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u GELOGW("Failed to close handle %s", error); return FAILED; } - if (cc_ret != cce::CC_STATUS_SUCCESS) { + if (cc_ret != CC_STATUS_SUCCESS) { GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); return CCE_FAILED; } From 87704d85127a354f78d1b0f367746792b8f6131a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Fri, 4 Dec 2020 14:04:45 +0800 Subject: [PATCH 011/127] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20?= =?UTF-8?q?!499=20:=20inference=20supports=20dynamic=20shape'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ge/common/ge/op_tiling_manager.cc | 4 - ge/common/ge/op_tiling_manager.h | 1 - ge/executor/CMakeLists.txt | 94 +------- ge/executor/ge_executor.cc | 73 +----- ge/executor/module.mk | 
84 +------ ge/ge_local_engine/CMakeLists.txt | 2 +- ge/ge_local_engine/engine/host_cpu_engine.h | 2 +- ge/graph/build/graph_builder.cc | 51 ----- ge/graph/load/graph_loader.cc | 7 +- ge/graph/load/graph_loader.h | 3 +- .../load/new_model_manager/davinci_model.cc | 88 +++---- .../load/new_model_manager/davinci_model.h | 7 +- .../load/new_model_manager/model_manager.cc | 48 +--- .../load/new_model_manager/model_manager.h | 5 +- ge/graph/partition/dynamic_shape_partition.cc | 47 +--- ge/graph/partition/dynamic_shape_partition.h | 1 - .../passes/transop_breadth_fusion_pass.cc | 2 +- ge/host_cpu_engine/CMakeLists.txt | 6 +- .../executor/hybrid_model_async_executor.cc | 38 ---- .../executor/hybrid_model_async_executor.h | 5 - ge/hybrid/hybrid_davinci_model.cc | 79 ------- ge/hybrid/hybrid_davinci_model.h | 21 -- ge/hybrid/hybrid_davinci_model_stub.cc | 32 --- ge/hybrid/model/hybrid_model.cc | 215 +----------------- ge/hybrid/model/hybrid_model.h | 26 --- ge/hybrid/model/hybrid_model_builder.cc | 30 --- .../node_executor/aicore/aicore_op_task.cc | 56 ----- .../node_executor/aicore/aicore_op_task.h | 1 - inc/framework/executor/ge_executor.h | 16 -- 29 files changed, 60 insertions(+), 984 deletions(-) diff --git a/ge/common/ge/op_tiling_manager.cc b/ge/common/ge/op_tiling_manager.cc index db959368..9b5ba2d7 100644 --- a/ge/common/ge/op_tiling_manager.cc +++ b/ge/common/ge/op_tiling_manager.cc @@ -88,8 +88,4 @@ void OpTilingManager::LoadSo() { } } -OpTilingManager &OpTilingManager::GetInstance() { - static OpTilingManager instance; - return instance; -} } // namespace ge diff --git a/ge/common/ge/op_tiling_manager.h b/ge/common/ge/op_tiling_manager.h index 17761969..d4e7f34e 100644 --- a/ge/common/ge/op_tiling_manager.h +++ b/ge/common/ge/op_tiling_manager.h @@ -25,7 +25,6 @@ using SoToHandleMap = std::map; class OpTilingManager { public: OpTilingManager() = default; - static OpTilingManager &GetInstance(); ~OpTilingManager(); void LoadSo(); diff --git 
a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index d7dfdc84..de8025f3 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -72,89 +72,7 @@ set(SRC_LIST "../single_op/task/tbe_task_builder.cc" "../single_op/task/aicpu_task_builder.cc" "../single_op/task/aicpu_kernel_task_builder.cc" - "../hybrid/common/tensor_value.cc" - "../hybrid/common/npu_memory_allocator.cc" - "../hybrid/executor/rt_callback_manager.cc" - "../hybrid/executor/node_state.cc" - "../hybrid/executor/node_done_manager.cc" - "../hybrid/executor/hybrid_profiler.cc" - "../hybrid/executor/hybrid_model_executor.cc" - "../hybrid/executor/hybrid_model_async_executor.cc" - "../hybrid/executor/hybrid_execution_context.cc" - "../hybrid/executor/subgraph_context.cc" - "../hybrid/executor/subgraph_executor.cc" - "../hybrid/executor/worker/task_compile_engine.cc" - "../hybrid/executor/worker/shape_inference_engine.cc" - "../hybrid/executor/worker/execution_engine.cc" - "../hybrid/model/hybrid_model.cc" - "../hybrid/model/hybrid_model_builder.cc" - "../hybrid/model/node_item.cc" - "../hybrid/model/graph_item.cc" - "../hybrid/node_executor/aicore/aicore_node_executor.cc" - "../hybrid/node_executor/aicore/aicore_op_task.cc" - "../hybrid/node_executor/aicore/aicore_task_builder.cc" - "../hybrid/node_executor/aicpu/aicpu_node_executor.cc" - "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc" - "../hybrid/node_executor/ge_local/ge_local_node_executor.cc" - "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" - "../hybrid/node_executor/host_cpu/kernel_factory.cc" - "../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" - "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" - "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" - "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" - "../hybrid/node_executor/controlop/control_op_executor.cc" - "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" - 
"../hybrid/node_executor/rts/rts_node_executor.cc" - "../hybrid/node_executor/node_executor.cc" - "../hybrid/node_executor/task_context.cc" - "../hybrid/hybrid_davinci_model.cc" - "../ge_local_engine/engine/host_cpu_engine.cc" - "../graph/common/omg_util.cc" - "../graph/manager/host_mem_manager.cc" - "../graph/build/memory/var_mem_assign_util.cc" - "../host_kernels/transpose_kernel.cc" - "../host_kernels/add_kernel.cc" - "../host_kernels/broadcast_args_kernel.cc" - "../host_kernels/broadcast_gradient_args_kernel.cc" - "../host_kernels/cast_kernel.cc" - "../host_kernels/concat_offset_kernel.cc" - "../host_kernels/concat_v2_kernel.cc" - "../host_kernels/dynamic_stitch_kernel.cc" - "../host_kernels/identity_kernel.cc" - "../host_kernels/empty_kernel.cc" - "../host_kernels/expanddims_kernel.cc" - "../host_kernels/fill_kernel.cc" - "../host_kernels/floordiv_kernel.cc" - "../host_kernels/floormod_kernel.cc" - "../host_kernels/gather_v2_kernel.cc" - "../host_kernels/greater_kernel.cc" - "../host_kernels/kernel_utils.cc" - "../host_kernels/maximum_kernel.cc" - "../host_kernels/mul_kernel.cc" - "../host_kernels/pack_kernel.cc" - "../host_kernels/permute_kernel.cc" - "../host_kernels/range_kernel.cc" - "../host_kernels/rank_kernel.cc" - "../host_kernels/reduce_prod_kernel.cc" - "../host_kernels/reshape_kernel.cc" - "../host_kernels/rsqrt_kernel.cc" - "../host_kernels/shape_kernel.cc" - "../host_kernels/shape_n_kernel.cc" - "../host_kernels/size_kernel.cc" - "../host_kernels/slice_d_kernel.cc" - "../host_kernels/slice_kernel.cc" - "../host_kernels/squeeze_kernel.cc" - "../host_kernels/unsqueeze_kernel.cc" - "../host_kernels/ssd_prior_box_kernel.cc" - "../host_kernels/strided_slice_kernel.cc" - "../host_kernels/sub_kernel.cc" - "../host_kernels/transdata_kernel.cc" - "../host_kernels/unpack_kernel.cc" - "../graph/passes/pass_utils.cc" - "../graph/common/bcast.cc" - "../common/fp16_t.cc" - "../common/formats/format_transfers/format_transfer_transpose.cc" - 
"../common/formats/utils/formats_trans_utils.cc" + "../hybrid/hybrid_davinci_model_stub.cc" ) ######## libge_executor.a ######## @@ -187,9 +105,9 @@ target_include_directories(ge_executor PRIVATE ${CMAKE_BINARY_DIR}/proto/ge #### yellow zone #### ${GE_CODE_DIR}/../inc - ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../inc/cce #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc ) target_link_libraries(ge_executor PRIVATE @@ -229,9 +147,9 @@ target_include_directories(ge_executor_shared PRIVATE ${CMAKE_BINARY_DIR}/proto/ge #### yellow zone #### ${GE_CODE_DIR}/../inc - ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../inc/cce #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc ) target_link_libraries(ge_executor_shared PRIVATE @@ -240,7 +158,7 @@ target_link_libraries(ge_executor_shared PRIVATE -Wl,--no-as-needed ge_common runtime - slog + slog mmpa graph register diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index d03a8d7b..7ceb9b05 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -39,8 +39,6 @@ #include "graph/manager/graph_var_manager.h" #include "graph/load/new_model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" -#include "graph/opsproto_manager.h" -#include "ge_local_engine/engine/host_cpu_engine.h" using std::string; using std::vector; @@ -223,33 +221,6 @@ class ModelListenerAdapter : public ModelListener { std::shared_ptr listener; }; -static void InitOpsProtoManger() { - string opsproto_path; - const char *path_env = std::getenv("ASCEND_OPP_PATH"); - if (path_env != nullptr) { - string path = path_env; - string file_path = RealPath(path.c_str()); - if (file_path.empty()) { - GELOGE(FAILED, "File path %s is invalid.", path.c_str()); - return; - } - opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); - GELOGI("Get opsproto so path from env : 
%s", path.c_str()); - } else { - string path_base = PluginManager::GetPath(); - GELOGI("path_base is %s", path_base.c_str()); - path_base = path_base.substr(0, path_base.rfind('/')); - path_base = path_base.substr(0, path_base.rfind('/') + 1); - opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); - } - - GELOGI("Get opsproto path is %s", opsproto_path.c_str()); - OpsProtoManager *manager = OpsProtoManager::Instance(); - map option_tmp; - option_tmp.emplace(std::pair(string("ge.opsProtoLibPath"), opsproto_path)); - (void)manager->Initialize(option_tmp); -} - GeExecutor::GeExecutor() {} Status GeExecutor::Initialize() { @@ -259,16 +230,6 @@ Status GeExecutor::Initialize() { return ge::SUCCESS; } - OpTilingManager::GetInstance().LoadSo(); - - Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize(); - if (initHostCpuEngineStatus != SUCCESS) { - GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine"); - return initHostCpuEngineStatus; - } - - InitOpsProtoManger(); - std::vector mem_type(1, RT_MEMORY_HBM); mem_type.push_back(RT_MEMORY_P2P_DDR); auto ret = MemManager::Instance().Initialize(mem_type); @@ -638,16 +599,10 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { return ACL_ERROR_GE_INTERNAL_ERROR; } - std::shared_ptr hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id); - if (hybrid_davinci_model != nullptr) { - uint64_t session_id = hybrid_davinci_model->GetSessionId(); + std::shared_ptr davinci_model = ModelManager::GetInstance()->GetModel(model_id); + if (davinci_model != nullptr) { + uint64_t session_id = davinci_model->GetSessionId(); VarManagerPool::Instance().RemoveVarManager(session_id); - } else { - std::shared_ptr davinci_model = ModelManager::GetInstance()->GetModel(model_id); - if (davinci_model != nullptr) { - uint64_t session_id = davinci_model->GetSessionId(); - VarManagerPool::Instance().RemoveVarManager(session_id); - } } ret = 
GraphLoader::UnloadModel(model_id); if (ret != SUCCESS) { @@ -977,26 +932,6 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat */ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, ge::RunModelData &run_output_data, bool async_mode) { - std::vector input_desc = {}; - std::vector output_desc = {}; - return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode); -} - -/** -* @ingroup ge -* @brief Synchronous execution of offline model(Do not create thread) -* @param [in] uint32_t model_id: Model ID to execute - void* stream: stream to execute - const domi::InputData *input_data: Model input data - const std::vector &input_desc: Description of model input data - bool async_mode: is asynchronize mode -* @param [out] domi::OutputData *output_data: Model output data -* @param [out] std::vector &output_desc: Description of model output data -* @return SUCCESS handle successfully / others handle failed -*/ -Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, - const std::vector &input_desc, ge::RunModelData &run_output_data, - std::vector &output_desc, bool async_mode) { if (!isInit_) { GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return ACL_ERROR_GE_EXEC_NOT_INIT; @@ -1021,7 +956,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel } } - return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); + return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, output_data); } /** diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 9566ca64..4a0188be 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -61,91 +61,9 @@ local_ge_executor_src_files := \ ../single_op/task/tbe_task_builder.cc \ ../single_op/task/aicpu_task_builder.cc \ 
../single_op/task/aicpu_kernel_task_builder.cc \ + ../hybrid/hybrid_davinci_model_stub.cc\ ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \ ../graph/common/local_context.cc \ - ../hybrid/common/tensor_value.cc \ - ../hybrid/common/npu_memory_allocator.cc \ - ../hybrid/executor/rt_callback_manager.cc \ - ../hybrid/executor/node_state.cc \ - ../hybrid/executor/node_done_manager.cc \ - ../hybrid/executor/hybrid_profiler.cc \ - ../hybrid/executor/hybrid_model_executor.cc \ - ../hybrid/executor/hybrid_model_async_executor.cc \ - ../hybrid/executor/hybrid_execution_context.cc \ - ../hybrid/executor/subgraph_context.cc \ - ../hybrid/executor/subgraph_executor.cc \ - ../hybrid/executor/worker/task_compile_engine.cc \ - ../hybrid/executor/worker/shape_inference_engine.cc \ - ../hybrid/executor/worker/execution_engine.cc \ - ../hybrid/model/hybrid_model.cc \ - ../hybrid/model/hybrid_model_builder.cc \ - ../hybrid/model/node_item.cc \ - ../hybrid/model/graph_item.cc \ - ../hybrid/node_executor/aicore/aicore_node_executor.cc \ - ../hybrid/node_executor/aicore/aicore_op_task.cc \ - ../hybrid/node_executor/aicore/aicore_task_builder.cc \ - ../hybrid/node_executor/aicpu/aicpu_node_executor.cc \ - ../hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ - ../hybrid/node_executor/ge_local/ge_local_node_executor.cc \ - ../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \ - ../hybrid/node_executor/host_cpu/kernel_factory.cc \ - ../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \ - ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ - ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ - ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ - ../hybrid/node_executor/controlop/control_op_executor.cc \ - ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ - ../hybrid/node_executor/rts/rts_node_executor.cc \ - ../hybrid/node_executor/node_executor.cc \ - ../hybrid/node_executor/task_context.cc \ - 
../hybrid/hybrid_davinci_model.cc \ - ../ge_local_engine/engine/host_cpu_engine.cc \ - ../graph/common/omg_util.cc \ - ../graph/manager/host_mem_manager.cc \ - ../graph/build/memory/var_mem_assign_util.cc \ - ../host_kernels/transpose_kernel.cc \ - ../host_kernels/add_kernel.cc \ - ../host_kernels/broadcast_args_kernel.cc \ - ../host_kernels/broadcast_gradient_args_kernel.cc \ - ../host_kernels/cast_kernel.cc \ - ../host_kernels/concat_offset_kernel.cc \ - ../host_kernels/concat_v2_kernel.cc \ - ../host_kernels/dynamic_stitch_kernel.cc \ - ../host_kernels/identity_kernel.cc \ - ../host_kernels/empty_kernel.cc \ - ../host_kernels/expanddims_kernel.cc \ - ../host_kernels/fill_kernel.cc \ - ../host_kernels/floordiv_kernel.cc \ - ../host_kernels/floormod_kernel.cc \ - ../host_kernels/gather_v2_kernel.cc \ - ../host_kernels/greater_kernel.cc \ - ../host_kernels/kernel_utils.cc \ - ../host_kernels/maximum_kernel.cc \ - ../host_kernels/mul_kernel.cc \ - ../host_kernels/pack_kernel.cc \ - ../host_kernels/permute_kernel.cc \ - ../host_kernels/range_kernel.cc \ - ../host_kernels/rank_kernel.cc \ - ../host_kernels/reduce_prod_kernel.cc \ - ../host_kernels/reshape_kernel.cc \ - ../host_kernels/rsqrt_kernel.cc \ - ../host_kernels/shape_kernel.cc \ - ../host_kernels/shape_n_kernel.cc \ - ../host_kernels/size_kernel.cc \ - ../host_kernels/slice_d_kernel.cc \ - ../host_kernels/slice_kernel.cc \ - ../host_kernels/squeeze_kernel.cc \ - ../host_kernels/unsqueeze_kernel.cc \ - ../host_kernels/ssd_prior_box_kernel.cc \ - ../host_kernels/strided_slice_kernel.cc \ - ../host_kernels/sub_kernel.cc \ - ../host_kernels/transdata_kernel.cc \ - ../host_kernels/unpack_kernel.cc \ - ../graph/passes/pass_utils.cc \ - ../graph/common/bcast.cc \ - ../common/fp16_t.cc \ - ../common/formats/format_transfers/format_transfer_transpose.cc \ - ../common/formats/utils/formats_trans_utils.cc \ local_ge_executor_c_include := \ proto/insert_op.proto \ diff --git a/ge/ge_local_engine/CMakeLists.txt 
b/ge/ge_local_engine/CMakeLists.txt index 615a968f..76590172 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -195,7 +195,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES ) ############ libge_local_opskernel_builder.a ############ -add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) +add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) target_compile_options(ge_local_opskernel_builder_static PRIVATE -Werror diff --git a/ge/ge_local_engine/engine/host_cpu_engine.h b/ge/ge_local_engine/engine/host_cpu_engine.h index 0b99ecac..cc6b578c 100644 --- a/ge/ge_local_engine/engine/host_cpu_engine.h +++ b/ge/ge_local_engine/engine/host_cpu_engine.h @@ -20,7 +20,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "graph/node.h" #include "graph/operator.h" -#include "external/../register/register.h" +#include "register/register.h" namespace ge { class HostCpuEngine { diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index bdb02b3a..e434709a 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -30,7 +30,6 @@ #include "model/ge_model.h" #include "graph/ge_context.h" #include "opskernel_manager/ops_kernel_builder_manager.h" -#include "graph/utils/op_desc_utils.h" using domi::BuildMode; @@ -312,53 +311,6 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); } -static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, - const std::vector &in_anchors, const std::string &name) { - GE_CHECK_NOTNULL(out_anchor); - NodePtr in_node = out_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(in_node); - OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC); - OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) - 
.AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) - .Build(); - (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); - if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - -static Status GenerateTaskForConstant(const std::shared_ptr &graph) { - for (auto &node : graph->GetDirectNode()) { - // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - continue; - } - auto op_type = op_desc->GetType(); - if (op_type == NETOUTPUT) { - for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { - const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); - NodePtr in_node = peer_out_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(in_node); - - std::string in_node_op_type = in_node->GetType(); - if (in_node_op_type == CONSTANT) { - GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); - std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; - if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { - GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str()); - return FAILED; - } - } - } - } - } - return SUCCESS; -} - Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, @@ -380,9 +332,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { continue; } - - 
GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed."); - if (sub_graph->GetGraphUnknownFlag()) { // unknown shape build flow GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index aa825a5d..2eeecc0f 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -274,16 +274,13 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da /// @param [in] stream stream to execute model on /// @param [in] async_mode is asynchronize mode. /// @param [in] input_data model input data -/// @param [in] input_desc description of model input data /// @param [out] output_data model output data -/// @param [out] output_desc description of model output data /// Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - const std::vector &input_desc, OutputData &output_data, - std::vector &output_desc) { + OutputData &output_data) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); + Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, output_data); if (ret != SUCCESS) { GELOGE(ret, "Execute model failed, model_id:%u.", model_id); return ret; diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h index 974af5c1..b581f2fa 100755 --- a/ge/graph/load/graph_loader.h +++ b/ge/graph/load/graph_loader.h @@ -65,8 +65,7 @@ class GraphLoader { const std::vector &output_queue_ids); static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - const std::vector &input_desc, OutputData &output_data, - std::vector &output_desc); + OutputData &output_data); static Status 
DestroyAicpuKernel(uint64_t session_id, uint32_t model_id); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 81d47b3b..425ce199 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -118,8 +118,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrGetWeight(); std::size_t weights_size = weights.GetSize(); GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE); - if ((weight_ptr != nullptr) && (weight_size < weights_size)) { - GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size); - return FAILED; - } - - weights_mem_base_ = static_cast(dev_ptr); - is_inner_weight_base_ = false; - - if (weights_size != 0) { - weights_mem_base_ = static_cast(weight_ptr); - is_inner_weight_base_ = false; - if (weight_ptr == nullptr) { - weights_mem_base_ = MallocWeightsMem(weights_size); - if (weights_mem_base_ == nullptr) { - GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. 
size: %zu", weights_size); - return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; - } - is_inner_weight_base_ = true; - } - GELOGI("[IMAS]InitWeightMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - weights_mem_base_, weights_size); - GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); - GELOGI("copy weights data to device"); - } - - runtime_param_.weight_base = weights_mem_base_; - return SUCCESS; -} - - -Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { - if (is_feature_map_mem_has_inited_) { - GELOGE(FAILED, "call InitFeatureMapMem more than once ."); + if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { + GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); return FAILED; } - is_feature_map_mem_has_inited_ = true; - - std::size_t data_size = TotalMemSize(); - std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; - if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { - GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); + if ((weight_ptr != nullptr) && (weight_size < weights_size)) { + GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size); return FAILED; } mem_base_ = static_cast(dev_ptr); p2p_mem_base_ = static_cast(dev_ptr); + weights_mem_base_ = static_cast(dev_ptr); is_inner_mem_base_ = false; + is_inner_weight_base_ = false; if (TotalMemSize() && mem_base_ == nullptr) { mem_base_ = MallocFeatureMapMem(data_size); @@ -331,14 +299,12 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. 
size: %zu", data_size); return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; } - GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, mem_base_, data_size); + weights_mem_base_ = mem_base_; - if (!is_inner_weight_base_) { - weights_mem_base_ = mem_base_; - is_inner_weight_base_ = true; - } is_inner_mem_base_ = true; + is_inner_weight_base_ = true; } if (p2p_data_size != 0) { @@ -347,11 +313,27 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); return GE_EXEC_ALLOC_P2P_MEM_FAILED; } - GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, p2p_mem_base_, p2p_data_size); is_inner_p2p_mem_base_ = true; } + if (weights_size != 0) { + weights_mem_base_ = static_cast(weight_ptr); + is_inner_weight_base_ = false; + if (weight_ptr == nullptr) { + weights_mem_base_ = MallocWeightsMem(weights_size); + if (weights_mem_base_ == nullptr) { + GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. 
size: %zu", weights_size); + return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; + } + is_inner_weight_base_ = true; + } + GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + weights_mem_base_, weights_size); + GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); + } + GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; @@ -661,9 +643,8 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GE_TIMESTAMP_START(InitModelMem); GELOGD("Known node is %d", known_node_); - GE_CHK_STATUS_RET_NOLOG(InitWeightMem(dev_ptr, weight_ptr, weight_size)); if (!known_node_) { - GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size)); + GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size)); data_inputer_ = new (std::nothrow) DataInputer(); GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr."); } @@ -1160,7 +1141,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); } - return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 650f19eb..893c3d49 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -584,8 +584,7 @@ class DavinciModel { Status SyncVarData(); - Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size); - Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size); + Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize); void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, 
InputOutputDescInfo &input); @@ -851,9 +850,7 @@ class DavinciModel { Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node); Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc); - bool is_weight_mem_has_inited_; - bool is_feature_map_mem_has_inited_; - + bool is_model_has_inited_; uint32_t model_id_; uint32_t runtime_model_id_; string name_; diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index d6cdf42d..b7a0983d 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -31,7 +31,6 @@ #include "model/ge_root_model.h" #include "graph/common/local_context.h" #include "common/formats/utils/formats_trans_utils.h" -#include "hybrid/hybrid_davinci_model.h" namespace ge { thread_local uint32_t device_count = 0; @@ -205,13 +204,6 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { std::lock_guard lock(map_mutex_); - auto hybrid_davinci_model = hybrid_model_map_.find(model_id); - if (hybrid_davinci_model != hybrid_model_map_.end()) { - uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); - DestroyAicpuSession(session_id); - return SUCCESS; - } - auto it = model_map_.find(model_id); if (it == model_map_.end()) { GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); @@ -933,12 +925,6 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &output_desc, std::vector &inputFormats, std::vector &outputFormats, bool new_model_desc) { - std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); - if (hybrid_davinci_model != nullptr) { - hybrid_davinci_model->SetModelDescVersion(new_model_desc); - return hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); - } - std::shared_ptr davinci_model = 
GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); @@ -957,11 +943,6 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector> &batch_info, int32_t &dynamic_type) { - std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); - if (hybrid_davinci_model != nullptr) { - return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); - } - std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetDynamicBatchInfo failed, Invalid model id %u!", model_id); @@ -994,12 +975,6 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector &user_input_shape_order) { - auto hybrid_davinci_model = GetHybridModel(model_id); - if (hybrid_davinci_model != nullptr) { - hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order); - return SUCCESS; - } - auto davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id) @@ -1015,12 +990,6 @@ Status ModelManager::GetCurShape(const uint32_t model_id, std::vector & } Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { - std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); - if (hybrid_davinci_model != nullptr) { - hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info); - return SUCCESS; - } - std::shared_ptr davinci_model = GetModel(model_id); GE_CHECK_NOTNULL(davinci_model); davinci_model->GetModelAttr(dynamic_output_shape_info); @@ -1232,25 +1201,10 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d /// @param [in] stream model stream /// @param [in] async_mode is asynchronize mode. 
/// @param [in] input_data input data -/// @param [in] input_desc description of input data /// @param [out] output_data output data -/// @param [out] output_desc description of output data /// Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - const std::vector &input_desc, OutputData &output_data, - std::vector &output_desc) { - std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); - if (hybrid_davinci_model != nullptr) { - auto inputs = input_data.blobs; - auto outputs = output_data.blobs; - - Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream); - if (status == SUCCESS) { - GELOGI("Execute model %u success.", model_id); - } - return status; - } - + OutputData &output_data) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index e3780d5b..9821a4ab 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -148,13 +148,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// @param [in] stream model stream /// @param [in] async_mode is asynchronize mode. 
/// @param [in] input_data model input data - /// @param [in] input_desc description of model input data /// @param [out] output_data model output data - /// @param [out] output_desc description of model output data /// ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - const std::vector &input_desc, OutputData &output_data, - std::vector &output_desc); + OutputData &output_data); ge::Status SyncExecuteModel(uint32_t model_id, const std::vector &inputs, std::vector &outputs); diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 95f13b6f..87fac994 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -26,7 +26,6 @@ #include #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" #include "framework/common/types.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" @@ -73,7 +72,7 @@ Status DynamicShapePartitioner::Partition() { } REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true), "Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str()); - REQUIRE_SUCCESS(CtrlEdgeTransfer(), "Failed do ctrl edge transfer!"); + DumpGraph("_Before_DSP"); auto status = PartitionImpl(); GELOGD("%s.", DebugString().c_str()); @@ -87,50 +86,6 @@ Status DynamicShapePartitioner::Partition() { return status; } -Status DynamicShapePartitioner::CtrlEdgeTransfer() { - GELOGD("Do ctrl edge transfer start!"); - GE_CHECK_NOTNULL(root_graph_); - - bool is_dynamic_shape = false; - (void)AttrUtils::GetBool(root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); - if (!is_dynamic_shape) { - return SUCCESS; - } - for (auto &subgraph : root_graph_->GetAllSubgraphs()) { - for (ge::NodePtr &n : subgraph->GetDirectNode()) { - auto op_desc = n->GetOpDesc(); - if 
(op_desc == nullptr) { - continue; - } - auto op_type = op_desc->GetType(); - if (op_type == CONSTANT || op_type == CONSTANTOP) { - if (n->GetInAllNodes().empty()) { - GELOGD("[CtrlEdgeTransferPass] node [%s] in nodes is empty", n->GetName().c_str()); - continue; - } - - GELOGD("start to tranfer ctrl edge for const node [%s]", n->GetName().c_str()); - - for (auto &in_control_node : n->GetInControlNodes()) { - GE_CHECK_NOTNULL(in_control_node); - GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(), - n->GetInControlAnchor()), "remove edge failed"); - for (auto &out_node : n->GetOutNodes()) { - if (out_node == nullptr) { - continue; - } - GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(), - out_node->GetInControlAnchor()), "add edge failed."); - } - } - } - } - } - - GELOGD("Do ctrl edge transfer end!"); - return SUCCESS; -} - Status DynamicShapePartitioner::PartitionImpl() { REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed."); REQUIRE_SUCCESS(InitClusters(), "Failed init cluster nodes."); diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index 9772615e..b0477ae8 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -151,7 +151,6 @@ class DynamicShapePartitioner { Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow); Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow); bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor); - Status CtrlEdgeTransfer(); ge::ComputeGraphPtr root_graph_; // The original graph to partition std::unordered_map> node_2_cluster_; // Record nodes and the cluster it belongs to // topological sorted clusters, this field will change with the splitting. 
diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc index 689510f0..21fb1eaf 100644 --- a/ge/graph/passes/transop_breadth_fusion_pass.cc +++ b/ge/graph/passes/transop_breadth_fusion_pass.cc @@ -63,7 +63,7 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, GELOGE(FAILED, "node is null"); return ""); if (node->GetType() == CAST) { trans_data_type = true; - } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED || node->GetType() == EXPANDDIMS) { + } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED) { trans_format = true; trans_shape = true; } else if (node->GetType() == TRANSDATA) { diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt index 97b5a0f5..02b5f996 100644 --- a/ge/host_cpu_engine/CMakeLists.txt +++ b/ge/host_cpu_engine/CMakeLists.txt @@ -8,7 +8,7 @@ set(SRC_LIST "engine/host_cpu_engine.cc" "ops_kernel_store/host_cpu_ops_kernel_info.cc" "ops_kernel_store/op/op_factory.cc" - "ops_kernel_store/op/host_op.cc" + "ops_kernel_store/op/host_op.cc" ) set(CPU_OPS_KERNEL_LIST @@ -98,7 +98,7 @@ target_link_libraries(atc_host_cpu_engine PRIVATE set_target_properties(atc_host_cpu_engine PROPERTIES OUTPUT_NAME host_cpu_engine - LIBRARY_OUTPUT_DIRECTORY atclib + LIBRARY_OUTPUT_DIRECTORY atclib ) ############ libhost_cpu_opskernel_builder.so ############ @@ -185,7 +185,7 @@ set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES ) ############ libhost_cpu_opskernel_builder.a ############ -add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) +add_library(host_cpu_opskernel_builder_static SHARED ${CPU_OPS_KERNEL_LIST}) target_compile_options(host_cpu_opskernel_builder_static PRIVATE -Werror diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 
91996ab3..468a7014 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -353,44 +353,6 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a return SUCCESS; } -Status HybridModelAsyncExecutor::Execute(const std::vector &inputs, - const std::vector &input_desc, - std::vector &outputs, - std::vector &output_desc) { - GELOGI("Start to execute model."); - - HybridModelExecutor::ExecuteArgs args; - args.inputs.resize(inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) { - TensorValue tensor_value(inputs[i].data, inputs[i].length); - args.inputs[i] = tensor_value; - } - GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); - for (const auto &output_tensor_desc : args.output_desc) { - output_desc.emplace_back(*output_tensor_desc); - } - - for (size_t i = 0; i < args.outputs.size(); ++i) { - int64_t output_real_size = 0; - ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[i], output_real_size); - if (graph_status != GRAPH_SUCCESS) { - GELOGE(FAILED, "Get tensor size in bytes failed."); - return FAILED; - } - if (output_real_size > 0) { - if (outputs[i].length < static_cast(output_real_size)) { - GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the real size of output[%ld]", - i, outputs[i].length, output_real_size); - return FAILED; - } - GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE)); - } - outputs[i].length = output_real_size; - } - - return SUCCESS; -} - Status HybridModelAsyncExecutor::Execute(const vector &inputs, vector &outputs) { GELOGD("Start to execute model."); // prepare inputs diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 21833b0b..8de2beb6 100644 --- 
a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -35,11 +35,6 @@ class HybridModelAsyncExecutor { Status Init(); - Status Execute(const std::vector &inputs, - const std::vector &input_desc, - std::vector &outputs, - std::vector &output_desc); - Status Execute(const vector &inputs, vector &outputs); Status Start(const std::shared_ptr &listener); diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc index a491c9a5..d696adf9 100755 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -38,14 +38,6 @@ class HybridDavinciModel::Impl { return SUCCESS; } - Status Execute(const std::vector &inputs, - const std::vector &input_desc, - std::vector &outputs, - std::vector &output_desc, - rtStream_t stream) { - return executor_.Execute(inputs, input_desc, outputs, output_desc); - } - Status Execute(const vector &inputs, vector &outputs) { return executor_.Execute(inputs, outputs); } @@ -76,33 +68,6 @@ class HybridDavinciModel::Impl { executor_.SetDeviceId(device_id); } - uint64_t GetSessionId() { - return model_.GetSessionId(); - } - - Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { - return model_.GetDynamicBatchInfo(batch_info, dynamic_type); - } - - void GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { - model_.GetUserDesignateShapeOrder(user_input_shape_order); - } - - void GetModelAttr(std::vector &dynamic_output_shape_info) { - model_.GetModelAttr(dynamic_output_shape_info); - } - - Status GetInputOutputDescInfo(vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &output_formats) { - return model_.GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats); - } - - void SetModelDescVersion(bool is_new_model_desc) { - model_.SetModelDescVersion(is_new_model_desc); - } - private: std::shared_ptr listener_; HybridModel model_; @@ -130,14 +95,6 @@ Status 
HybridDavinciModel::Init() { return impl_->Init(); } -Status HybridDavinciModel::Execute(const std::vector &inputs, - const std::vector &input_desc, - std::vector &outputs, - std::vector &output_desc, rtStream_t stream) { - GE_CHECK_NOTNULL(impl_); - return impl_->Execute(inputs, input_desc, outputs, output_desc, stream); -} - Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { GE_CHECK_NOTNULL(impl_); return impl_->Execute(inputs, outputs); @@ -175,41 +132,5 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) { impl_->SetDeviceId(device_id); } } - -Status HybridDavinciModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { - GE_CHECK_NOTNULL(impl_); - return impl_->GetDynamicBatchInfo(batch_info, dynamic_type); -} - -void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { - if (impl_ != nullptr) { - impl_->GetUserDesignateShapeOrder(user_input_shape_order); - } -} - -void HybridDavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { - if (impl_ != nullptr) { - impl_->GetModelAttr(dynamic_output_shape_info); - } -} - -Status HybridDavinciModel::GetInputOutputDescInfo(vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &output_formats) { - GE_CHECK_NOTNULL(impl_); - return impl_->GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats); -} - -void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) { - if (impl_ != nullptr) { - impl_->SetModelDescVersion(is_new_model_desc); - } -} - -uint64_t HybridDavinciModel::GetSessionId() { - GE_CHECK_NOTNULL(impl_); - return impl_->GetSessionId(); -} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h index 5349390c..00a48c1e 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -37,12 +37,6 @@ class HybridDavinciModel { Status Init(); - Status Execute(const 
std::vector &inputs, - const std::vector &input_desc, - std::vector &outputs, - std::vector &output_desc, - rtStream_t stream); - Status Execute(const vector &inputs, vector &outputs); Status ModelRunStart(); @@ -57,21 +51,6 @@ class HybridDavinciModel { void SetDeviceId(uint32_t device_id); - uint64_t GetSessionId(); - - Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type); - - void GetUserDesignateShapeOrder(std::vector &user_input_shape_order); - - void GetModelAttr(std::vector &dynamic_output_shape_info); - - Status GetInputOutputDescInfo(vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &output_formats); - - void SetModelDescVersion(bool is_new_model_desc); - private: HybridDavinciModel() = default; class Impl; diff --git a/ge/hybrid/hybrid_davinci_model_stub.cc b/ge/hybrid/hybrid_davinci_model_stub.cc index 366845c5..b95b9efc 100644 --- a/ge/hybrid/hybrid_davinci_model_stub.cc +++ b/ge/hybrid/hybrid_davinci_model_stub.cc @@ -28,14 +28,6 @@ Status HybridDavinciModel::Init() { return UNSUPPORTED; } -Status HybridDavinciModel::Execute(const std::vector &inputs, - const std::vector &input_desc, - std::vector &outputs, - std::vector &output_desc, - rtStream_t stream) { - return UNSUPPORTED; -} - Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { return UNSUPPORTED; } @@ -60,29 +52,5 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) { void HybridDavinciModel::SetDeviceId(uint32_t device_id) { } - -uint64_t HybridDavinciModel::GetSessionId() { - return 0; -} - -Status HybridDavinciModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { - return UNSUPPORTED; -} - -void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { -} - -void HybridDavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { -} - -Status HybridDavinciModel::GetInputOutputDescInfo(vector &input_desc, - vector &output_desc, - std::vector 
&input_formats, - std::vector &output_formats) { - return UNSUPPORTED; -} - -void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) { -} } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index feb6757b..59c7be9a 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -21,18 +21,12 @@ #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/model/hybrid_model_builder.h" #include "hybrid/node_executor/node_executor.h" -#include "common/op/ge_op_utils.h" namespace ge { namespace hybrid { -namespace { -const int64_t kMemSizeUnknownShape = -1; // Unknown shape mem size -} - HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) { } @@ -134,214 +128,7 @@ const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) c } const string &HybridModel::GetModelName() const { - return model_name_; -} - -Status HybridModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { - // dynamic shape do not need dynamic batch - batch_info = {}; - dynamic_type = -1; - return SUCCESS; -} - -void HybridModel::GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { - // dynamic shape do not need dynamic batch - user_input_shape_order = {}; -} - -void HybridModel::GetModelAttr(std::vector &dynamic_output_shape_info) { - dynamic_output_shape_info = {}; -} - -Status HybridModel::GetInputOutputDescInfo(vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &output_formats) { - auto node_item_list = root_graph_item_->GetInputNodes(); - if (node_item_list.empty()) { - GELOGE(FAILED, "node item list is empty!"); - return FAILED; - } - - GE_CHECK_NOTNULL(node_item_list[0]->node); - 
GE_CHECK_NOTNULL(node_item_list[0]->node->GetOpDesc()); - if (node_item_list[0]->node->GetOpDesc()->GetInputsSize() != 1) { - GELOGE(FAILED, "input size of op is not 1!"); - return FAILED; - } - - GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); - - return SUCCESS; -} - -void HybridModel::SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, - Format &format, InputOutputDescInfo &input) { - uint32_t n, c, h, w; - n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N; - c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C; - h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; - w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; - - if (model_input_dims.size() == static_cast(NORMAL_TENSOR_SIZE)) { - input.shape_info.num = model_input_dims[n]; - input.shape_info.height = model_input_dims[h]; - input.shape_info.width = model_input_dims[w]; - input.shape_info.channel = model_input_dims[c]; - } - for (auto model_input_dim : model_input_dims) { - input.shape_info.dims.push_back(model_input_dim); - } - input.shape_info.shape_ranges = shape_ranges; - return; -} - -void HybridModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { - std::vector> shape_ranges; - if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { - // When static aipp is set, need to get the model input dims which processed by aipp - vector model_input_dims; - (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); - SetInputDimsAndShapeRangesInfo(model_input_dims, shape_ranges, format, input); - return; - } - // judge if this data is linked dynamic aipp first, multiply batch has been considered - if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { - vector dynamic_aipp_input_dims; - (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", 
dynamic_aipp_input_dims); - SetInputDimsAndShapeRangesInfo(dynamic_aipp_input_dims, shape_ranges, format, input); - return; - } else { - vector input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims(); - op_desc->GetInputDescPtr(0)->GetShapeRange(shape_ranges); - SetInputDimsAndShapeRangesInfo(input_dims, shape_ranges, format, input); - return; - } -} - -Status HybridModel::GetInputDescInfo(vector &input_desc, std::vector &formats) { - auto node_item_list = root_graph_item_->GetInputNodes(); - for (auto &node_item : node_item_list) { - InputOutputDescInfo input; - - GE_CHECK_NOTNULL(node_item->node); - auto op_desc = node_item->node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); - - Format format = op_desc->GetInputDescPtr(0)->GetFormat(); - input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); - input.name = op_desc->GetName(); - - int64_t input_size = 0; - GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); - - // support dynamic shape - if (input_size < 0) { - GELOGD("dynamic shape scene, input size is unknown. " - "format=%d, data_type=%d, input_size=%ld", - format, input.data_type, input_size); - input_size = kMemSizeUnknownShape; // -1 - } - - // not support dynamic shape input for now, so input_size here will be not less than zero. 
- input.size = input_size; - - CreateInputDimsInfo(op_desc, format, input); - - formats.push_back(format); - input_desc.push_back(input); - } - is_new_model_desc_ = false; - return SUCCESS; -} - -void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output_desc_info, uint32_t &format_result) { - GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return ); - Format format = output_desc->GetFormat(); - GeShape shape = output_desc->GetShape(); - std::vector> shape_ranges; - output_desc->GetShapeRange(shape_ranges); - DataType data_type = output_desc->GetDataType(); - int64_t dims[] = {1, 1, 1, 1}; - format_result = format; - if (format == FORMAT_ND) { // for ND tensor - for (size_t i = 0; i < shape.GetDimNum() && i < (sizeof(dims) / sizeof(dims[0])); i++) { - dims[i] = shape.GetDim(i); - } - } else { // FOR FORMAT_NHWC or FORMAT_NCHW - dims[0] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N); // 0: first dim - dims[1] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C); // 1: second dim - dims[2] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H); // 2: third dim - dims[3] = shape.GetDim(format == FORMAT_NHWC ? 
NHWC_DIM_W : NCHW_DIM_W); // 3: forth dim - } - output_desc_info.shape_info.num = dims[0]; // 0: first dim - output_desc_info.shape_info.channel = dims[1]; // 1: second dim - output_desc_info.shape_info.height = dims[2]; // 2: third dim - output_desc_info.shape_info.width = dims[3]; // 3: forth dim - if (format == FORMAT_FRACTAL_Z) { // FraczToHWCK - int64_t k = shape.GetDim(0); // 0: first dim - int64_t c = shape.GetDim(1); // 1: second dim - int64_t h = shape.GetDim(2); // 2: third dim - int64_t w = shape.GetDim(3); // 3: forth dim - output_desc_info.shape_info.dims.push_back(h); - output_desc_info.shape_info.dims.push_back(w); - output_desc_info.shape_info.dims.push_back(c); - output_desc_info.shape_info.dims.push_back(k); - if (shape_ranges.size() == 4) { // 4 dims - output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[2]); // h:2 - output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[3]); // w:3 - output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[1]); // c:1 - output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[0]); // k:0 - } - format_result = FORMAT_HWCN; - } else { - for (size_t j = 0; j < shape.GetDimNum(); j++) { - output_desc_info.shape_info.dims.push_back(shape.GetDim(j)); - } - output_desc_info.shape_info.shape_ranges = shape_ranges; - } - int64_t tensor_size = 0; - (void)TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); - output_desc_info.size = static_cast(tensor_size); - output_desc_info.data_type = output_desc->GetDataType(); -} - -Status HybridModel::GetOutputDescInfo(vector &output_desc, std::vector &formats) { - std::vector output_desc_list; - GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); // output_desc_list contains vaild input desc - - vector out_node_names; - (void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names); - - 
GE_CHECK_NOTNULL(root_graph_item_->GetOutputNode()); - auto op_desc = root_graph_item_->GetOutputNode()->op_desc; - GE_CHECK_NOTNULL(op_desc); - - auto out_size = static_cast(op_desc->GetInputsSize()); - GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size()); - - for (uint32_t index = 0; index < out_size; ++index) { - string output_name; - std::vector src_name = op_desc->GetSrcName(); - std::vector src_index = op_desc->GetSrcIndex(); - if (out_size == out_node_names.size()) { - bool contains_colon = out_node_names[index].find(":") != std::string::npos; - output_name = contains_colon ? out_node_names[index] : out_node_names[index] + ":" + std::to_string(src_index[index]); - } else { - output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); - } - - InputOutputDescInfo output_desc_info; - output_desc_info.name = output_name; - - uint32_t format_result; - CreateOutput(output_desc_list[index], output_desc_info, format_result); - output_desc.push_back(output_desc_info); - formats.push_back(format_result); - } - return SUCCESS; + return model_name_; } } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 1ec2f8a8..11311968 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -83,30 +83,6 @@ class HybridModel { const string &GetModelName() const; - Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type); - - void GetUserDesignateShapeOrder(std::vector &user_input_shape_order); - - void GetModelAttr(std::vector &dynamic_output_shape_info); - - Status GetInputOutputDescInfo(vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &outputFormats); - - Status GetInputDescInfo(vector &input_desc, std::vector &formats); - - void 
CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output, uint32_t &format_result); - - Status GetOutputDescInfo(vector &output_desc, std::vector &formats); - - void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); - - void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } - - void SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, - Format &format, InputOutputDescInfo &input); - private: friend class HybridModelBuilder; friend class HybridModelAsyncExecutor; @@ -125,8 +101,6 @@ class HybridModel { std::map> subgraph_items_; std::map> node_items_; - bool is_new_model_desc_ = false; // support aipp - // runtime fields uint32_t device_id_ = 0; uint32_t model_id_ = 0; diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index f4da3dcf..582b36eb 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -27,8 +27,6 @@ #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" -#include "framework/common/debug/ge_log.h" -#include "graph/utils/attr_utils.h" namespace ge { namespace hybrid { @@ -39,30 +37,6 @@ const uint32_t kAlignment = 32; const int kBytes = 8; const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; -Status SetOutputNameAttr(ComputeGraph &graph) { - vector output_names; - for (const auto &node : graph.GetDirectNode()) { - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - continue; - } - auto op_type = op_desc->GetType(); - if (op_type == NETOUTPUT) { - for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { - const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); - NodePtr in_node = peer_out_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(in_node); - 
output_names.push_back(in_node->GetName()); - } - } - } - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names), - GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed."); - return FAILED); - return SUCCESS; -} - int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { int64_t var_size = 0; auto data_type = desc.GetDataType(); @@ -965,10 +939,6 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr Status HybridModelBuilder::IndexTaskDefs() { const auto &root_graph = ge_root_model_->GetRootGraph(); - if (SetOutputNameAttr(*root_graph) != SUCCESS) { - GELOGW("Set output name attr failed."); - } - for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { auto &name = it.first; auto &ge_model = it.second; diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 7ed14309..fd6387e6 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -19,7 +19,6 @@ #include "framework/common/debug/log.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" using optiling::OpRunInfo; @@ -37,58 +36,6 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) return SUCCESS; } -Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { - auto op_desc_ptr = make_shared(op_desc); - GE_CHECK_NOTNULL(op_desc_ptr); - auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); - if (tbe_kernel == nullptr) { - GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); - return INTERNAL_ERROR; - } - TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); - rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); - if (rt_ret != RT_ERROR_NONE) { - void *bin_handle 
= nullptr; - if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { - GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); - rtDevBinary_t binary; - std::string json_string; - GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), - GELOGI("Get original type of session_graph_id.")); - if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { - binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; - } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { - binary.magic = RT_DEV_BINARY_MAGIC_ELF; - } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { - binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; - } else { - GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); - return PARAM_INVALID; - } - binary.version = 0; - binary.data = tbe_kernel->GetBinData(); - binary.length = tbe_kernel->GetBinDataSize(); - GELOGI("TBE: binary.length: %lu", binary.length); - GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); - std::string meta_data; - GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data), - GELOGI("Get original type of json_string")); - GELOGI("TBE: meta data: %s", meta_data.empty() ? 
"null" : meta_data.c_str()); - GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); - kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); - } else { - GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); - kernel_store.ReferTBEHandle(stub_name_.c_str()); - } - std::string kernel_name; - GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name), - GELOGI("Get original type of kernel_name")); - GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); - GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); - } - return SUCCESS; -} - Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET(ValidateTaskDef(task_def), "[%s] Failed to validate task def: [%s]", @@ -98,9 +45,6 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef const domi::KernelDef &kernel_def = task_def.kernel(); const domi::KernelContext &context = kernel_def.context(); stub_name_ = kernel_def.stub_func(); - - GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); - GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); args_size_ = kernel_def.args_size(); block_dim_ = kernel_def.block_dim(); diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 5818f384..0447ade7 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -62,7 +62,6 @@ class AiCoreOpTask { static Status ValidateTaskDef(const domi::TaskDef &task_def); Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); Status InitTilingInfo(const OpDesc &op_desc); - Status RegisterTbeHandle(const OpDesc &op_desc); std::string stub_name_; void *stub_func_ = nullptr; diff --git 
a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 5a73126f..17dbf928 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -234,22 +234,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data, ge::RunModelData &output_data, bool async_mode = false); - /// - /// @ingroup ge - /// @brief Synchronous execution of offline model(Do not create thread) - /// @param [in] uint32_t model_id: Model ID to execute - /// @param [in] void* stream: stream to execute - /// @param [in] bool async_mode: is asynchronize mode. - /// @param [in] const domi::InputData *input_data: Model input data - /// @param [in] const std::vector &input_desc: description of model input data - /// @param [out] domi::OutputData *output_data: Model output data - /// @param [out] std::vector &output_desc: description of model output data - /// @return SUCCESS handle successfully / others handle failed - /// - ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, - const std::vector &input_desc, ge::RunModelData &run_output_data, - std::vector &output_desc, bool async_mode = false); - /// /// @ingroup ge /// @brief Get weight memory size from model file From 8b010963be58ecd961ec3dc6e34ac6ef7f4b2494 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Fri, 4 Dec 2020 14:41:28 +0800 Subject: [PATCH 012/127] Feature: Support single op profiling --- ge/executor/ge_executor.cc | 2 +- ge/single_op/single_op.cc | 53 +++++++++++++++++++++++++++++++-- ge/single_op/single_op_model.cc | 7 +++++ ge/single_op/single_op_model.h | 1 + ge/single_op/task/op_task.cc | 15 ++++++++++ ge/single_op/task/op_task.h | 7 +++-- 6 files changed, 80 insertions(+), 5 deletions(-) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index d03a8d7b..fedd13b7 100755 --- a/ge/executor/ge_executor.cc +++ 
b/ge/executor/ge_executor.cc @@ -283,7 +283,7 @@ Status GeExecutor::Initialize() { // Start profiling Options profiling_options; profiling_options.device_id = 0; - profiling_options.job_id = ""; + profiling_options.job_id = "1"; ProfilingManager::Instance().Init(profiling_options); isInit_ = true; diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 371d7110..10cc601d 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -17,6 +17,7 @@ #include "single_op/single_op.h" #include "common/fmk_types.h" +#include "common/ge_types.h" #include "common/math/math_util.h" #include "common/profiling/profiling_manager.h" #include "framework/common/debug/ge_log.h" @@ -34,6 +35,45 @@ size_t GetAlignedSize(size_t size) { size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; return aligned_size; } + +Status ProfilingTaskInfo(OpTask *op_task) { + if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { + return SUCCESS; + } + + string model_name; + string op_name; + uint32_t model_id; + uint32_t block_dim; + if (GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); + return ACL_ERROR_GE_PARAM_INVALID; + } + GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); + std::vector task_desc_info; + uint32_t task_id = 0; + uint32_t stream_id = 0; + if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); + return ACL_ERROR_GE_PARAM_INVALID; + } + + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = model_name_; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, 
stream_id); + task_desc_info.emplace_back(tmp_task_desc_info); + + std::vector compute_graph_info; + + auto &profiling_manager = ProfilingManager::Instance(); + profiling_manager.ReportProfilingData(model_id_, task_desc_info, compute_graph_info, + !profiling_manager.IsAclApiMode()); + return SUCCESS; +} } // namespace SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) { @@ -169,6 +209,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c if (ret != SUCCESS) { return ret; } + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task)); } return ret; @@ -281,9 +322,17 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, } if (op_task_->GetOpTaskType() == OP_TASK_TBE) { - return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); + auto ret = ExecuteTbeTask(input_desc, inputs, output_desc, outputs); + if (ret == SUCCESS) { + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_)); + } + return ret; } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { - return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); + auto aicpu_ret = op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); + if (aicpu_ret == SUCCESS) { + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_)); + } + return aicpu_ret; } else { GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 49968f4f..9009b74f 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -157,6 +157,7 @@ Status SingleOpModel::LoadAllNodes() { auto ge_model = model_helper_.GetGeModel(); GE_CHECK_NOTNULL(ge_model); Graph graph = ge_model->GetGraph(); + model_id_ = ge_model->GetModelId(); auto compute_graph = 
GraphUtils::GetComputeGraph(graph); if (compute_graph == nullptr) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str()); @@ -248,6 +249,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); ParseArgTable(tbe_task, single_op); + tbe_task->SetModelArgs(model_name_, model_id_); single_op.tasks_.emplace_back(tbe_task); } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); @@ -258,6 +260,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { if (ret != SUCCESS) { return ret; } + task->SetModelArgs(model_name_, model_id_); single_op.tasks_.emplace_back(task); } else { GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); @@ -273,6 +276,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { if (ret != SUCCESS) { return ret; } + aicpu_task->SetModelArgs(model_name_, model_id_); single_op.tasks_.emplace_back(aicpu_task); } else { // skip @@ -393,6 +397,7 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl GELOGD("Building TBE task"); TbeOpTask *tbe_task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); + tbe_task->SetModelArgs(model_name_, model_id_); single_op.op_task_.reset(tbe_task); } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); @@ -400,6 +405,7 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); + task->SetModelArgs(model_name_, 
model_id_); single_op.op_task_.reset(task); } else { GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, @@ -446,6 +452,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { const TaskDef &copy_task_def = tasks[i]; GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); } + aicpu_task->SetModelArgs(model_name_, model_id_); single_op.op_task_.reset(aicpu_task); } else { // skip diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 50aeb7ab..5f1c842a 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -77,6 +77,7 @@ class SingleOpModel { void ParseArgTable(TbeOpTask *task, SingleOp &op); std::string model_name_; + uint32_t model_id_ = 0; const void *ori_model_data_; uint32_t ori_model_size_; diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index c3c4e5bb..f8b019e9 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -93,6 +93,21 @@ const vector &OpTask::GetWorkspaceSizes() const { return workspace_size void OpTask::SetWorkspaceSizes(const vector &workspace_sizes) { workspace_sizes_ = workspace_sizes; } +void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { + model_name_ = model_name; + model_id_ = model_id; +} + +Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, + uint32_t &block_dim) { + model_name = model_name_; + model_id = model_id_; + block_dim = block_dim_; + GE_CHECK_NOTNULL(op_desc_); + op_name = op_desc_->GetName(); + return SUCCESS; +} + TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { (void)rtMemFreeManaged(sm_desc_); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 65c77800..df80088d 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -58,6 +58,8 @@ class OpTask { virtual const void *GetIOAddr() const = 0; const vector &GetWorkspaceSizes() const; void SetWorkspaceSizes(const vector 
&workspace_sizes); + void SetModelArgs(std::string model_name, uint32_t model_id); + Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); const OpDescPtr &GetOpdesc() const {return op_desc_;} Status OpenDump(rtStream_t stream); void SetIoAddrsForDump(const vector &io_addrs_for_dump) { @@ -77,6 +79,9 @@ class OpTask { DumpProperties dump_properties_; DumpOp dump_op_; OpDescPtr op_desc_; + std::string model_name_; + uint32_t model_id_ = 0; + uint32_t block_dim_ = 1; std::vector io_addrs_for_dump_; }; @@ -115,7 +120,6 @@ class TbeOpTask : public OpTask { const void *stub_func_ = nullptr; std::unique_ptr args_; size_t arg_size_ = 0; - uint32_t block_dim_ = 1; void *sm_desc_ = nullptr; std::string stub_name_; @@ -239,7 +243,6 @@ private: std::string kernel_name_; std::unique_ptr args_; size_t arg_size_ = 0; - uint32_t block_dim_ = 1; void *sm_desc_ = nullptr; void *io_addr_ = nullptr; bool is_custom_ = false; From fde6b4c6538d0e6f2c635a6c76b85a6eec6dac97 Mon Sep 17 00:00:00 2001 From: wangzhengjun Date: Wed, 2 Dec 2020 17:41:57 +0800 Subject: [PATCH 013/127] modify for static check --- .../formats/format_transfers/format_transfer_nchw_fz_c04.cc | 1 - ge/graph/build/graph_builder.cc | 4 ++-- ge/graph/label/case_label_maker.h | 1 - ge/graph/label/if_label_maker.h | 1 - ge/graph/label/partitioned_call_label_maker.h | 1 - ge/graph/label/while_label_maker.h | 1 - ge/graph/manager/graph_manager.cc | 2 -- ge/graph/passes/ctrl_edge_transfer_pass.cc | 1 - ge/graph/passes/switch_to_stream_switch_pass.h | 4 ++-- ge/graph/passes/variable_op_pass_bak.cc | 1 - ge/host_kernels/concat_v2_kernel.cc | 2 +- ge/hybrid/node_executor/aicore/aicore_node_executor.h | 2 +- 12 files changed, 6 insertions(+), 15 deletions(-) diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc index a66aeeb4..49b19f46 100644 --- 
a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc @@ -35,7 +35,6 @@ * Padding to (N, ceil(Z/16)*16) * Last Step: View the (N, ceil(Z/16)*16) as 4D (N/16, 16, C/16, 16) and transpose to (C/16, N/16, 16, 16) */ - namespace ge { namespace formats { namespace { diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index e434709a..0fa1e1ee 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -424,7 +424,7 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr } Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { - // set input_desc.size = src_node.output_desc.size + // Set the size of input_desc to 'src_node.output_desc.size' if (node_ptr->GetType() == DATA) { bool is_unknown_shape = false; GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node_ptr, is_unknown_shape), @@ -447,7 +447,7 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { GE_IF_BOOL_EXEC(src_op == nullptr, continue); auto node_op_desc = node_ptr->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); - // set dst_node.input_desc = src_node.output_desc + // Set the input_desc of dst_node to 'src_node.output_desc' auto output_desc = src_op->GetOutputDescPtr(peer_out_anchor->GetIdx()); int64_t size = 0; GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS, GELOGI("Get size failed!")); diff --git a/ge/graph/label/case_label_maker.h b/ge/graph/label/case_label_maker.h index 1078a906..3dbfb2bc 100644 --- a/ge/graph/label/case_label_maker.h +++ b/ge/graph/label/case_label_maker.h @@ -86,7 +86,6 @@ | Node | +------------+ *******************************************************************************/ - namespace ge { class CaseOpLabelMaker : public LabelMaker { public: diff --git a/ge/graph/label/if_label_maker.h b/ge/graph/label/if_label_maker.h index 0807f549..8b07eb96 
100644 --- a/ge/graph/label/if_label_maker.h +++ b/ge/graph/label/if_label_maker.h @@ -70,7 +70,6 @@ | Node | +------------+ *******************************************************************************/ - namespace ge { class IfOpLabelMaker : public LabelMaker { public: diff --git a/ge/graph/label/partitioned_call_label_maker.h b/ge/graph/label/partitioned_call_label_maker.h index b89cb94c..3944aabd 100644 --- a/ge/graph/label/partitioned_call_label_maker.h +++ b/ge/graph/label/partitioned_call_label_maker.h @@ -54,7 +54,6 @@ | c | +---------------+ *******************************************************************************/ - namespace ge { class PartitionedCallLabelMaker : public LabelMaker { public: diff --git a/ge/graph/label/while_label_maker.h b/ge/graph/label/while_label_maker.h index 0eb0deee..6c30475b 100644 --- a/ge/graph/label/while_label_maker.h +++ b/ge/graph/label/while_label_maker.h @@ -70,7 +70,6 @@ | Node | +------------+ *******************************************************************************/ - namespace ge { class WhileOpLabelMaker : public LabelMaker { public: diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 87070e79..5b194c44 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -2688,9 +2688,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { } // it will not execute graph preprocess, optimize, parition, build if the graph has built successful. 
- GELOGI("Start for run graph async."); - GeRootModelPtr ge_root_model = nullptr; if (graph_manager->IsGraphNeedBuild(graph_node)) { if (graph_node->GetBuildFlag()) { diff --git a/ge/graph/passes/ctrl_edge_transfer_pass.cc b/ge/graph/passes/ctrl_edge_transfer_pass.cc index f53dc7be..a538a10c 100755 --- a/ge/graph/passes/ctrl_edge_transfer_pass.cc +++ b/ge/graph/passes/ctrl_edge_transfer_pass.cc @@ -38,7 +38,6 @@ namespace ge { * \ / * B */ - Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) { GELOGD("CtrlEdgeTransferPass start running"); GE_CHECK_NOTNULL(graph); diff --git a/ge/graph/passes/switch_to_stream_switch_pass.h b/ge/graph/passes/switch_to_stream_switch_pass.h index 48725230..1681d111 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.h +++ b/ge/graph/passes/switch_to_stream_switch_pass.h @@ -42,9 +42,9 @@ namespace ge { +-----------+ +-----------+ | Const | | VariableV2| +-----------+ +-----------+ -*/ -/* Switch branch op optimize, Switches in same case merge to one StreamSwitch, update following nodes' input + + Switch branch op optimize, Switches in same case merge to one StreamSwitch, update following nodes' input +-----------+ / | task2 | \ diff --git a/ge/graph/passes/variable_op_pass_bak.cc b/ge/graph/passes/variable_op_pass_bak.cc index 3e40e686..bcddc919 100644 --- a/ge/graph/passes/variable_op_pass_bak.cc +++ b/ge/graph/passes/variable_op_pass_bak.cc @@ -252,7 +252,6 @@ Status VariableOpPass::RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusi // case 2: suppose input format of transdata not equal with out format // and input format not equal with var // so we make input format equal with var - for (auto &cur_trans : fusion_road) { if (cur_trans.input.GetFormat() == cur_trans.output.GetFormat()) { cur_trans.output.SetFormat(prev_node_info.output.GetFormat()); diff --git a/ge/host_kernels/concat_v2_kernel.cc b/ge/host_kernels/concat_v2_kernel.cc index a9f0da81..234d8c8a 100644 --- a/ge/host_kernels/concat_v2_kernel.cc +++ 
b/ge/host_kernels/concat_v2_kernel.cc @@ -120,7 +120,7 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &i int &tidx, ConstGeTensorPtr &tensor) { size_t input_size = input.size(); - // N >= 2 and N + 1 >= 3 + // N + 1 is greater than or equal to 3 if (input_size < kConcatV2InputNum) { GELOGI("The number of input for ConcatV2 must not be less than %zu.", kConcatV2InputNum); return NOT_CHANGED; diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.h b/ge/hybrid/node_executor/aicore/aicore_node_executor.h index 989090e9..9e92a160 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.h +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.h @@ -89,7 +89,7 @@ class TaskCompilerFactory { class CompilerFunctionRegistrar { public: - CompilerFunctionRegistrar(CreateFn fn); + explicit CompilerFunctionRegistrar(CreateFn fn); ~CompilerFunctionRegistrar() = default; }; } // namespace hybrid From f9cf75d4e2a3024a1ab709151295d45a11ce6c8c Mon Sep 17 00:00:00 2001 From: l00444296 Date: Fri, 4 Dec 2020 15:28:05 +0800 Subject: [PATCH 014/127] Feature: Support single op profiling --- ge/single_op/single_op.cc | 117 +++++++++++++++++++++++++------------- ge/single_op/single_op.h | 2 + 2 files changed, 80 insertions(+), 39 deletions(-) diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 10cc601d..2ff5f44d 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -35,45 +35,6 @@ size_t GetAlignedSize(size_t size) { size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; return aligned_size; } - -Status ProfilingTaskInfo(OpTask *op_task) { - if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { - return SUCCESS; - } - - string model_name; - string op_name; - uint32_t model_id; - uint32_t block_dim; - if (GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); - return 
ACL_ERROR_GE_PARAM_INVALID; - } - GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); - std::vector task_desc_info; - uint32_t task_id = 0; - uint32_t stream_id = 0; - if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); - return ACL_ERROR_GE_PARAM_INVALID; - } - - TaskDescInfo tmp_task_desc_info; - tmp_task_desc_info.model_name = model_name_; - tmp_task_desc_info.op_name = op_name; - tmp_task_desc_info.block_dim = block_dim; - tmp_task_desc_info.task_id = task_id; - tmp_task_desc_info.stream_id = stream_id; - GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); - task_desc_info.emplace_back(tmp_task_desc_info); - - std::vector compute_graph_info; - - auto &profiling_manager = ProfilingManager::Instance(); - profiling_manager.ReportProfilingData(model_id_, task_desc_info, compute_graph_info, - !profiling_manager.IsAclApiMode()); - return SUCCESS; -} } // namespace SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) { @@ -215,6 +176,45 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c return ret; } +Status SingleOp::ProfilingTaskInfo(OpTask *op_task) { + if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { + return SUCCESS; + } + + string model_name; + string op_name; + uint32_t model_id; + uint32_t block_dim; + if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); + return ACL_ERROR_GE_PARAM_INVALID; + } + GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); + std::vector task_desc_info; + uint32_t task_id = 0; + uint32_t stream_id = 0; + if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { + 
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); + return ACL_ERROR_GE_PARAM_INVALID; + } + + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = model_name; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); + task_desc_info.emplace_back(tmp_task_desc_info); + + std::vector compute_graph_info; + + auto &profiling_manager = ProfilingManager::Instance(); + profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info, + !profiling_manager.IsAclApiMode()); + return SUCCESS; +} + void SingleOp::SetStream(rtStream_t stream) { stream_ = stream; } @@ -340,4 +340,43 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; } } + +Status DynamicSingleOp::ProfilingTaskInfo(std::unique_ptr &op_task) { + if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { + return SUCCESS; + } + + string model_name; + string op_name; + uint32_t model_id; + uint32_t block_dim; + if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); + return ACL_ERROR_GE_PARAM_INVALID; + } + GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); + std::vector task_desc_info; + uint32_t task_id = 0; + uint32_t stream_id = 0; + if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); + return ACL_ERROR_GE_PARAM_INVALID; + } + + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = model_name; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_id = task_id; + 
tmp_task_desc_info.stream_id = stream_id; + GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); + task_desc_info.emplace_back(tmp_task_desc_info); + + std::vector compute_graph_info; + + auto &profiling_manager = ProfilingManager::Instance(); + profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info, + !profiling_manager.IsAclApiMode()); + return SUCCESS; +} } // namespace ge diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h index 14ef8ce1..05b468f3 100755 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -42,6 +42,7 @@ class SingleOp { Status ValidateArgs(const std::vector &inputs, const std::vector &outputs); Status UpdateArgs(const std::vector &inputs, const std::vector &outputs); Status GetArgs(const std::vector &inputs, const std::vector &outputs); + Status ProfilingTaskInfo(OpTask *op_task); friend class SingleOpModel; std::mutex *stream_mutex_; @@ -79,6 +80,7 @@ class DynamicSingleOp { const vector &inputs, vector &output_desc, vector &outputs); + Status ProfilingTaskInfo(std::unique_ptr &op_task); std::unique_ptr op_task_; uintptr_t resource_id_ = 0; From 4b6d45260761fbd437e969033327896f28617772 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 4 Dec 2020 16:07:22 +0800 Subject: [PATCH 015/127] update submodule parser --- parser | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser b/parser index 9e392045..47c1c18b 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 9e392045c26a57913b512d0686e1285650b62abe +Subproject commit 47c1c18b4b8e5ab38ae1e380c9f1671cbafc4aee From 30b15ad4ae43463405687cea95798a2f99198e29 Mon Sep 17 00:00:00 2001 From: dongduo Date: Fri, 4 Dec 2020 16:43:43 +0800 Subject: [PATCH 016/127] Fix code check --- ge/common/op/ge_op_utils.cc | 2 +- ge/common/profiling/profiling_manager.cc | 2 +- ge/graph/build/model_builder.cc | 8 ++++---- ge/graph/build/stream_graph_optimizer.cc | 2 +- 
ge/graph/passes/base_pass.h | 2 +- ge/graph/passes/bitcast_pass.cc | 4 ++-- ge/graph/passes/transop_without_reshape_fusion_pass.cc | 8 ++++---- ge/graph/passes/transpose_transdata_pass.cc | 4 ++-- ge/graph/passes/variable_op_pass_bak.cc | 4 ++-- ge/graph/passes/variable_op_pass_bak.h | 4 ++-- ge/graph/preprocess/graph_preprocess.cc | 2 +- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 2 +- ge/graph/preprocess/insert_op/util_insert_aipp_op.cc | 2 +- ge/host_kernels/strided_slice_kernel.cc | 2 +- ge/ir_build/ge_ir_build.cc | 2 +- 15 files changed, 25 insertions(+), 25 deletions(-) diff --git a/ge/common/op/ge_op_utils.cc b/ge/common/op/ge_op_utils.cc index 579190d6..fc2990b6 100644 --- a/ge/common/op/ge_op_utils.cc +++ b/ge/common/op/ge_op_utils.cc @@ -357,7 +357,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::TransDataHWCK2KCH const char *w_data = (const char *)input; int64_t count = h * w * c * k; - GE_IF_BOOL_EXEC(count <= 0, GELOGW("Count value must be greater than 0, but count = %ld", count); return ); + GE_IF_BOOL_EXEC(count <= 0, GELOGW("Count value must be greater than 0, but count = %ld", count); return); float *buf = new (std::nothrow) float[count](); GE_RT_VOID_CHECK_NOTNULL(buf); float *src_buff = nullptr; diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 2f0f061f..ba03d671 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -217,7 +217,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In char profiling_mode_temp[MMPA_MAX_PATH] = { 0x00 }; char prof_options_temp[MMPA_MAX_PATH] = { 0x00 }; (void)mmGetEnv("PROFILING_MODE", profiling_mode_temp, MMPA_MAX_PATH); - (void)mmGetEnv("PROFILING_OPTIONS", prof_options_temp, MMPA_MAX_PATH ); + (void)mmGetEnv("PROFILING_OPTIONS", prof_options_temp, MMPA_MAX_PATH); const char *profiling_mode = profiling_mode_temp; const char *prof_options = prof_options_temp; if 
((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index d7039cfb..37eb499a 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -282,7 +282,7 @@ Status ModelBuilder::SetInputOutputDesc() { void ModelBuilder::AddNodeInputProperty() { for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return); vector src_name_list; vector src_index_list; for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { @@ -309,10 +309,10 @@ void ModelBuilder::AddNodeInputProperty() { for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return); GE_IF_BOOL_EXEC(node_op_desc->GetType() == NETOUTPUT, continue); auto out_control_anchor = node->GetOutControlAnchor(); - GE_IF_BOOL_EXEC(out_control_anchor == nullptr, GELOGW("out_control_anchor is nullptr"); return ); + GE_IF_BOOL_EXEC(out_control_anchor == nullptr, GELOGW("out_control_anchor is nullptr"); return); vector dst_name_list; vector dst_index_list; string dst_name_temp; @@ -330,7 +330,7 @@ void ModelBuilder::AddNodeInputProperty() { dst_name_temp = ""; int64_t dst_index = kWrongIndex; // assign an impossible value to dst_index. 
for (const auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - GE_IF_BOOL_EXEC(in_data_anchor == nullptr, GELOGW("in_data_anchor is nullptr"); return ); + GE_IF_BOOL_EXEC(in_data_anchor == nullptr, GELOGW("in_data_anchor is nullptr"); return); ge::NodePtr dst_node = in_data_anchor->GetOwnerNode(); dst_name_temp = dst_name_temp.empty() ? dst_node->GetName() : dst_name_temp + ":" + dst_node->GetName(); dst_index = in_data_anchor->GetIdx(); diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc index 582c080b..2933d413 100644 --- a/ge/graph/build/stream_graph_optimizer.cc +++ b/ge/graph/build/stream_graph_optimizer.cc @@ -38,7 +38,7 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap continue; } for (ge::NodePtr &node : subgraph->GetDirectNode()) { - GE_CHECK_NOTNULL_EXEC(node->GetOpDesc(), return ); + GE_CHECK_NOTNULL_EXEC(node->GetOpDesc(), return); if ((node->GetType() == END) || (node->GetType() == PLACEHOLDER)) { node->GetOpDesc()->SetId(static_cast(node_size)); node_size++; diff --git a/ge/graph/passes/base_pass.h b/ge/graph/passes/base_pass.h index bb41691d..15d6570d 100644 --- a/ge/graph/passes/base_pass.h +++ b/ge/graph/passes/base_pass.h @@ -47,7 +47,7 @@ class BaseNodePass { /// @param node /// @return /// - virtual Status Run(NodePtr &node) = 0; + virtual Status Run(NodePtr &node) = 0lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll; virtual ~BaseNodePass() = default; diff --git a/ge/graph/passes/bitcast_pass.cc b/ge/graph/passes/bitcast_pass.cc index 8388b21a..691b0f11 100644 --- a/ge/graph/passes/bitcast_pass.cc +++ b/ge/graph/passes/bitcast_pass.cc @@ -44,11 +44,11 @@ Status BitcastPass::Run(NodePtr &node) { return PARAM_INVALID; } ge::DataType dst_data_type; - if (CheckDstDataType(op_desc, dst_data_type) != SUCCESS) { + if (CheckDstDataCheckOutputShapeType(op_desc, dst_data_type) != SUCCESS) { return PARAM_INVALID; } - if (CheckOutputShape(op_desc, 
dst_data_type) != SUCCESS) { + if ((op_desc, dst_data_type) != SUCCESS) { return PARAM_INVALID; } diff --git a/ge/graph/passes/transop_without_reshape_fusion_pass.cc b/ge/graph/passes/transop_without_reshape_fusion_pass.cc index d2b3f1b1..f4584cad 100644 --- a/ge/graph/passes/transop_without_reshape_fusion_pass.cc +++ b/ge/graph/passes/transop_without_reshape_fusion_pass.cc @@ -63,7 +63,7 @@ void TransOpWithoutReshapeFusionPass::SetRemainNode( continue; } GELOGI("SetRemainNode node is %s", op_desc->GetName().c_str()); - GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(kRemainNode, true), GELOGE(INTERNAL_ERROR, "set ext attr failed"); return ); + GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(kRemainNode, true), GELOGE(INTERNAL_ERROR, "set ext attr failed"); return); } } @@ -594,7 +594,7 @@ void TransOpWithoutReshapeFusionPass::GetBeginOutDescAndEndInDesc(const int inde auto out_owner_node = out_peer_anchor->GetOwnerNode(); GE_CHECK_NOTNULL_JUST_RETURN(out_owner_node); auto out_peer_op_desc = out_owner_node->GetOpDesc(); - GE_IF_BOOL_EXEC(out_peer_op_desc == nullptr, GELOGE(INTERNAL_ERROR, "out_peer_op_desc is nullptr"); return ); + GE_IF_BOOL_EXEC(out_peer_op_desc == nullptr, GELOGE(INTERNAL_ERROR, "out_peer_op_desc is nullptr"); return); out_desc = out_peer_op_desc->GetInputDesc(out_peer_anchor->GetIdx()); auto in_peer_anchor = nodes_anchor.back().first; @@ -602,7 +602,7 @@ void TransOpWithoutReshapeFusionPass::GetBeginOutDescAndEndInDesc(const int inde auto in_owner_node = in_peer_anchor->GetOwnerNode(); GE_CHECK_NOTNULL_JUST_RETURN(in_owner_node); auto in_peer_op_desc = in_owner_node->GetOpDesc(); - GE_IF_BOOL_EXEC(in_peer_op_desc == nullptr, GELOGE(INTERNAL_ERROR, "in_peer_op_desc is nullptr"); return ); + GE_IF_BOOL_EXEC(in_peer_op_desc == nullptr, GELOGE(INTERNAL_ERROR, "in_peer_op_desc is nullptr"); return); in_desc = in_peer_op_desc->GetOutputDesc(in_peer_anchor->GetIdx()); } @@ -734,7 +734,7 @@ void TransOpWithoutReshapeFusionPass::RemoveNousedNodes(const ComputeGraphPtr &g 
continue; } - GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(kRemainNode, true), GELOGE(INTERNAL_ERROR, "set ext attr failed"); return ); + GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(kRemainNode, true), GELOGE(INTERNAL_ERROR, "set ext attr failed"); return); GELOGI("remove node:%s", node->GetName().c_str()); if (graph->RemoveNode(node) != GRAPH_SUCCESS) { GELOGW("remove node failed!node:%s", node->GetName().c_str()); diff --git a/ge/graph/passes/transpose_transdata_pass.cc b/ge/graph/passes/transpose_transdata_pass.cc index 7348f143..2178eac7 100644 --- a/ge/graph/passes/transpose_transdata_pass.cc +++ b/ge/graph/passes/transpose_transdata_pass.cc @@ -217,11 +217,11 @@ void TransposeTransDataPass::CopyInputEdges(NodePtr &origin_node, NodePtr &new_n } OutDataAnchorPtr out_anchor = origin_node->GetInDataAnchor(0)->GetPeerOutAnchor(); new_in_data_anchor->UnlinkAll(); - GE_IF_BOOL_EXEC(new_in_data_anchor->LinkFrom(out_anchor) != GRAPH_SUCCESS, GELOGW("Link failed"); return ); + GE_IF_BOOL_EXEC(new_in_data_anchor->LinkFrom(out_anchor) != GRAPH_SUCCESS, GELOGW("Link failed"); return); // control anchor only link to control anchor GE_IF_BOOL_EXEC( - GraphUtils::CopyInCtrlEdges(origin_node, new_node) != GRAPH_SUCCESS, GELOGW("Copy in ctrl edges failed"); return ); + GraphUtils::CopyInCtrlEdges(origin_node, new_node) != GRAPH_SUCCESS, GELOGW("Copy in ctrl edges failed"); return); } bool TransposeTransDataPass::TransDataCheckAccuracySupported(const OpDescPtr &op_desc) { diff --git a/ge/graph/passes/variable_op_pass_bak.cc b/ge/graph/passes/variable_op_pass_bak.cc index 3e40e686..44c17204 100644 --- a/ge/graph/passes/variable_op_pass_bak.cc +++ b/ge/graph/passes/variable_op_pass_bak.cc @@ -319,8 +319,8 @@ Status VariableOpPass::FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_roa } Status VariableOpPass::UpdateTransRoad(VarTransRoad &fusion_road, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops){ + map> &trans_type_to_changed_desc, + 
map> &trans_type_to_trans_ops){ vector delete_trans_type; for (auto &trans_type : first_path_trans_order) { if (trans_type_to_changed_desc.find(trans_type) == trans_type_to_changed_desc.end()) { diff --git a/ge/graph/passes/variable_op_pass_bak.h b/ge/graph/passes/variable_op_pass_bak.h index b9fbb90e..fccd063b 100644 --- a/ge/graph/passes/variable_op_pass_bak.h +++ b/ge/graph/passes/variable_op_pass_bak.h @@ -45,8 +45,8 @@ class VariableOpPass : public GraphPass { private: Status UpdateTransRoad(VarTransRoad &fusion_road, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops); Status DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, map> trans_type_to_changed_desc, diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index b899ee83..93b261aa 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1701,7 +1701,7 @@ Status GraphPrepare::PrepareOptimize() { try { (void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass); (void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass); - (void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass" , new MarkAgnosticPass); + (void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass", new MarkAgnosticPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); return INTERNAL_ERROR; diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 98712a82..7c8d9073 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -408,7 +408,7 @@ Status AippOp::ConvertRelatedInputNameToRank() { GE_CHECK_NOTNULL(aipp_params_); string related_input_name = 
aipp_params_->related_input_name(); - if(related_input_name.empty()) { + if (related_input_name.empty()) { return SUCCESS; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 1b926e4b..3b37003f 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -470,7 +470,7 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } } if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) { - string error_msg = "No max size found from switchn node[" + switchn->GetName()+ "]"; + string error_msg = "No max size found from switchn node[" + switchn->GetName() + "]"; GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc index 2fe74415..d52bd328 100644 --- a/ge/host_kernels/strided_slice_kernel.cc +++ b/ge/host_kernels/strided_slice_kernel.cc @@ -308,7 +308,7 @@ void StridedSliceKernel::ExpandStrideWithEllipsisMask(const size_t x_dims_num, if (orig_begin_vec.size() < x_dims_num) { for (size_t j = 1; j < (x_dims_num - orig_begin_vec.size() + 1); ++j) { orig_begin_vec.insert((orig_begin_vec.begin() + ellipsis_dim + j), 0); - orig_end_vec.insert((orig_end_vec.begin() + ellipsis_dim + j), x_dims.at(ellipsis_dim +j)); + orig_end_vec.insert((orig_end_vec.begin() + ellipsis_dim + j), x_dims.at(ellipsis_dim + j)); orig_stride_vec.insert((orig_stride_vec.begin() + ellipsis_dim + j), 1); } } diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 74aa6a60..77d5be51 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -543,7 +543,7 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { } auto ret = compute_graph->TopologicalSorting(); - if(ret != GRAPH_SUCCESS) { + if (ret != GRAPH_SUCCESS) { GELOGE(ret, "Acl topo logical sort failed."); return 
ret; } From 966daf4a16b0b83e1024fc4eb9cf1c3e6f5f2780 Mon Sep 17 00:00:00 2001 From: dongduo Date: Fri, 4 Dec 2020 16:52:45 +0800 Subject: [PATCH 017/127] Fix code check --- ge/graph/passes/base_pass.h | 2 +- ge/graph/passes/bitcast_pass.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/passes/base_pass.h b/ge/graph/passes/base_pass.h index 15d6570d..bb41691d 100644 --- a/ge/graph/passes/base_pass.h +++ b/ge/graph/passes/base_pass.h @@ -47,7 +47,7 @@ class BaseNodePass { /// @param node /// @return /// - virtual Status Run(NodePtr &node) = 0lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll; + virtual Status Run(NodePtr &node) = 0; virtual ~BaseNodePass() = default; diff --git a/ge/graph/passes/bitcast_pass.cc b/ge/graph/passes/bitcast_pass.cc index 691b0f11..e58ae3f2 100644 --- a/ge/graph/passes/bitcast_pass.cc +++ b/ge/graph/passes/bitcast_pass.cc @@ -44,7 +44,7 @@ Status BitcastPass::Run(NodePtr &node) { return PARAM_INVALID; } ge::DataType dst_data_type; - if (CheckDstDataCheckOutputShapeType(op_desc, dst_data_type) != SUCCESS) { + if (CheckDstDataType(op_desc, dst_data_type) != SUCCESS) { return PARAM_INVALID; } From 0a4d83cd10e8831250a454a5e8588637a847d906 Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Fri, 4 Dec 2020 16:54:40 +0800 Subject: [PATCH 018/127] update submodule header file --- metadef | 2 +- parser | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadef b/metadef index 4176fab0..29c31bb8 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 4176fab0cb2fd4f8794061916878983afb75c8da +Subproject commit 29c31bb87d8bbe6904ab6fa72034a803fb50a746 diff --git a/parser b/parser index 47c1c18b..ba956d34 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 47c1c18b4b8e5ab38ae1e380c9f1671cbafc4aee +Subproject commit ba956d349d8ad3e864d27467f4f0119333cbadc6 From b9469aa51fd01c717d978214d8dcf0f417ab0373 Mon Sep 17 00:00:00 2001 From: cclworkaccount Date: Fri, 4 Dec 2020 
17:16:12 +0800 Subject: [PATCH 019/127] update ge/graph/load/new_model_manager/task_info/hccl_task_info.cc. --- ge/graph/load/new_model_manager/task_info/hccl_task_info.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index b09a4fce..442a1383 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -279,9 +279,9 @@ Status HcclTaskInfo::SetAddrs(const std::shared_ptr &op_desc, output_data_addr = output_data_addrs_.empty() ? nullptr : output_data_addrs_[i]; } kernel_hccl_infos[i].inputDataAddr = input_data_addr; - if (hccl_type == HCOMALLGATHER || hccl_type == HCOMRECEIVE || hccl_type == HVDCALLBACKALLGATHER || hccl_type == HCOMREDUCE) { + if (hccl_type == HCOMALLGATHER || hccl_type == HCOMRECEIVE || hccl_type == HVDCALLBACKALLGATHER) { kernel_hccl_infos[i].outputDataAddr = output_data_addr; - } else if (hccl_type == HCOMALLREDUCE || hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE) { + } else if (hccl_type == HCOMALLREDUCE || hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE || hccl_type == HCOMREDUCE) { GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclOperationType(op_desc, op_type), "davinci_model: GetHcomOperationType fail!"); kernel_hccl_infos[i].outputDataAddr = output_data_addr; From bef957b60009db958ad4aecc2b1411f44bc010b7 Mon Sep 17 00:00:00 2001 From: dongduo Date: Fri, 4 Dec 2020 17:22:38 +0800 Subject: [PATCH 020/127] Fix code check --- ge/graph/passes/bitcast_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/passes/bitcast_pass.cc b/ge/graph/passes/bitcast_pass.cc index e58ae3f2..8388b21a 100644 --- a/ge/graph/passes/bitcast_pass.cc +++ b/ge/graph/passes/bitcast_pass.cc @@ -48,7 +48,7 @@ Status BitcastPass::Run(NodePtr &node) { return PARAM_INVALID; } - if ((op_desc, 
dst_data_type) != SUCCESS) { + if (CheckOutputShape(op_desc, dst_data_type) != SUCCESS) { return PARAM_INVALID; } From f0eba00f7f5458bf0f23bcf96d9a0b5ae5389a51 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Fri, 4 Dec 2020 17:30:26 +0800 Subject: [PATCH 021/127] Feature: Support single op profiling --- ge/single_op/single_op.cc | 121 +++++++++++++------------------------- ge/single_op/single_op.h | 2 - 2 files changed, 41 insertions(+), 82 deletions(-) diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 2ff5f44d..95f98b62 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -35,6 +35,45 @@ size_t GetAlignedSize(size_t size) { size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; return aligned_size; } + +Status ProfilingTaskInfo(OpTask *op_task) { + if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { + return SUCCESS; + } + + string model_name; + string op_name; + uint32_t model_id; + uint32_t block_dim; + if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); + return ACL_ERROR_GE_PARAM_INVALID; + } + GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); + std::vector task_desc_info; + uint32_t task_id = 0; + uint32_t stream_id = 0; + if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); + return ACL_ERROR_GE_PARAM_INVALID; + } + + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = model_name; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = block_dim; + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); + task_desc_info.emplace_back(tmp_task_desc_info); + + 
std::vector compute_graph_info; + + auto &profiling_manager = ProfilingManager::Instance(); + profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info, + !profiling_manager.IsAclApiMode()); + return SUCCESS; +} } // namespace SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) { @@ -176,45 +215,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c return ret; } -Status SingleOp::ProfilingTaskInfo(OpTask *op_task) { - if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { - return SUCCESS; - } - - string model_name; - string op_name; - uint32_t model_id; - uint32_t block_dim; - if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); - return ACL_ERROR_GE_PARAM_INVALID; - } - GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); - std::vector task_desc_info; - uint32_t task_id = 0; - uint32_t stream_id = 0; - if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); - return ACL_ERROR_GE_PARAM_INVALID; - } - - TaskDescInfo tmp_task_desc_info; - tmp_task_desc_info.model_name = model_name; - tmp_task_desc_info.op_name = op_name; - tmp_task_desc_info.block_dim = block_dim; - tmp_task_desc_info.task_id = task_id; - tmp_task_desc_info.stream_id = stream_id; - GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); - task_desc_info.emplace_back(tmp_task_desc_info); - - std::vector compute_graph_info; - - auto &profiling_manager = ProfilingManager::Instance(); - profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info, - !profiling_manager.IsAclApiMode()); - return SUCCESS; -} - void SingleOp::SetStream(rtStream_t stream) { stream_ = stream; } @@ 
-324,13 +324,13 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, if (op_task_->GetOpTaskType() == OP_TASK_TBE) { auto ret = ExecuteTbeTask(input_desc, inputs, output_desc, outputs); if (ret == SUCCESS) { - GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_)); + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get())); } return ret; } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { auto aicpu_ret = op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); if (aicpu_ret == SUCCESS) { - GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_)); + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get())); } return aicpu_ret; } else { @@ -340,43 +340,4 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; } } - -Status DynamicSingleOp::ProfilingTaskInfo(std::unique_ptr &op_task) { - if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { - return SUCCESS; - } - - string model_name; - string op_name; - uint32_t model_id; - uint32_t block_dim; - if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); - return ACL_ERROR_GE_PARAM_INVALID; - } - GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); - std::vector task_desc_info; - uint32_t task_id = 0; - uint32_t stream_id = 0; - if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); - return ACL_ERROR_GE_PARAM_INVALID; - } - - TaskDescInfo tmp_task_desc_info; - tmp_task_desc_info.model_name = model_name; - tmp_task_desc_info.op_name = op_name; - tmp_task_desc_info.block_dim = block_dim; - tmp_task_desc_info.task_id = task_id; - tmp_task_desc_info.stream_id = stream_id; - GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", 
op_name.c_str(), task_id, stream_id); - task_desc_info.emplace_back(tmp_task_desc_info); - - std::vector compute_graph_info; - - auto &profiling_manager = ProfilingManager::Instance(); - profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info, - !profiling_manager.IsAclApiMode()); - return SUCCESS; -} } // namespace ge diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h index 05b468f3..14ef8ce1 100755 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -42,7 +42,6 @@ class SingleOp { Status ValidateArgs(const std::vector &inputs, const std::vector &outputs); Status UpdateArgs(const std::vector &inputs, const std::vector &outputs); Status GetArgs(const std::vector &inputs, const std::vector &outputs); - Status ProfilingTaskInfo(OpTask *op_task); friend class SingleOpModel; std::mutex *stream_mutex_; @@ -80,7 +79,6 @@ class DynamicSingleOp { const vector &inputs, vector &output_desc, vector &outputs); - Status ProfilingTaskInfo(std::unique_ptr &op_task); std::unique_ptr op_task_; uintptr_t resource_id_ = 0; From 94014900dfe81801db2e4e65e64d9da3fc89e780 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Sat, 5 Dec 2020 10:08:23 +0800 Subject: [PATCH 022/127] Feature: Support single op profiling --- ge/executor/ge_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index fedd13b7..3e916916 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -283,6 +283,7 @@ Status GeExecutor::Initialize() { // Start profiling Options profiling_options; profiling_options.device_id = 0; + // job id need to be set, the value is meaningless; profiling_options.job_id = "1"; ProfilingManager::Instance().Init(profiling_options); From a31e700c3ad9c3ef316b6d271994f838b7dfe808 Mon Sep 17 00:00:00 2001 From: chuxing Date: Sat, 5 Dec 2020 10:17:34 +0800 Subject: [PATCH 023/127] fixing static check --- ge/hybrid/executor/hybrid_profiler.cc | 2 +- 
ge/hybrid/executor/subgraph_executor.cc | 1 + ge/hybrid/executor/worker/shape_inference_engine.cc | 6 +++--- ge/hybrid/model/node_item.h | 4 ++-- .../node_executor/compiledsubgraph/known_node_executor.h | 2 +- .../node_executor/ge_local/ge_local_node_executor.cc | 8 ++++---- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ge/hybrid/executor/hybrid_profiler.cc b/ge/hybrid/executor/hybrid_profiler.cc index 7228197f..336a633f 100644 --- a/ge/hybrid/executor/hybrid_profiler.cc +++ b/ge/hybrid/executor/hybrid_profiler.cc @@ -25,7 +25,7 @@ namespace ge { namespace hybrid { namespace { const int kMaxEvents = 10000; -const int kEventDescMax = 256; +const int kEventDescMax = 512; const int kMaxEventTypes = 8; const int kIndent = 8; } diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 76a6cc37..5a464f8e 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -93,6 +93,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetName().c_str(), i); GE_CHECK_LE(i + 1, input_desc.size()); const auto &tensor_desc = input_desc[i]; + GE_CHECK_NOTNULL(tensor_desc); auto node_state = subgraph_context_->GetOrCreateNodeState(input_node); GE_CHECK_NOTNULL(node_state); node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape()); diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index bd429b21..d4019eda 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -164,7 +164,7 @@ Status ShapeInferenceEngine::InferShapeForSubgraph(const NodeItem &node_item, co for (auto &it : fused_subgraph.input_mapping) { auto parent_tensor_desc = node_item.MutableInputDesc(it.first); GE_CHECK_NOTNULL(parent_tensor_desc); - GELOGD("Start to update shape by input[%u]", it.first); + GELOGD("Start 
to update shape by input[%d]", it.first); GELOGD("Update shape to [%s]", parent_tensor_desc->GetShape().ToString().c_str()); GELOGD("Update original shape to [%s]", parent_tensor_desc->GetOriginShape().ToString().c_str()); for (auto &tensor_desc : it.second) { @@ -183,12 +183,12 @@ Status ShapeInferenceEngine::InferShapeForSubgraph(const NodeItem &node_item, co } for (auto &it : fused_subgraph.output_mapping) { - uint32_t parent_output_idx = it.first; + int parent_output_idx = it.first; const auto &op_desc = it.second; GELOGD("Update parent output[%d] by [%s]", parent_output_idx, op_desc->GetName().c_str()); auto input_desc = op_desc->MutableInputDesc(0); GE_CHECK_NOTNULL(input_desc); - auto parent_output_tensor_desc = node_item.op_desc->MutableOutputDesc(parent_output_idx); + auto parent_output_tensor_desc = node_item.MutableOutputDesc(parent_output_idx); GE_CHECK_NOTNULL(parent_output_tensor_desc); GELOGD("Update shape to [%s]", input_desc->GetShape().ToString().c_str()); GELOGD("Update original shape to [%s]", input_desc->GetOriginShape().ToString().c_str()); diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index 8fac4a73..8fbdc648 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ -30,8 +30,8 @@ class NodeTask; class NodeExecutor; struct FusedSubgraph { - std::map> input_mapping; - std::map output_mapping; + std::map> input_mapping; + std::map output_mapping; std::vector nodes; ComputeGraphPtr graph; }; diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index fb1966b4..2dde993b 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -27,7 +27,7 @@ class HybridModel; class KnownNodeTask : public NodeTask { public: - KnownNodeTask(std::shared_ptr davinci_model) + explicit KnownNodeTask(std::shared_ptr davinci_model) : 
davinci_model_(davinci_model) {} diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc index ee45964c..7a83641d 100755 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc @@ -61,10 +61,10 @@ Status RefInputTask::Execute(TaskContext &context) { Status RefInputTask::RefOneByOne(TaskContext &context) { GELOGI("node %s type %s ref input one by one begin.", node_name_.c_str(), node_type_.c_str()); - uint32_t input_num = context.NumInputs(); - uint32_t output_num = context.NumOutputs(); + int input_num = context.NumInputs(); + int output_num = context.NumOutputs(); if (output_num > input_num) { - GELOGE(INTERNAL_ERROR, "node %s type %s has %u outputs but only %u inputs, can't ref one by one.", + GELOGE(INTERNAL_ERROR, "node %s type %s has %d outputs but only %d inputs, can't ref one by one.", node_name_.c_str(), node_type_.c_str(), output_num, input_num); return INTERNAL_ERROR; } @@ -72,7 +72,7 @@ Status RefInputTask::RefOneByOne(TaskContext &context) { auto input = context.GetInput(out_index); GE_CHECK_NOTNULL(input); GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); - GELOGD("node %s type %s output[%u] ref input[%u] addr=%p.", + GELOGD("node %s type %s output[%d] ref input[%d] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, out_index, input->GetData()); } GELOGI("node %s type %s ref input one by one end.", node_name_.c_str(), node_type_.c_str()); From 1bdbc03b746fceea975376ca40e38a941ec2ccb6 Mon Sep 17 00:00:00 2001 From: dongduo Date: Sat, 5 Dec 2020 17:50:43 +0800 Subject: [PATCH 024/127] Fix code check --- ge/client/ge_api.cc | 2 +- ge/graph/load/new_model_manager/davinci_model.cc | 2 +- ge/graph/load/new_model_manager/model_manager.cc | 2 +- ge/graph/optimize/mem_rw_conflict_optimize.cc | 2 +- ge/host_kernels/strided_slice_kernel.cc | 6 ++++-- ge/hybrid/model/hybrid_model_builder.cc | 6 
+++--- ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 2 +- ge/hybrid/node_executor/task_context.cc | 4 +--- tests/depends/error_manager/src/error_manager_stub.cc | 2 +- tests/depends/hccl/src/hccl_stub.cc | 10 +++++----- tests/depends/runtime/src/runtime_stub.cc | 2 +- .../graph/build/logical_stream_allocator_unittest.cc | 4 ++-- tests/ut/ge/graph/load/new_op_test_utils.h | 2 +- 13 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 9ecc3016..66958310 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -134,7 +134,7 @@ Status GEInitialize(const std::map &options) { Status GEInitialize(const std::map &options) { std::map str_options; - for (auto & option : options) { + for (auto &option : options) { if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) { GELOGE(FAILED, "GEInitialize options is nullptr."); return FAILED; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index e2923286..ba7ca226 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2676,7 +2676,7 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b cur_dynamic_dims_.clear(); cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); GE_CHK_RT_RET(rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), - netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST)); + netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST)); } GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); if (GenOutputTensorInfo(op_desc, data_index, output_data, outputs) != SUCCESS) { diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index b7a0983d..080ca889 100755 --- 
a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1055,7 +1055,7 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, - ACL_ERROR_GE_PARAM_INVALID, + ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); GenModelId(&model_id); diff --git a/ge/graph/optimize/mem_rw_conflict_optimize.cc b/ge/graph/optimize/mem_rw_conflict_optimize.cc index 2fabc035..5888471a 100644 --- a/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -643,7 +643,7 @@ Status HandleAllreduceDuplicateInput(ComputeGraphPtr &compute_graph) { auto ret = GraphUtils::InsertNodeBetweenDataAnchors(pre_out_anchor, in_data_anchor, identity_node); GE_CHK_STATUS_RET(ret, "Fail to insert identity."); GELOGI("InsertNode %s between %s and %s successfully.", identity_node->GetName().c_str(), - pre_node->GetName().c_str(), node->GetName().c_str()); + pre_node->GetName().c_str(), node->GetName().c_str()); } } } diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc index d52bd328..213f6d91 100644 --- a/ge/host_kernels/strided_slice_kernel.cc +++ b/ge/host_kernels/strided_slice_kernel.cc @@ -284,8 +284,10 @@ void StridedSliceKernel::ExpandDimsWithNewAxis(const ConstGeTensorPtr &begin_ten } void StridedSliceKernel::ExpandStrideWithEllipsisMask(const size_t x_dims_num, - const vector &x_dims, vector &orig_begin_vec, - vector &orig_end_vec, vector &orig_stride_vec) { + const vector &x_dims, + vector &orig_begin_vec, + vector &orig_end_vec, + vector &orig_stride_vec) { if (attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK) != 0) { auto end_mask = 
attr_value_map_.at(STRIDE_SLICE_ATTR_END_MASK); diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index d5a328f3..f9564a8f 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -313,9 +313,9 @@ Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(*op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { GELOGE(INTERNAL_ERROR, - "[%s] Failed to get attr [%s]", - op_desc->GetName().c_str(), - ATTR_NAME_PARENT_NODE_INDEX.c_str()); + "[%s] Failed to get attr [%s]", + op_desc->GetName().c_str(), + ATTR_NAME_PARENT_NODE_INDEX.c_str()); return INTERNAL_ERROR; } diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index db3c91cd..573739bc 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -724,7 +724,7 @@ Status AicpuNodeTask::UpdateIoAddr(TaskContext &context) { auto io_addr = args_.get() + sizeof(aicpu::AicpuParamHead); // if has input and output, need copy to ioaddr int cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), - &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); + &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); GE_CHK_BOOL_RET_STATUS(cpy_ret == 0, INTERNAL_ERROR, "Node[%s] memcpy io addr to AicpuParamHead failed, ret=%d, args_size=%u, io nums=%zu.", node_name_.c_str(), cpy_ret, args_size_, io_addrs.size()); diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index b7152878..77004f99 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -233,9 +233,7 @@ Status TaskContext::AllocateOutput(int index, } else { GE_CHK_STATUS_RET_NOLOG(AllocateTensor(tensor_desc, outputs_start_[index], attr)); GELOGD("Allocating output successfully. node: %s. 
index = %d, size = %zu", - node_item_->NodeName().c_str(), - index, - outputs_start_[index].GetSize()); + node_item_->NodeName().c_str(), index, outputs_start_[index].GetSize()); } } } diff --git a/tests/depends/error_manager/src/error_manager_stub.cc b/tests/depends/error_manager/src/error_manager_stub.cc index 4f6b6b3d..edf5a487 100644 --- a/tests/depends/error_manager/src/error_manager_stub.cc +++ b/tests/depends/error_manager/src/error_manager_stub.cc @@ -58,7 +58,7 @@ /// @param [in] value: vector parameter value /// void ErrorManager::ATCReportErrMessage(std::string error_code, const std::vector &key, - const std::vector &value) { + const std::vector &value) { } /// diff --git a/tests/depends/hccl/src/hccl_stub.cc b/tests/depends/hccl/src/hccl_stub.cc index 1cc8fdb3..b9b9d4f6 100644 --- a/tests/depends/hccl/src/hccl_stub.cc +++ b/tests/depends/hccl/src/hccl_stub.cc @@ -19,26 +19,26 @@ #include "hccl/hcom.h" HcclResult hcom_all_gather(const char *tag, void *input_count_ptr, void *output_ptr, u64 input_count, - HcclDataType data_type, const char *group, rtStream_t stream) { + HcclDataType data_type, const char *group, rtStream_t stream) { return HCCL_SUCCESS; } HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType data_type, u32 root, - const char *group, rtStream_t stream) { + const char *group, rtStream_t stream) { return HCCL_SUCCESS; } HcclResult hcom_all_reduce(const char *tag, void *input_ptr, void *output_ptr, u64 count, HcclDataType data_type, - HcclReduceOp op, const char *group, rtStream_t stream) { + HcclReduceOp op, const char *group, rtStream_t stream) { return HCCL_SUCCESS; } HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 max_segment_num, - u32 *segment_num, u32 *segment_idx) { + u32 *segment_num, u32 *segment_idx) { return HCCL_SUCCESS; } HcclResult hcom_reduce_scatter(const char *tag, void *input_ptr, void *output_ptr, u64 count, - HcclDataType data_type, HcclReduceOp op, 
const char *group, rtStream_t stream) { + HcclDataType data_type, HcclReduceOp op, const char *group, rtStream_t stream) { return HCCL_SUCCESS; } diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 2ab6684d..75eefdd1 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -325,7 +325,7 @@ rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback) } rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in, - rtMallocHostSharedMemoryOut *out) + rtMallocHostSharedMemoryOut *out) { out->ptr = new uint8_t[in->size]; out->devPtr = new uint8_t[in->size]; diff --git a/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc b/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc index 68416409..5b87939f 100644 --- a/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc +++ b/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc @@ -306,8 +306,8 @@ class UtestLogicalStreamAllocator : public testing::Test { max_parallel_num["aicpu"] = parallel_num; Status status = AssignLogicalStreams({const1, const2, get_next, genmask1, genmask2, domask, subgraph4, subgraph5, - subgraph6, allreduce1, allreduce2, apply1, apply2}, - confs, max_parallel_num); + subgraph6, allreduce1, allreduce2, apply1, apply2}, + confs, max_parallel_num); EXPECT_EQ(status, ge::SUCCESS); EXPECT_EQ(GetStream(get_next), 0); diff --git a/tests/ut/ge/graph/load/new_op_test_utils.h b/tests/ut/ge/graph/load/new_op_test_utils.h index 325a3f1f..4cbc78ac 100644 --- a/tests/ut/ge/graph/load/new_op_test_utils.h +++ b/tests/ut/ge/graph/load/new_op_test_utils.h @@ -154,7 +154,7 @@ class OmeTestOpUtils { if (model->HasAttr(MODEL_ATTR_TASKS)) { ge::Buffer task_buffer; GE_CHK_BOOL_RET_STATUS(ge::AttrUtils::GetZeroCopyBytes(model, MODEL_ATTR_TASKS, task_buffer), FAILED, - "Get bytes failed."); + "Get bytes failed."); std::shared_ptr task = ge::MakeShared(); GE_CHECK_NOTNULL(task); 
GE_IF_BOOL_EXEC(task_buffer.GetData() == nullptr, GELOGE(FAILED, "Get data fail"); return FAILED); From 8567f6fe4cc444083b99e50f8491955c849948ff Mon Sep 17 00:00:00 2001 From: lianghao Date: Mon, 7 Dec 2020 11:13:18 +0800 Subject: [PATCH 025/127] decrease om size --- ge/CMakeLists.txt | 1 + ge/ge_inference.mk | 1 + ge/ge_runner.mk | 1 + ge/graph/build/model_builder.cc | 1 + ge/graph/manager/graph_manager.cc | 5 + ge/graph/passes/attach_stream_label_pass.cc | 28 +- ge/graph/passes/attach_stream_label_pass.h | 4 +- ge/graph/passes/base_pass.cc | 2 +- .../common_subexpression_elimination_pass.cc | 3 +- ge/graph/passes/const_pass.cc | 55 +++ ge/graph/passes/const_pass.h | 29 ++ ge/graph/passes/dimension_adjust_pass.cc | 64 +++ ge/graph/passes/dimension_adjust_pass.h | 4 + ge/graph/passes/enter_pass.cc | 48 ++- ge/graph/passes/enter_pass.h | 3 +- ge/graph/passes/folding_pass.cc | 5 +- ge/graph/passes/merge_to_stream_merge_pass.cc | 10 - ge/graph/passes/next_iteration_pass.cc | 262 ++++-------- ge/graph/passes/next_iteration_pass.h | 16 +- ge/graph/preprocess/multi_batch_copy_graph.cc | 401 +++++++++++++++--- ge/graph/preprocess/multi_batch_copy_graph.h | 16 +- 21 files changed, 664 insertions(+), 295 deletions(-) create mode 100644 ge/graph/passes/const_pass.cc create mode 100644 ge/graph/passes/const_pass.h diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 88a5c52f..b037f4a4 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -154,6 +154,7 @@ set(TRAIN_SRC_LIST "graph/passes/compile_nodes_pass.cc" "graph/passes/constant_folding_pass.cc" "graph/passes/constant_fuse_same_pass.cc" + "graph/passes/control_trigger_pass.cc" "graph/passes/dimension_adjust_pass.cc" "graph/passes/dimension_compute_pass.cc" diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 0987f148..fe76a612 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -189,6 +189,7 @@ OMG_HOST_SRC_FILES := \ graph/passes/control_trigger_pass.cc \ graph/passes/cond_pass.cc \ 
graph/passes/cond_remove_pass.cc \ + graph/passes/const_pass.cc \ graph/passes/for_pass.cc \ graph/passes/enter_pass.cc \ graph/passes/assign_pass.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index a2679ed1..58ad1266 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -123,6 +123,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/compile_nodes_pass.cc \ graph/passes/constant_folding_pass.cc \ graph/passes/constant_fuse_same_pass.cc \ + graph/passes/const_pass.cc \ graph/passes/control_trigger_pass.cc \ graph/passes/dimension_adjust_pass.cc \ graph/passes/dimension_compute_pass.cc \ diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 37eb499a..3be45895 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -224,6 +224,7 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_ GeTensorDesc &tensor_desc = weight->MutableTensorDesc(); size_t output_size = weight->GetData().size(); TensorUtils::SetDataOffset(tensor_desc, mem_offset); + GELOGD("Node: %s, weight size: %zu.", node->GetName().c_str(), output_size); mem_offset += output_size; } return SUCCESS; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 5b194c44..0e731ca3 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -56,6 +56,7 @@ #include "graph/passes/cond_remove_pass.h" #include "graph/passes/constant_folding_pass.h" #include "graph/passes/constant_fuse_same_pass.h" +#include "graph/passes/const_pass.h" #include "graph/passes/control_trigger_pass.h" #include "graph/passes/ctrl_edge_transfer_pass.h" #include "graph/passes/dimension_adjust_pass.h" @@ -2186,6 +2187,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { TransposeTransDataPass transpose_transdata_pass; TransOpSymmetryEliminationPass symmetry_elimination_pass; DimensionComputePass dimension_compute_pass; + ConstPass const_pass; 
names_to_passes.emplace_back("EnterPass", &enter_pass); names_to_passes.emplace_back("AddNPass", &addn_pass); names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination); @@ -2199,6 +2201,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); + names_to_passes.emplace_back("ConstPass", &const_pass); GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "GraphManager::OptimizeStage1_2"); @@ -2239,6 +2242,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", new (std::nothrow) VariableRefUselessControlOutDeletePass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ReshapeRecoveryPass", new (std::nothrow) ReshapeRecoveryPass)) + GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::CommonSubexpressionEliminationPass", + new (std::nothrow) CommonSubexpressionEliminationPass)); if (options_.train_graph_flag) { // Priority: The GlobalStepInsertPass should work before graph partitioner. 
// Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory diff --git a/ge/graph/passes/attach_stream_label_pass.cc b/ge/graph/passes/attach_stream_label_pass.cc index c0e0f669..cd3509c7 100644 --- a/ge/graph/passes/attach_stream_label_pass.cc +++ b/ge/graph/passes/attach_stream_label_pass.cc @@ -18,6 +18,8 @@ #include "ge/ge_api_types.h" #include "graph/common/omg_util.h" +using std::string; + namespace ge { Status AttachStreamLabelPass::Run(ComputeGraphPtr graph) { GELOGD("AttachStreamLabelPass Enter."); @@ -187,21 +189,10 @@ Status AttachStreamLabelPass::UpdateEnterNode() { } std::stack enter_nodes; - std::string batch_label; for (const auto &enter_node : pair.second) { enter_nodes.emplace(enter_node); - std::string tmp_label; - (void)AttrUtils::GetStr(enter_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); - if (!tmp_label.empty()) { - if (batch_label.empty()) { - batch_label = tmp_label; - } else if (batch_label != tmp_label) { - GELOGE(FAILED, "multi batch_label exist, label1=%s, label2=%s.", batch_label.c_str(), tmp_label.c_str()); - return FAILED; - } - } } - if (UpdateLoopBranch(enter_nodes, active_label_list[0], batch_label) != SUCCESS) { + if (UpdateLoopBranch(enter_nodes, active_label_list[0]) != SUCCESS) { GELOGE(FAILED, "Update stream_label for loop_branch failed."); return FAILED; } @@ -226,10 +217,7 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no } for (const auto &enter_node : enter_nodes) { - GE_CHECK_NOTNULL(enter_node->GetOpDesc()); - if (enter_node->GetOpDesc()->HasAttr(ATTR_NAME_STREAM_LABEL)) { - GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); - } + GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); } return SUCCESS; } @@ -241,8 +229,7 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no /// @param [in] batch_label /// @return Status /// -Status 
AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label, - const std::string &batch_label) { +Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_nodes, const string &stream_label) { std::stack nodes(enter_nodes); NodePtr cur_node = nullptr; while (!nodes.empty()) { @@ -251,11 +238,6 @@ Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_ for (const NodePtr &out_node : cur_node->GetOutAllNodes()) { OpDescPtr out_desc = out_node->GetOpDesc(); GE_CHECK_NOTNULL(out_desc); - std::string tmp_label; - (void)AttrUtils::GetStr(out_desc, ATTR_NAME_BATCH_LABEL, tmp_label); - if (!tmp_label.empty() && (tmp_label != batch_label)) { - continue; - } std::string out_type = out_desc->GetType(); bool need_skip = out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == REFENTER) || diff --git a/ge/graph/passes/attach_stream_label_pass.h b/ge/graph/passes/attach_stream_label_pass.h index 19f11480..ad71d58f 100755 --- a/ge/graph/passes/attach_stream_label_pass.h +++ b/ge/graph/passes/attach_stream_label_pass.h @@ -58,11 +58,9 @@ class AttachStreamLabelPass : public GraphPass { /// @brief Update stream_label for loop_branch /// @param [in] enter_nodes /// @param [in] stream_label - /// @param [in] batch_label /// @return Status /// - static Status UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label, - const std::string &batch_label); + static Status UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label); /// /// @brief Update stream_label start with enter nodes diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 68efbeb9..8d0bcf25 100755 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -96,7 +96,7 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder node->GetName().c_str(), node->GetType().c_str()); continue; } - if 
(node_to_re_pass->IsAllInNodesSeen(nodes_seen)) { + if (node_to_re_pass->IsAllInNodesSeen(nodes_seen) || node_to_re_pass->GetType() == ENTER) { GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str()); nodes_re_pass.insert(node_to_re_pass); } else { diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc index a4662d5d..9e771b65 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.cc +++ b/ge/graph/passes/common_subexpression_elimination_pass.cc @@ -58,7 +58,8 @@ std::string GetCseKey(const NodePtr &node) { /// To avoid delete wrong nodes(e.g. stateful nodes), /// only nodes have folding kernel will be considered for the CSE process bool IsNodeSupportCse(const NodePtr &node) { - if (HostCpuEngine::CheckSupported(NodeUtils::GetNodeType(*node))) { + if (HostCpuEngine::CheckSupported(NodeUtils::GetNodeType(*node)) || node->GetType() == CONSTANT || + node->GetType() == CONSTANTOP) { return true; } return folding_pass::GetKernelByType(node) != nullptr; diff --git a/ge/graph/passes/const_pass.cc b/ge/graph/passes/const_pass.cc new file mode 100644 index 00000000..42b3c23f --- /dev/null +++ b/ge/graph/passes/const_pass.cc @@ -0,0 +1,55 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/passes/const_pass.h" + +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" + +namespace ge { +Status ConstPass::Run(NodePtr &node) { + GE_CHECK_NOTNULL(node); + + if ((node->GetType() != CONSTANT) && (node->GetType() != CONSTANTOP)) { + return SUCCESS; + } + GELOGD("ConstPass running, node: %s.", node->GetName().c_str()); + + // const has no control input + if (node->GetInControlNodes().empty()) { + auto out_ctrl_anchor = node->GetOutControlAnchor(); + if (out_ctrl_anchor != nullptr) { + GELOGD("Node: %s unlink all out control edge.", node->GetName().c_str()); + out_ctrl_anchor->UnlinkAll(); + } + + if (node->GetOutAllNodes().empty()) { + // it is an isolated const, just remove it. + GELOGD("Delete isolated const: %s.", node->GetName().c_str()); + auto graph = node->GetOwnerComputeGraph(); + if (GraphUtils::RemoveNodeWithoutRelink(graph, node) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Remove const %s failed.", node->GetName().c_str()); + return FAILED; + } + AddNodeDeleted(node); + } + } + + return SUCCESS; +} +} // namespace ge \ No newline at end of file diff --git a/ge/graph/passes/const_pass.h b/ge/graph/passes/const_pass.h new file mode 100644 index 00000000..a7e011ec --- /dev/null +++ b/ge/graph/passes/const_pass.h @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_CONST_PASS_H_ +#define GE_GRAPH_PASSES_CONST_PASS_H_ + +#include "graph/passes/base_pass.h" + +namespace ge { +class ConstPass : public BaseNodePass { + public: + Status Run(NodePtr &node) override; +}; +} // namespace ge + +#endif // GE_GRAPH_PASSES_CONST_PASS_H_ \ No newline at end of file diff --git a/ge/graph/passes/dimension_adjust_pass.cc b/ge/graph/passes/dimension_adjust_pass.cc index fc5fe69f..bfb9cb4f 100755 --- a/ge/graph/passes/dimension_adjust_pass.cc +++ b/ge/graph/passes/dimension_adjust_pass.cc @@ -80,7 +80,71 @@ Status DimensionAdjustPass::Run(ge::NodePtr &node) { } } + ret = DealWithInNodes(node); + if (ret != SUCCESS) { + GELOGE(ret, "DealWithInNodes of %s failed.", node->GetName().c_str()); + return ret; + } + std::vector data_relink_io_map = {kDataInputIndex}; return IsolateAndDeleteNode(node, data_relink_io_map); } + +Status DimensionAdjustPass::DealWithInNodes(NodePtr &node) { + GE_CHECK_NOTNULL(node); + GE_CHECK_NOTNULL(node->GetOpDesc()); + auto graph = node->GetOwnerComputeGraph(); + auto in_data_anchors = node->GetAllInDataAnchors(); + for (auto &in_data_anchor : in_data_anchors) { + if (in_data_anchor == nullptr) { + continue; + } + auto in_node_anchor = in_data_anchor->GetPeerOutAnchor(); + if (in_node_anchor == nullptr) { + continue; + } + auto in_node = in_node_anchor->GetOwnerNode(); + if (in_node->GetType() == SWITCHN) { + GELOGI("The in_node name is %s, and node type is %s.", in_node->GetName().c_str(), in_node->GetType().c_str()); + auto identity_name = node->GetName() + "_ctrl_identity_" + std::to_string(in_data_anchor->GetIdx()); + auto identity = + AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); + GE_CHECK_NOTNULL(identity); + GELOGI("Create new identity node[%s] success.", identity->GetName().c_str()); + 
GE_CHK_STATUS_RET(GraphUtils::AddEdge(in_node_anchor, identity->GetInDataAnchor(0))) + GE_CHECK_NOTNULL(identity->GetOutControlAnchor()); + if (identity->GetOutControlAnchor()->IsLinkedWith(node->GetInControlAnchor())) { + continue; + } + GE_CHK_STATUS_RET(GraphUtils::AddEdge(identity->GetOutControlAnchor(), node->GetInControlAnchor())) + } + } + + return SUCCESS; +} + +NodePtr DimensionAdjustPass::AddIdentityNodeToGraph(const string &name, const GeTensorDesc &tensor, + ComputeGraphPtr &graph) { + if (graph == nullptr) { + GELOGE(INTERNAL_ERROR, "Comput graph ptr is null in creating identity node."); + return nullptr; + } + + OpDescPtr desc = MakeShared("", ""); + if (desc == nullptr) { + GELOGE(MEMALLOC_FAILED, "Failed to create op desc."); + return nullptr; + } + + desc->SetName(name); + desc->SetType(IDENTITY); + auto ret = desc->AddInputDesc(tensor); + auto ret2 = desc->AddOutputDesc(tensor); + if ((ret != GRAPH_SUCCESS) || (ret2 != GRAPH_SUCCESS)) { + GELOGE(INTERNAL_ERROR, "Failed to add input/output desc in creating identity."); + return nullptr; + } + + return graph->AddNodeFront(desc); +} } // namespace ge diff --git a/ge/graph/passes/dimension_adjust_pass.h b/ge/graph/passes/dimension_adjust_pass.h index 685d9694..7766f140 100755 --- a/ge/graph/passes/dimension_adjust_pass.h +++ b/ge/graph/passes/dimension_adjust_pass.h @@ -34,6 +34,10 @@ namespace ge { class DimensionAdjustPass : public BaseNodePass { public: Status Run(ge::NodePtr &node) override; + + private: + Status DealWithInNodes(ge::NodePtr &node); + NodePtr AddIdentityNodeToGraph(const std::string &name, const GeTensorDesc &tensor, ComputeGraphPtr &graph); }; } // namespace ge diff --git a/ge/graph/passes/enter_pass.cc b/ge/graph/passes/enter_pass.cc index afeca78f..20e60403 100644 --- a/ge/graph/passes/enter_pass.cc +++ b/ge/graph/passes/enter_pass.cc @@ -23,6 +23,7 @@ namespace { const size_t kOutNodesNum = 1; +const size_t kInCtrlNodesNum = 1; } namespace ge { @@ -55,6 +56,7 @@ Status 
EnterPass::Run(NodePtr &node) { if (out_ctrl_node == nullptr) { continue; } + GELOGD("Remove control edge from %s to %s.", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); if (GraphUtils::RemoveEdge(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()) != GRAPH_SUCCESS) { GELOGE(FAILED, "Remove Enter ctrl output fail, %s->%s", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); @@ -62,8 +64,12 @@ Status EnterPass::Run(NodePtr &node) { } } } else { - if (OptimizeEnter(node, in_node) != SUCCESS) { - GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str()); + if (OptimizeEnterWithOnlyOutData(node, in_node) != SUCCESS) { + GELOGE(FAILED, "Optimize enter node[%s] with only out data node failed.", node->GetName().c_str()); + return FAILED; + } + if (UnlinkCtrlEdgeBeforeConst(node) != SUCCESS) { + GELOGE(FAILED, "Unlink control edge before const of node[%s]'s out nodes failed.", node->GetName().c_str()); return FAILED; } } @@ -72,7 +78,7 @@ Status EnterPass::Run(NodePtr &node) { return SUCCESS; } -Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { +Status EnterPass::OptimizeEnterWithOnlyOutData(NodePtr &node, NodePtr &in_node) { if ((in_node->GetOutAllNodes().size() != kOutNodesNum) || !node->GetOutControlNodes().empty()) { return SUCCESS; } @@ -83,17 +89,45 @@ Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { } GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); - GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))) const auto &out_data_anchor = node->GetOutDataAnchor(0); GE_CHECK_NOTNULL(out_data_anchor); for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); - GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); + 
GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)) + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)) } - GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)); + GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)) AddNodeDeleted(node); AddRePassNodesWithInOut(in_node); return SUCCESS; } + +Status EnterPass::UnlinkCtrlEdgeBeforeConst(NodePtr &node) { + auto out_ctrl_nodes = node->GetOutControlNodes(); + if (out_ctrl_nodes.empty()) { + return SUCCESS; + } + auto out_ctrl_anchor = node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(out_ctrl_anchor); + + for (auto &out_ctrl_node : out_ctrl_nodes) { + GE_CHECK_NOTNULL(out_ctrl_node); + if ((out_ctrl_node->GetType() != CONSTANT) && (out_ctrl_node->GetType() != CONSTANTOP)) { + continue; + } + auto in_ctrl_nodes = out_ctrl_node->GetInControlNodes(); + if (in_ctrl_nodes.size() != kInCtrlNodesNum) { + continue; + } + GE_CHK_STATUS_RET(out_ctrl_anchor->Unlink(out_ctrl_node->GetInControlAnchor())) + auto out_nodes_of_const = out_ctrl_node->GetOutAllNodes(); + for (auto &out_node_of_const : out_nodes_of_const) { + if (!out_ctrl_anchor->IsLinkedWith(out_node_of_const->GetInControlAnchor())) { + GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(out_node_of_const->GetInControlAnchor())) + } + } + } + return SUCCESS; +} } // namespace ge diff --git a/ge/graph/passes/enter_pass.h b/ge/graph/passes/enter_pass.h index 677516ff..67366297 100644 --- a/ge/graph/passes/enter_pass.h +++ b/ge/graph/passes/enter_pass.h @@ -25,7 +25,8 @@ class EnterPass : public BaseNodePass { Status Run(NodePtr &node) override; private: - Status OptimizeEnter(NodePtr &node, NodePtr &in_node); + Status OptimizeEnterWithOnlyOutData(NodePtr &node, NodePtr &in_node); + Status UnlinkCtrlEdgeBeforeConst(NodePtr &node); }; } // namespace ge #endif // GE_GRAPH_PASSES_ENTER_PASS_H_ diff --git a/ge/graph/passes/folding_pass.cc b/ge/graph/passes/folding_pass.cc 
index 93dc2c40..227a0f61 100755 --- a/ge/graph/passes/folding_pass.cc +++ b/ge/graph/passes/folding_pass.cc @@ -173,10 +173,7 @@ Status FoldingPass::DealWithInNodes(NodePtr &node) { continue; } auto in_node = in_node_anchor->GetOwnerNode(); - if (in_node == nullptr) { - continue; - } - if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH)) { + if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH) || (in_node->GetType() == SWITCHN)) { GELOGI("The in_node name is %s, and node type is %s.", in_node->GetName().c_str(), in_node->GetType().c_str()); auto ret = in_node_anchor->Unlink(in_data_anchor); if (ret != SUCCESS) { diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc index 103fbb1b..c1a57a61 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.cc +++ b/ge/graph/passes/merge_to_stream_merge_pass.cc @@ -89,16 +89,6 @@ Status MergeToStreamMergePass::ReplaceMergeNode(const ComputeGraphPtr &graph, co GE_CHK_STATUS_RET(SetNextIteration(stream_merge, next_iteration_name), "Set next iteration failed"); } - if (merge_op_desc->HasAttr(ATTR_NAME_BATCH_LABEL)) { - string batch_label; - (void)AttrUtils::GetStr(merge_op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (!batch_label.empty()) { - auto stream_merge_desc = stream_merge->GetOpDesc(); - GE_CHECK_NOTNULL(stream_merge_desc); - (void)AttrUtils::SetStr(stream_merge_desc, ATTR_NAME_BATCH_LABEL, batch_label); - } - } - return AddActiveNodes(graph, stream_merge); } diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc index d8c4779d..cf46f09d 100644 --- a/ge/graph/passes/next_iteration_pass.cc +++ b/ge/graph/passes/next_iteration_pass.cc @@ -19,6 +19,8 @@ #include "common/ge/ge_util.h" #include "graph/common/omg_util.h" +using std::string; + namespace ge { Status NextIterationPass::Run(ComputeGraphPtr graph) { GELOGD("NextIterationPass Enter"); @@ -35,10 +37,6 @@ Status 
NextIterationPass::Run(ComputeGraphPtr graph) { return INTERNAL_ERROR; } } - if (GroupWithNoBatch(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Group enter_nodes failed without batch_label attr."); - return INTERNAL_ERROR; - } if (FindWhileGroups() != SUCCESS) { GELOGE(INTERNAL_ERROR, "Find while groups failed."); @@ -73,75 +71,22 @@ Status NextIterationPass::GroupEnterNode(const NodePtr &enter_node) { return FAILED; } - std::string batch_label; - (void)ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (batch_label.empty()) { - auto frame_iter = frame_enter_map_.find(frame_name); - if (frame_iter == frame_enter_map_.end()) { - std::vector enter_nodes; - enter_nodes.emplace_back(enter_node); - frame_enter_map_[frame_name] = enter_nodes; - } else { - frame_iter->second.emplace_back(enter_node); - } - return SUCCESS; + string batch_label; + if (ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { + frame_name += batch_label; } - auto group_iter = loop_group_map_.find(frame_name); - if (group_iter == loop_group_map_.end()) { + auto iter = loop_group_map_.find(frame_name); + if (iter == loop_group_map_.end()) { LoopCondGroupPtr loop_group = MakeShared(); if (loop_group == nullptr) { GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); return FAILED; } loop_group->enter_nodes.emplace_back(enter_node); - loop_group_map_[frame_name][batch_label] = loop_group; + loop_group_map_[frame_name] = loop_group; } else { - auto batch_iter = group_iter->second.find(batch_label); - if (batch_iter == group_iter->second.end()) { - LoopCondGroupPtr loop_group = MakeShared(); - if (loop_group == nullptr) { - GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); - return FAILED; - } - loop_group->enter_nodes.emplace_back(enter_node); - group_iter->second[batch_label] = loop_group; - } else { - batch_iter->second->enter_nodes.emplace_back(enter_node); - } - } - - return SUCCESS; -} - -/// -/// @brief Group Enter nodes without batch_label 
attr -/// @param [in] compute_graph -/// @return Status -/// -Status NextIterationPass::GroupWithNoBatch(const ComputeGraphPtr &graph) { - if (frame_enter_map_.empty()) { - GELOGI("All enter nodes in graph %s has batch_label attr.", graph->GetName().c_str()); - return SUCCESS; - } - for (const auto &item : frame_enter_map_) { - const std::string &frame_name = item.first; - auto iter = loop_group_map_.find(frame_name); - if (iter == loop_group_map_.end()) { - LoopCondGroupPtr loop_group = MakeShared(); - if (loop_group == nullptr) { - GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); - return FAILED; - } - loop_group->enter_nodes = item.second; - loop_group_map_[frame_name][""] = loop_group; - } else { - for (auto &batch_item : iter->second) { - for (const auto &enter_node : item.second) { - batch_item.second->enter_nodes.emplace_back(enter_node); - } - } - } + iter->second->enter_nodes.emplace_back(enter_node); } return SUCCESS; @@ -154,55 +99,39 @@ Status NextIterationPass::GroupWithNoBatch(const ComputeGraphPtr &graph) { Status NextIterationPass::FindWhileGroups() { for (const auto &loop_group_iter : loop_group_map_) { const std::string &frame_name = loop_group_iter.first; - for (const auto &batch_iter : loop_group_iter.second) { - const std::string &batch_label = batch_iter.first; - for (const auto &enter_node : batch_iter.second->enter_nodes) { - for (const auto &out_node : enter_node->GetOutAllNodes()) { - GELOGI("Find while_group for enter_node %s, frame_name:%s, batch_label:%s.", enter_node->GetName().c_str(), - frame_name.c_str(), batch_label.c_str()); - if ((out_node->GetType() != MERGE) && (out_node->GetType() != REFMERGE)) { - continue; - } - std::string tmp_label; - GE_CHECK_NOTNULL(out_node->GetOpDesc()); - (void)AttrUtils::GetStr(out_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); - bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label)); - if (need_skip) { - continue; - } - - NodePtr next_node = nullptr; 
- if (FindTargetNode(out_node, NEXTITERATION, true, batch_label, next_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Get NextIteration node failed: inputs of Merge should be Enter/NextIteration, current_Merge=%s", - out_node->GetName().c_str()); - return INTERNAL_ERROR; - } - batch_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); - - NodePtr switch_node = nullptr; - if (FindTargetNode(out_node, SWITCH, false, batch_label, switch_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get Switch node failed: output of Merge should be Switch, current_Merge=%s", - out_node->GetName().c_str()); - return INTERNAL_ERROR; - } - if (switch_node == nullptr) { - continue; - } - - NodePtr loop_cond = nullptr; - if (FindTargetNode(switch_node, LOOPCOND, true, batch_label, loop_cond) != SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Get LoopCond node failed: pred input of Switch should be LoopCond, current_Switch=%s", - switch_node->GetName().c_str()); - return INTERNAL_ERROR; - } - if (batch_iter.second->loop_cond == nullptr) { - batch_iter.second->loop_cond = loop_cond; - } else if (batch_iter.second->loop_cond != loop_cond) { - GELOGE(FAILED, "Multi LoopCond nodes exist."); - return FAILED; - } + for (const auto &enter_node : loop_group_iter.second->enter_nodes) { + for (const auto &out_node : enter_node->GetOutAllNodes()) { + const string &type = out_node->GetType(); + if ((type != MERGE) && (type != REFMERGE)) { + continue; + } + + NodePtr next_node = nullptr; + if (FindTargetNode(out_node, NEXTITERATION, true, next_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get NextIteration node failed, frame_name: %s", frame_name.c_str()); + return INTERNAL_ERROR; + } + loop_group_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); + + NodePtr switch_node = nullptr; + if (FindTargetNode(out_node, SWITCH, false, switch_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get Switch node failed, frame_name: %s.", frame_name.c_str()); + return 
INTERNAL_ERROR; + } + if (switch_node == nullptr) { + continue; + } + + NodePtr loop_cond = nullptr; + if (FindTargetNode(switch_node, LOOPCOND, true, loop_cond) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get LoopCond node failed, frame_name: %s.", frame_name.c_str()); + return INTERNAL_ERROR; + } + if (loop_group_iter.second->loop_cond == nullptr) { + loop_group_iter.second->loop_cond = loop_cond; + } else if (loop_group_iter.second->loop_cond != loop_cond) { + GELOGE(FAILED, "Multi LoopCond nodes exist, frame_name: %s.", frame_name.c_str()); + return FAILED; } } } @@ -223,18 +152,16 @@ bool NextIterationPass::VerifyWhileGroup() { GELOGE(INTERNAL_ERROR, "Verify while group failed, frame_name is empty."); return false; } - for (const auto &batch_iter : loop_group_iter.second) { - if (batch_iter.second->loop_cond == nullptr) { - GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str()); - return false; - } + if (loop_group_iter.second->loop_cond == nullptr) { + GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str()); + return false; + } - for (const auto &pair_iter : batch_iter.second->merge_next_pairs) { - if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) { - GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.", - frame_name.c_str()); - return false; - } + for (const auto &pair_iter : loop_group_iter.second->merge_next_pairs) { + if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) { + GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.", + frame_name.c_str()); + return false; } } } @@ -249,56 +176,53 @@ bool NextIterationPass::VerifyWhileGroup() { /// Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { for (const auto &loop_cond_iter : loop_group_map_) { - for (const auto &batch_iter : loop_cond_iter.second) { - const std::string 
&cond_name = batch_iter.second->loop_cond->GetName(); - GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str()); - - // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge - NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE); - NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE); - if ((enter_active == nullptr) || (next_active == nullptr)) { - GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str()); + const std::string &cond_name = loop_cond_iter.second->loop_cond->GetName(); + GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str()); + + // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge + NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE); + NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE); + if ((enter_active == nullptr) || (next_active == nullptr)) { + GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str()); + return INTERNAL_ERROR; + } + + for (const auto &enter_node : loop_cond_iter.second->enter_nodes) { + // Enter --> Active + if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge from %s to %s failed.", enter_node->GetName().c_str(), + enter_active->GetName().c_str()); return INTERNAL_ERROR; } + } - for (const auto &enter_node : batch_iter.second->enter_nodes) { - // Enter --> Active - if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) != - GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; - } + for (const auto &pair : loop_cond_iter.second->merge_next_pairs) { + NodePtr merge_node = pair.first; + NodePtr next_node = pair.second; + // Active --> Merge + if 
(GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; } - for (const auto &pair : batch_iter.second->merge_next_pairs) { - NodePtr merge_node = pair.first; - NodePtr next_node = pair.second; - // Active --> Merge - if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != - GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; - } - - // NextIteration --> Active - if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; - } - - // break link between NextIteration and Merge - if (BreakNextIteration(next_node, merge_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Break NextIteration failed"); - return INTERNAL_ERROR; - } + // NextIteration --> Active + if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; } - if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || - (SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) { - GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed."); + // break link between NextIteration and Merge + if (BreakNextIteration(next_node, merge_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Break NextIteration failed"); return INTERNAL_ERROR; } } + + if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || + (SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) { + GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed."); + return INTERNAL_ERROR; + } } return SUCCESS; @@ -365,12 +289,11 @@ Status NextIterationPass::BreakNextIteration(const NodePtr &next_node, NodePtr & /// @param [in] node /// @param [in] target_type /// @param 
[in] is_input -/// @param [in] batch_label /// @param [out] target_node /// @return Status /// Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, - const std::string &batch_label, NodePtr &target_node) { + NodePtr &target_node) { if (node == nullptr) { GELOGE(PARAM_INVALID, "node is null."); return PARAM_INVALID; @@ -387,12 +310,6 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string } for (const auto &tmp_node : nodes) { - std::string tmp_label; - (void)AttrUtils::GetStr(tmp_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); - bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label)); - if (need_skip) { - continue; - } const std::string type = tmp_node->GetType(); if ((target_type == LOOPCOND) && (type == target_type)) { target_node = tmp_node; @@ -415,7 +332,6 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string /// @return SUCCESS /// Status NextIterationPass::ClearStatus() { - frame_enter_map_.clear(); loop_group_map_.clear(); return SUCCESS; } diff --git a/ge/graph/passes/next_iteration_pass.h b/ge/graph/passes/next_iteration_pass.h index f8223c20..3266254d 100755 --- a/ge/graph/passes/next_iteration_pass.h +++ b/ge/graph/passes/next_iteration_pass.h @@ -46,13 +46,6 @@ class NextIterationPass : public GraphPass { /// Status GroupEnterNode(const NodePtr &enter_node); - /// - /// @brief Group Enter nodes without batch_label attr - /// @param [in] compute_graph - /// @return Status - /// - Status GroupWithNoBatch(const ComputeGraphPtr &graph); - /// /// @brief Find while groups /// @return Status @@ -97,13 +90,10 @@ class NextIterationPass : public GraphPass { /// @param [out] target_node /// @return Status /// - Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, - const std::string &batch_label, NodePtr &target_node); + Status FindTargetNode(const NodePtr &node, const 
std::string &target_type, bool is_input, NodePtr &target_node); - // map> - std::unordered_map> frame_enter_map_; - // map> - std::unordered_map> loop_group_map_; + // map + std::unordered_map loop_group_map_; }; } // namespace ge #endif // GE_GRAPH_PASSES_NEXT_ITERATION_PASS_H_ diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index 9ab74d70..b1fb3bbd 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -44,6 +44,8 @@ using std::set; using std::string; using std::vector; +using std::map; +using std::queue; namespace ge { namespace multibatch { @@ -57,10 +59,15 @@ const int kDataInIndex = 0; const int kMergeDataOutIndex = 0; const int kStaticOutput = -1; const int kDivisionConst = 2; +const int32_t kOneInDataNode = 1; +const int32_t kFindNoMatch = 0; inline bool IsDataLikeType(const std::string &node_type) { return (node_type == DATA) || (node_type == AIPP); } +inline bool IsEnterType(const string &node_type) { return (node_type == ENTER) || (node_type == REFENTER); } +const set unchange_types({CONSTANT, CONSTANTOP, ENTER, REFENTER}); + inline bool IsGetNextType(const NodePtr &node) { std::string original_type; GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, @@ -218,12 +225,6 @@ Status MultiBatchGraphCopyer::CopyGraph() { return ret; } - ret = InsertIdentityAfterSwitchN(); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to insert identity nodes after switchn node."); - return INTERNAL_ERROR; - } - GELOGI("Begin to remove useless nodes by prune pass after copy process"); PrunePass prune_pass; ret = prune_pass.Run(graph_); @@ -240,6 +241,18 @@ Status MultiBatchGraphCopyer::Init() { return ret; } + ret = RelinkConstCtrlEdge(); + if (ret != SUCCESS) { + GELOGE(FAILED, "Relink const's control edge failed."); + return FAILED; + } + + ret = ExtractUnchangedStructureOutofCycle(); + if (ret != SUCCESS) { + GELOGE(FAILED, "Extract 
unchanged structure out of cycle failed."); + return FAILED; + } + for (auto &node : graph_->GetAllNodes()) { origin_all_nodes_.emplace_back(node); if (IsDataLikeType(node->GetType())) { @@ -252,6 +265,281 @@ Status MultiBatchGraphCopyer::Init() { return SUCCESS; } +Status MultiBatchGraphCopyer::RelinkConstCtrlEdge() { + for (auto &node : graph_->GetAllNodes()) { + GE_CHECK_NOTNULL(node); + if ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) { + if (node->GetOutDataNodes().empty()) { + continue; + } + if (!node->GetInControlNodes().empty()) { + auto in_ctrl_nodes = node->GetInControlNodes(); + auto out_nodes = node->GetOutAllNodes(); + bool has_merge = false; + for (const auto &out_node : out_nodes) { + GE_CHECK_NOTNULL(out_node); + if (out_node->GetType() == MERGE || out_node->GetType() == REFMERGE) { + has_merge = true; + break; + } + } + if (has_merge) { + continue; + } + auto in_ctrl_anchor = node->GetInControlAnchor(); + GE_CHECK_NOTNULL(in_ctrl_anchor); + in_ctrl_anchor->UnlinkAll(); + for (auto &in_ctrl_node : in_ctrl_nodes) { + auto out_ctrl_anchor_of_in_ctrl_node = in_ctrl_node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(out_ctrl_anchor_of_in_ctrl_node); + for (auto &out_node : out_nodes) { + if (IsEnterType(out_node->GetType())) { + continue; + } + if (!out_ctrl_anchor_of_in_ctrl_node->IsLinkedWith(out_node->GetInControlAnchor())) { + GE_CHK_STATUS_RET(out_ctrl_anchor_of_in_ctrl_node->LinkTo(out_node->GetInControlAnchor())) + } + } + } + } + auto out_ctrl_anchor = node->GetOutControlAnchor(); + if (out_ctrl_anchor != nullptr) { + out_ctrl_anchor->UnlinkAll(); + } + } + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::ExtractUnchangedStructureOutofCycle() { + map> frame_enter; + if (GetEnterNodesGroupByFrame(frame_enter) != SUCCESS) { + GELOGE(FAILED, "Get enter nodes grouped by frame_name failed."); + return FAILED; + } + + queue nodes_to_extract; + if (GetNodeNeedExtract(frame_enter, nodes_to_extract) != SUCCESS) { + 
GELOGE(FAILED, "Get nodes needed to extract failed."); + return FAILED; + } + + while (!nodes_to_extract.empty()) { + auto node = nodes_to_extract.front(); + nodes_to_extract.pop(); + OpDescPtr enter_desc = nullptr; + if (MoveInEntersInDataAnchorDown(node, enter_desc) != SUCCESS) { + GELOGE(FAILED, "Move in enter nodes' in data anchors down of %s failed.", node->GetName().c_str()); + return FAILED; + } + set out_nodes; + if (InsertEnterAfterNode(node, enter_desc, out_nodes) != SUCCESS) { + GELOGE(FAILED, "Insert enter node after %s failed.", node->GetName().c_str()); + return FAILED; + } + + if (MoveCtrlEdgeToOutNodes(node, out_nodes) != SUCCESS) { + GELOGE(FAILED, "Move %s's control edge to out nodes failed.", node->GetName().c_str()); + return FAILED; + } + + for (auto &out_node : out_nodes) { + GE_CHECK_NOTNULL(out_node); + if (AllInDataNodesUnchangeAndNoMergeOut(out_node)) { + nodes_to_extract.push(out_node); + } + } + } + + if (DeleteEnterWithoutDataOut() != SUCCESS) { + GELOGE(FAILED, "Delete enter node without out data nodes failed."); + return FAILED; + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::GetEnterNodesGroupByFrame(map> &frame_enter) { + for (auto &node : graph_->GetAllNodes()) { + GE_CHECK_NOTNULL(node); + if (IsEnterType(node->GetType())) { + if (!node->GetInControlNodes().empty() || !node->GetOutControlNodes().empty()) { + continue; + } + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + string frame_name; + if (!AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { + GELOGE(FAILED, "Get attr frame_name of enter[%s] failed.", node->GetName().c_str()); + return FAILED; + } + frame_enter[frame_name].emplace_back(node); + } + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::GetNodeNeedExtract(const map> &frame_enter, + queue &nodes_to_extract) { + for (const auto &one_group : frame_enter) { + auto enters = one_group.second; + for (const auto &enter : enters) { + auto out_data_nodes = 
enter->GetOutDataNodes(); + for (const auto &out_data_node : out_data_nodes) { + GE_CHECK_NOTNULL(out_data_node); + if (AllInDataNodesUnchangeAndNoMergeOut(out_data_node)) { + nodes_to_extract.push(out_data_node); + } + } + } + } + + return SUCCESS; +} + +bool MultiBatchGraphCopyer::AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node) { + auto out_data_nodes = node->GetOutDataNodes(); + for (const auto &out_data_node : out_data_nodes) { + if (out_data_node == nullptr) { + return false; + } + + if (out_data_node->GetType() == MERGE || out_data_node->GetType() == REFMERGE) { + return false; + } + } + + auto in_data_nodes = node->GetInDataNodes(); + if (in_data_nodes.size() == kOneInDataNode) { + return true; + } + + for (const auto &in_data_node : in_data_nodes) { + if (in_data_node == nullptr) { + return false; + } + if (unchange_types.count(in_data_node->GetType()) == kFindNoMatch) { + return false; + } + } + + return true; +} + +Status MultiBatchGraphCopyer::MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc) { + auto in_data_anchors = node->GetAllInDataAnchors(); + for (auto &in_data_anchor : in_data_anchors) { + auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_data_anchor); + auto peer_in_data_node = peer_out_data_anchor->GetOwnerNode(); + if (IsEnterType(peer_in_data_node->GetType())) { + GE_CHK_STATUS_RET(peer_out_data_anchor->Unlink(in_data_anchor)) + GELOGD("Unlink data edge from %s to %s.", peer_in_data_node->GetName().c_str(), node->GetName().c_str()); + auto enter_in_data_anchors = peer_in_data_node->GetAllInDataAnchors(); + for (auto &enter_in_data_anchor : enter_in_data_anchors) { + auto peer_out_data_anchor_of_enter = enter_in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_data_anchor_of_enter); + if (peer_out_data_anchor_of_enter->IsLinkedWith(in_data_anchor)) { + continue; + } + GE_CHK_STATUS_RET(peer_out_data_anchor_of_enter->LinkTo(in_data_anchor)) + GELOGD("Relink data 
edge from %s to %s.", peer_out_data_anchor_of_enter->GetOwnerNode()->GetName().c_str(), + node->GetName().c_str()); + } + enter_desc = peer_in_data_node->GetOpDesc(); + GE_CHECK_NOTNULL(enter_desc); + } + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::InsertEnterAfterNode(NodePtr &node, const OpDescPtr &copy_desc, set &out_nodes) { + if (copy_desc == nullptr) { + return SUCCESS; + } + map>> outanchors_inanchors_nodes; + auto out_data_anchors = node->GetAllOutDataAnchors(); + for (auto &out_data_anchor : out_data_anchors) { + auto peer_in_data_anchors = out_data_anchor->GetPeerInDataAnchors(); + for (auto peer_in_data_anchor : peer_in_data_anchors) { + GE_CHECK_NOTNULL(peer_in_data_anchor); + auto peer_in_data_node = peer_in_data_anchor->GetOwnerNode(); + out_nodes.emplace(peer_in_data_node); + outanchors_inanchors_nodes[out_data_anchor].emplace_back(std::make_pair(peer_in_data_anchor, peer_in_data_node)); + } + } + + int32_t i = 0; + auto node_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(node_desc); + // Insert one enter node after node's per out data anchor + for (auto &outanchor_inanchors_nodes : outanchors_inanchors_nodes) { + string name = node->GetName() + "_" + ENTER + "_" + std::to_string(i++); + GELOGD("Create Enter op %s after %s.", name.c_str(), node->GetName().c_str()); + auto enter_desc = AttrUtils::CopyOpDesc(copy_desc); + enter_desc->SetName(name); + GE_CHK_STATUS_RET( + enter_desc->UpdateInputDesc("x", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx()))) + GE_CHK_STATUS_RET( + enter_desc->UpdateOutputDesc("y", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx()))) + auto enter_node = graph_->AddNode(enter_desc); + GE_CHECK_NOTNULL(enter_node); + GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->LinkTo(enter_node->GetInDataAnchor(kDataInIndex))) + GE_CHECK_NOTNULL(enter_node->GetOutDataAnchor(kDataInIndex)); + for (auto &inanchor_node : outanchor_inanchors_nodes.second) { + 
GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->Unlink(inanchor_node.first)) + GE_CHK_STATUS_RET(enter_node->GetOutDataAnchor(kDataInIndex)->LinkTo(inanchor_node.first)) + GELOGD("Unlink from %s to %s, link from %s to %s then to %s.", node->GetName().c_str(), + inanchor_node.second->GetName().c_str(), node->GetName().c_str(), enter_node->GetName().c_str(), + inanchor_node.second->GetName().c_str()); + } + } + + return SUCCESS; +} + +// Move node's in control edges to out data nodes +Status MultiBatchGraphCopyer::MoveCtrlEdgeToOutNodes(NodePtr &node, set &out_nodes) { + auto in_ctrl_anchor = node->GetInControlAnchor(); + GE_CHECK_NOTNULL(in_ctrl_anchor); + auto peer_out_ctrl_anchors = in_ctrl_anchor->GetPeerOutControlAnchors(); + for (auto &peer_out_ctrl_anchor : peer_out_ctrl_anchors) { + GE_CHK_STATUS_RET(peer_out_ctrl_anchor->Unlink(in_ctrl_anchor)) + GELOGD("Unlink control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), + node->GetName().c_str()); + for (auto &out_node : out_nodes) { + auto in_ctrl_anchor_of_out_node = out_node->GetInControlAnchor(); + GE_CHECK_NOTNULL(in_ctrl_anchor_of_out_node); + if (!peer_out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor_of_out_node)) { + GE_CHK_STATUS_RET(peer_out_ctrl_anchor->LinkTo(in_ctrl_anchor_of_out_node)) + GELOGD("Link control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), + out_node->GetName().c_str()); + } + } + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::DeleteEnterWithoutDataOut() { + for (auto &node : graph_->GetAllNodes()) { + GE_CHECK_NOTNULL(node); + if (IsEnterType(node->GetType())) { + auto out_nodes = node->GetOutAllNodes(); + if (out_nodes.empty()) { + GELOGD("Delete enter node: %s which has no output.", node->GetName().c_str()); + GE_CHK_STATUS_RET(GraphUtils::IsolateNode(node, {})) + GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph_, node)) + } + } + } + + return SUCCESS; +} + void 
MultiBatchGraphCopyer::LabelStatusForData(const NodePtr &data) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); GELOGI("Label status for %s, shape_dims is %s.", data->GetName().c_str(), @@ -297,6 +585,9 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { LabelStatusForGetNextSink(data); } } + + map> frame_enters; + InitStatus(frame_enters); bool changed = true; // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch while (changed) { @@ -306,12 +597,13 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { if (iter != origin_nodes_status_.end()) { continue; } - for (auto &in_node : node->GetInAllNodes()) { - bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && - origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; - if (is_in_batch) { - origin_nodes_status_[node.get()] = kNodeInBatchBranch; - changed = true; + for (auto &in_node : node->GetInDataNodes()) { + if (origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end()) { + if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end()) { + origin_nodes_status_[node.get()] = kNodeInBatchBranch; + ResetEnterStatus(frame_enters, node); + changed = true; + } break; } } @@ -320,6 +612,45 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { return SUCCESS; } +void MultiBatchGraphCopyer::InitStatus(map> &frame_enters) { + for (const auto &node : origin_all_nodes_) { + if (!IsEnterType(node->GetType())) { + continue; + } + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + string frame_name; + if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { + frame_enters[frame_name].emplace_back(node); + } + } + + for (const auto &data : origin_data_nodes_) { + auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); + if (!IsAllDimsPositive(data_shape.GetDims())) { + origin_nodes_status_[data.get()] = kNodeInBatchBranch; + } 
+ } +} + +void MultiBatchGraphCopyer::ResetEnterStatus(map> &frame_enters, const NodePtr &node) { + if (!IsEnterType(node->GetType())) { + return; + } + + for (const auto &frame_enter : frame_enters) { + auto &enters = frame_enter.second; + if (std::find(enters.begin(), enters.end(), node) != enters.end()) { + for (const auto &enter : enters) { + origin_nodes_status_[enter.get()] = kNodeInBatchBranch; + } + break; + } + } +} + Status MultiBatchGraphCopyer::LabelStatus() { if (LabelInBatchBranchStatus() != SUCCESS) { GELOGE(PARAM_INVALID, "Failed to label no in batch branch"); @@ -1360,52 +1691,6 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { return SUCCESS; } -Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { - for (auto &node : graph_->GetAllNodes()) { - if (node->GetType() != SWITCHN) { - continue; - } - auto switchn_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(switchn_desc); - size_t i = 0; - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - for (auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - auto out_node = in_data_anchor->GetOwnerNode(); - auto op_desc = out_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - if ((out_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { - GELOGD("No need to insert identity between %s and %s.", node->GetName().c_str(), out_node->GetName().c_str()); - continue; - } - - auto identity_desc = MakeShared(node->GetName() + "_identity_" + std::to_string(i), IDENTITY); - GE_CHECK_NOTNULL(identity_desc); - - string batch_label; - if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { - if (!AttrUtils::SetStr(identity_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { - GELOGE(FAILED, "Set attr ATTR_NAME_BATCH_LABEL failed, node:%s.", identity_desc->GetName().c_str()); - return FAILED; - } - } - - auto data_desc = switchn_desc->GetOutputDesc(i); - i++; - GE_CHK_STATUS_RET(identity_desc->AddInputDesc("x", data_desc)); - 
GE_CHK_STATUS_RET(identity_desc->AddOutputDesc("y", data_desc)); - - auto identity_node = graph_->AddNode(identity_desc); - GE_CHECK_NOTNULL(identity_node); - GE_CHK_STATUS_RET(out_data_anchor->LinkTo(identity_node->GetInDataAnchor(0))); - GE_CHECK_NOTNULL(identity_node->GetOutControlAnchor()); - GE_CHK_STATUS_RET(identity_node->GetOutControlAnchor()->LinkTo(out_node->GetInControlAnchor())); - } - } - } - - return SUCCESS; -} - Status ProcessMultiBatch(ComputeGraphPtr &graph) { const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE"); if (multi_batch_with_case != nullptr) { diff --git a/ge/graph/preprocess/multi_batch_copy_graph.h b/ge/graph/preprocess/multi_batch_copy_graph.h index a0de4413..d51c4c02 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/ge/graph/preprocess/multi_batch_copy_graph.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "external/ge/ge_api_error_codes.h" @@ -64,12 +65,26 @@ class MultiBatchGraphCopyer { private: Status Init(); Status CheckArguments(); + Status RelinkConstCtrlEdge(); + + Status ExtractUnchangedStructureOutofCycle(); + Status GetEnterNodesGroupByFrame(std::map> &frame_enter); + Status GetNodeNeedExtract(const std::map> &frame_enter, + std::queue &nodes_to_extract); + bool AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node); + Status MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc); + Status InsertEnterAfterNode(NodePtr &node, const OpDescPtr &enter_desc, std::set &out_nodes); + Status MoveCtrlEdgeToOutNodes(NodePtr &node, std::set &out_nodes); + Status DeleteEnterWithoutDataOut(); // label status for origin_all_nodes_ Status LabelStatus(); Status LabelInBatchBranchStatus(); void LabelStatusForData(const NodePtr &data); void LabelStatusForGetNextSink(const NodePtr &data); + void InitStatus(std::map> &frame_enters); + void ResetEnterStatus(std::map> &frame_enters, const NodePtr &node); + // add nodes functions Status CreateNewNodes(); @@ -81,7 +96,6 @@ class 
MultiBatchGraphCopyer { Status InsertSwitchNForData(const NodePtr &node, const size_t &out_anchor_index, const size_t &peer_in_anchor_index, std::vector> &dynamic_out_to_switchn); - Status InsertIdentityAfterSwitchN(); Status UpdateMaxShapeToData(const NodePtr &node, size_t out_anchor_index); Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index); From 7fe250695305473b22a4b652a70f81ec3101c98d Mon Sep 17 00:00:00 2001 From: wangzhengjun Date: Mon, 7 Dec 2020 15:28:01 +0800 Subject: [PATCH 026/127] modify for static check 2 --- ge/graph/load/new_model_manager/data_dumper.cc | 4 ++-- .../new_model_manager/task_info/stream_switch_task_info.h | 4 ++-- ge/graph/load/new_model_manager/task_info/task_info.h | 4 ++-- ge/hybrid/executor/hybrid_model_executor.cc | 2 +- ge/hybrid/hybrid_davinci_model.cc | 4 ++-- ge/hybrid/model/hybrid_model_builder.cc | 2 +- ge/hybrid/node_executor/controlop/control_op_executor.cc | 2 +- inc/framework/common/taskdown_common.h | 2 -- 8 files changed, 11 insertions(+), 13 deletions(-) diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc index 4534fe73..b331d780 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/new_model_manager/data_dumper.cc @@ -919,11 +919,11 @@ Status DataDumper::DumpExceptionInfo(const std::vector exceptio ReplaceStringElem(op_name); ReplaceStringElem(op_type); string dump_file_path = - "./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time); + "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." 
+ std::to_string(now_time); GELOGI("The exception dump file path is %s", dump_file_path.c_str()); uint64_t proto_size = dump_data.ByteSizeLong(); - unique_ptr proto_msg(new (std::nothrow) char[proto_size]); + std::unique_ptr proto_msg(new (std::nothrow) char[proto_size]); bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size); if (!ret || proto_size == 0) { GELOGE(PARAM_INVALID, "Dump data proto serialize failed"); diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h index 89642cf8..a72d7de2 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h @@ -41,7 +41,7 @@ class StreamSwitchTaskInfo : public TaskInfo { Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; private: - void SetInputAndValuePtr(DavinciModel *davinci_model, const vector &input_data_addrs); + void SetInputAndValuePtr(DavinciModel *davinci_model, const std::vector &input_data_addrs); void *input_ptr_; rtCondition_t cond_; void *value_ptr_; @@ -49,7 +49,7 @@ class StreamSwitchTaskInfo : public TaskInfo { uint32_t true_stream_id_; rtSwitchDataType_t data_type_; static const uint32_t kInputNum = 2; - vector fixed_addr_offset_; + std::vector fixed_addr_offset_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/new_model_manager/task_info/task_info.h index fe9c8c37..26f22564 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/new_model_manager/task_info/task_info.h @@ -63,8 +63,8 @@ struct RuntimeParam { }; typedef struct FusionOpInfo { - vector original_op_names; - string op_name; + std::vector original_op_names; + std::string op_name; uint32_t op_index; uint32_t stream_id; } FusionOpInfo; diff 
--git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 4af34451..8ba687c2 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -82,7 +82,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, Status HybridModelExecutor::Cleanup() { GELOGD("Start to cleanup."); context_.callback_manager->Destroy(); - RuntimeInferenceContext::DestroyContext(to_string(context_.session_id)); + RuntimeInferenceContext::DestroyContext(std::to_string(context_.session_id)); GELOGD("Cleanup successfully."); return SUCCESS; } diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc index d696adf9..b6f5bb84 100755 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -78,8 +78,8 @@ HybridDavinciModel::~HybridDavinciModel() { delete impl_; } -unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { - auto instance = unique_ptr(new (std::nothrow)HybridDavinciModel()); +std::unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { + auto instance = std::unique_ptr(new (std::nothrow)HybridDavinciModel()); if (instance != nullptr) { instance->impl_ = new (std::nothrow) HybridDavinciModel::Impl(ge_root_model); if (instance->impl_ != nullptr) { diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index f9564a8f..cd4c0a83 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -957,7 +957,7 @@ Status HybridModelBuilder::IndexTaskDefs() { // index task defs GELOGD("To index tasks for subgraph: %s", name.c_str()); - unordered_map node_map; + std::unordered_map node_map; for (const auto &node : sub_graph->GetDirectNode()) { GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.cc 
b/ge/hybrid/node_executor/controlop/control_op_executor.cc index 83fc09ee..74920b22 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.cc +++ b/ge/hybrid/node_executor/controlop/control_op_executor.cc @@ -405,7 +405,7 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, auto node_item = model.GetNodeItem(node); GE_CHECK_NOTNULL(node_item); - unique_ptr node_task; + std::unique_ptr node_task; auto node_type = node->GetType(); if (node_type == IF || node_type == STATELESSIF) { node_task.reset(new(std::nothrow) IfOpNodeTask()); diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h index b1364d16..12c6af89 100644 --- a/inc/framework/common/taskdown_common.h +++ b/inc/framework/common/taskdown_common.h @@ -19,8 +19,6 @@ #include "runtime/rt.h" -using namespace std; - namespace ge { #define CC_FUSION_OP_MAX 32 From 2c24f922ffc2c8ad27ccebf38028ac1c4dd7957f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=8A=A2?= Date: Mon, 7 Dec 2020 15:30:22 +0800 Subject: [PATCH 027/127] cpplint cast fix --- ge/graph/load/new_model_manager/davinci_model.cc | 2 +- .../task_info/super_kernel/super_kernel.cc | 9 +++++---- .../task_info/super_kernel/super_kernel_factory.cc | 12 ++++++------ ge/graph/load/new_model_manager/zero_copy_task.cc | 2 +- ge/omm/csa_interact.cc | 2 +- ge/session/omg.cc | 6 +++--- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 81d47b3b..1a4a5014 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2801,7 +2801,7 @@ void *DavinciModel::Run(DavinciModel *model) { reinterpret_cast(shape_data_buffer_data) + shape_data_buffer_length / sizeof(int64_t)); GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); - delete[] (int64_t *)current_data.blobs.back().data; + delete[] 
reinterpret_cast(current_data.blobs.back().data); current_data.blobs.pop_back(); } GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index 63f29f84..e94fa425 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -25,10 +25,11 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { const void *args[] = {this->GetNavTablePtr(), reinterpret_cast(static_cast(this->GetNavTableSize()))}; - rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return - RT_ERROR_TO_GE_STATUS(rt_ret);) - rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); + rtError_t rt_ret = rtMalloc(reinterpret_cast(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) + rt_ret = rtMemcpy(reinterpret_cast(device_args_addr_), sizeof(args), (void *)args, sizeof(args), + RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. 
error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc index 69f7b159..39373901 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -87,7 +87,7 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list } GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size()); const size_t nav_table_len = 2 * stub_func_list.size(); - std::unique_ptr nav_table(new(std::nothrow) uint64_t[nav_table_len]); + std::unique_ptr nav_table(new (std::nothrow) uint64_t[nav_table_len]); GE_CHECK_NOTNULL(nav_table); uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t); @@ -106,16 +106,16 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list nav_table[i * 2 + 1] = static_cast(reinterpret_cast(args_addr_list[i])); GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); } - rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); + rt_ret = rtMalloc(reinterpret_cast(&hbm_nav_table_addr), nav_table_size, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) - rt_ret = - rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table.get(), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = rtMemcpy(reinterpret_cast(hbm_nav_table_addr), nav_table_size, + reinterpret_cast(nav_table.get()), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. 
error: 0x%X", rt_ret); GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) // Create the necessary metadata for the super kernel - h = std::unique_ptr( - new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); + h = + std::unique_ptr(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); return SUCCESS; } } // namespace skt diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index 9b42d563..2609cb4b 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -131,7 +131,7 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const ma auto dst_addr = static_cast(buffer_addr); GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); - *(uintptr_t *)(args_info + offset) = reinterpret_cast(dst_addr); + *reinterpret_cast(args_info + offset)= reinterpret_cast(dst_addr); is_updated_ = true; } } diff --git a/ge/omm/csa_interact.cc b/ge/omm/csa_interact.cc index 1599af94..1b33ddbd 100644 --- a/ge/omm/csa_interact.cc +++ b/ge/omm/csa_interact.cc @@ -202,7 +202,7 @@ Status CsaInteract::WriteFile(const std::string &file_name, const std::string &c } } - mmSsize_t ret = mmWrite(fd, (void *)content.c_str(), content.length()); + mmSsize_t ret = mmWrite(fd, reinterpret_cast(const_cast(content.c_str())), content.length()); if (ret == EN_ERROR) { GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno); ret = mmClose(fd); diff --git a/ge/session/omg.cc b/ge/session/omg.cc index df837f99..b5e1e105 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -891,7 +891,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con if (status != ge::GRAPH_SUCCESS) { GELOGE(ge::FAILED, "Om file init failed."); if 
(model.model_data != nullptr) { - delete[](char *) model.model_data; + delete[] reinterpret_cast(model.model_data); model.model_data = nullptr; } return status; @@ -902,7 +902,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con if (status != ge::GRAPH_SUCCESS) { GELOGE(ge::FAILED, "Get model part failed."); if (model.model_data != nullptr) { - delete[](char *) model.model_data; + delete[] reinterpret_cast(model.model_data); model.model_data = nullptr; } return status; @@ -928,7 +928,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con } if (model.model_data != nullptr) { - delete[](char *) model.model_data; + delete[] reinterpret_cast(model.model_data); model.model_data = nullptr; } return ret; From 65b310205a075c72c238bd444a1396b97fbc8211 Mon Sep 17 00:00:00 2001 From: taoxudonghaha Date: Mon, 7 Dec 2020 15:33:28 +0800 Subject: [PATCH 028/127] clean bc warning and add atc.bin fwk_atc.bin --- ge/offline/CMakeLists.txt | 126 +++- ge/offline/atc | 20 + ge/offline/module.mk | 105 ++++ ge/stub/gen_stubapi.py | 7 + metadef | 2 +- parser | 2 +- tests/st/CMakeLists.txt | 42 -- tests/st/resnet50/common.cc | 768 ------------------------- tests/st/resnet50/common.h | 102 ---- tests/st/resnet50/ptest.h | 225 -------- tests/st/resnet50/resnet50_train.cc | 852 ---------------------------- tests/st/test_ge_st.py | 56 -- 12 files changed, 257 insertions(+), 2050 deletions(-) create mode 100644 ge/offline/atc delete mode 100644 tests/st/CMakeLists.txt delete mode 100644 tests/st/resnet50/common.cc delete mode 100644 tests/st/resnet50/common.h delete mode 100644 tests/st/resnet50/ptest.h delete mode 100644 tests/st/resnet50/resnet50_train.cc delete mode 100644 tests/st/test_ge_st.py diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 49af37c0..b3a0d53c 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -11,13 +11,13 @@ set(SRC_LIST "main.cc" "single_op_parser.cc" 
"../session/omg.cc" - "../ir_build/atc_ir_common.cc" + "../ir_build/atc_ir_common.cc" ) ############ atc ############ add_executable(atc ${SRC_LIST} ${PROTO_HDRS}) -target_compile_options(atc PRIVATE +target_compile_options(atc PRIVATE -Werror -O2 -Wno-deprecated-declarations @@ -74,10 +74,130 @@ target_link_libraries(atc PRIVATE -ldl ) +############ atc.bin ############ +add_executable(atc.bin ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(atc.bin PRIVATE + -Werror + -O2 + -Wno-deprecated-declarations +) + +target_compile_definitions(atc.bin PRIVATE + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + COMPILE_OMG_PACKAGE + google=ascend_private +) + +target_include_directories(atc.bin PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/common/inc/external + ${GE_CODE_DIR}/common/inc/external/graph + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/graph + ${METADEF_DIR}/inc/register + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/external/register + ${PARSER_DIR} + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/common + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain +) + +target_link_libraries(atc.bin PRIVATE + $ + ascend_protobuf + ge_common + register + c_sec + graph + error_manager + ge_compiler + parser_common + gflags + json + runtime_compile + slog + static_mmpa + -lrt + -ldl +) + +############ fwk_atc.bin ############ +add_executable(fwk_atc.bin ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(fwk_atc.bin PRIVATE + -Werror + -O2 + -Wno-deprecated-declarations +) + +target_compile_definitions(fwk_atc.bin PRIVATE + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + COMPILE_OMG_PACKAGE + google=ascend_private +) + +target_include_directories(fwk_atc.bin PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + 
${GE_CODE_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/common/inc/external + ${GE_CODE_DIR}/common/inc/external/graph + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/graph + ${METADEF_DIR}/inc/register + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/external/register + ${PARSER_DIR} + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/common + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain +) + +target_link_libraries(fwk_atc.bin PRIVATE + $ + ascend_protobuf + ge_common + register + c_sec + graph + error_manager + ge_compiler + parser_common + gflags + json + runtime_compile + slog + static_mmpa + -lrt + -ldl +) + ############ install ############ set(INSTALL_BASE_DIR "") set(INSTALL_LIBRARY_DIR lib) -install(TARGETS atc OPTIONAL +install(TARGETS atc atc.bin fwk_atc.bin OPTIONAL LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} ) diff --git a/ge/offline/atc b/ge/offline/atc new file mode 100644 index 00000000..a2b96482 --- /dev/null +++ b/ge/offline/atc @@ -0,0 +1,20 @@ +#!/bin/bash +#------------------------------------------------------------------- +# Purpose: +# Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved. 
+#------------------------------------------------------------------- + +LOCAL_PATH=$(cd "$(dirname "$0")"; pwd) +PKG_PATH=$(cd ${LOCAL_PATH}/..; pwd) +LIB_P="/lib64" +PYTHON_P="/python/site-packages" +LIB64_PATH="${PKG_PATH}${LIB_P}" +PYTHON_PATH="${PKG_PATH}${PYTHON_P}" +export LD_LIBRARY_PATH="${LIB64_PATH}:${LD_LIBRARY_PATH}" +export PYTHONPATH="${PYTHON_PATH}:${PYTHONPATH}" + +if [ -f "${PKG_PATH}/bin/atc.bin" ];then + atc.bin $@ +else + fwk_atc.bin $@ +fi diff --git a/ge/offline/module.mk b/ge/offline/module.mk index 8859df29..c14be50f 100755 --- a/ge/offline/module.mk +++ b/ge/offline/module.mk @@ -54,3 +54,108 @@ LOCAL_LDFLAGS := -lrt -ldl include $(BUILD_HOST_EXECUTABLE) +include $(CLEAR_VARS) + +LOCAL_MODULE := atc.bin + +LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private + +LOCAL_SRC_FILES := \ + main.cc \ + single_op_parser.cc \ + ../session/omg.cc \ + ../ir_build/atc_ir_common.cc \ + +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH)/../ ./ \ + $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)metadef/inc/common/util \ + $(TOPDIR)parser \ + third_party/json/include \ + third_party/gflags/include \ + third_party/protobuf/include \ + proto/om.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/insert_op.proto \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libge_common \ + libascend_protobuf \ + libslog \ + libgraph \ + libregister \ + liberror_manager \ + libge_compiler \ + libruntime_compile \ + libparser_common \ + liberror_manager \ + +LOCAL_STATIC_LIBRARIES := libgflags + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_EXECUTABLE) + +include $(CLEAR_VARS) + +LOCAL_MODULE := fwk_atc.bin + +LOCAL_CFLAGS += -Werror 
-Wno-deprecated-declarations +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private + +LOCAL_SRC_FILES := \ + main.cc \ + single_op_parser.cc \ + ../session/omg.cc \ + ../ir_build/atc_ir_common.cc \ + +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH)/../ ./ \ + $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)metadef/inc/common/util \ + $(TOPDIR)parser \ + third_party/json/include \ + third_party/gflags/include \ + third_party/protobuf/include \ + proto/om.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/insert_op.proto \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libge_common \ + libascend_protobuf \ + libslog \ + libgraph \ + libregister \ + liberror_manager \ + libge_compiler \ + libruntime_compile \ + libparser_common \ + liberror_manager \ + +LOCAL_STATIC_LIBRARIES := libgflags + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_EXECUTABLE) diff --git a/ge/stub/gen_stubapi.py b/ge/stub/gen_stubapi.py index f2a6a287..d19b44a6 100644 --- a/ge/stub/gen_stubapi.py +++ b/ge/stub/gen_stubapi.py @@ -1,3 +1,10 @@ +#!/usr/bin/python3.7 +# -*- coding: UTF-8 -*- +#------------------------------------------------------------------- +# Purpose: +# Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved. 
+#------------------------------------------------------------------- + import os import re import sys diff --git a/metadef b/metadef index 29c31bb8..5b9a7f84 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 29c31bb87d8bbe6904ab6fa72034a803fb50a746 +Subproject commit 5b9a7f84a4347f8816d492aa51f2414ccf8a0744 diff --git a/parser b/parser index ba956d34..70369668 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit ba956d349d8ad3e864d27467f4f0119333cbadc6 +Subproject commit 70369668abebed84942d9f355494a89e82cc1eac diff --git a/tests/st/CMakeLists.txt b/tests/st/CMakeLists.txt deleted file mode 100644 index 56babec1..00000000 --- a/tests/st/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -cmake_minimum_required(VERSION 3.0) -set(CMAKE_CXX_STANDARD 11) -project(ge_st CXX C) - -set(CMAKE_CXX_FLAGS "-O1 -fPIC -Wl,-unresolved-symbols=ignore-in-shared-libs") - - -file(GLOB_RECURSE RES50_TRAIN_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "resnet50/resnet50_train.cc" - "resnet50/common.cc" -) - -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/graph) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/external/ge) -include_directories(${GE_SOURCE_DIR}/inc/external/graph) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/ops) -include_directories(/usr/local/HiAI/opp/op_proto/built-in/inc) - -add_executable(st_resnet50_train ${RES50_TRAIN_SRCS}) -target_link_libraries(st_resnet50_train - ${PROTOBUF_LIBRARY} - ge_client_train ge_memory -) \ No newline at end of file diff --git a/tests/st/resnet50/common.cc b/tests/st/resnet50/common.cc deleted file mode 100644 index 674ef926..00000000 --- a/tests/st/resnet50/common.cc +++ /dev/null @@ -1,768 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -#include "common.h" -#include "model.h" - -#define MAX_HEAD_SIZE 50 - -using namespace std; -using namespace ge; - -void update_op_format(Operator ops, Format format) { - printf("set format begin.........\n"); - ge::TensorDesc tensor_desc_x = ops.GetInputDesc("x"); - ge::TensorDesc tensor_desc_y = ops.GetOutputDesc("y"); - Format f_x0 = tensor_desc_x.GetFormat(); - Format f_y0 = tensor_desc_x.GetFormat(); - printf("before set x format:%d \n", f_x0); - printf("before set y format:%d \n", f_y0); - printf("format to be set is :%d \n", format); - tensor_desc_x.SetFormat(format); - tensor_desc_y.SetFormat(format); - ops.UpdateInputDesc("x", tensor_desc_x); - ops.UpdateOutputDesc("y", tensor_desc_y); - Format f_x = tensor_desc_x.GetFormat(); - Format f_y = tensor_desc_y.GetFormat(); - printf("after set x format:%d \n", f_x); - printf("after set y format:%d \n", f_y); -} - -/// getDimInfo: get dim info from data file -/// param: -/// fp: the testing datafile object -/// -/// return : -/// dim_info: array to store the info of the dim in datafile, like [4,3,3,6,3,162(3*3*6*3)],4 is dim size,3,3,6,3 is the -/// dim shape data_size: the size of the testing data including the data file -void getDimInfo(FILE *fp, std::vector &dim_info) { - // get dim info from hisi testing data file - uint32_t *dim_buffer = (uint32_t *)malloc(MAX_HEAD_SIZE * sizeof(uint32_t)); - fread(dim_buffer, sizeof(uint32_t), MAX_HEAD_SIZE, fp); - dim_info.push_back(*dim_buffer); // get dim size - - // get data shape to compute the datasize - uint64_t data_size = 1; - uint32_t i = 1; - for (; i <= dim_info[0]; i++) { - dim_info.push_back(*(dim_buffer + i)); - data_size *= *(dim_buffer + i); - } - dim_info.push_back(data_size); - - free(dim_buffer); -} - -/// readTestDataFile: read test date from hisi .t datafile -/// param: -/// infile: the path of hisi .t datafile -/// return: -/// dim_info: array to store the info of the dim in 
datafile, like [4,3,3,6,3],4 is dim size,3,3,6,3 is the dim shape -void *readTestDataFile(std::string infile, std::vector &dim_info) { - FILE *fp; - fp = fopen(infile.c_str(), "r"); - - if (fp == NULL) { - printf("ERROR: cant't open file %s\n", infile.c_str()); - return NULL; - } else { - getDimInfo(fp, dim_info); - uint64_t data_size = dim_info[dim_info.size() - 1]; - - fclose(fp); - - fp = fopen(infile.c_str(), "r"); - if (fp == NULL) { - printf("ERROR: cant't open file %s\n", infile.c_str()); - return NULL; - } - uint32_t *memory = (uint32_t *)malloc((dim_info[0] + 1 + data_size) * sizeof(uint32_t)); - fread(memory, sizeof(uint32_t), (dim_info[0] + 1 + data_size), fp); - fclose(fp); - return memory + (dim_info[0] + 1); - } -} - -void *readUint8TestDataFile(std::string infile, int size) { - FILE *fp; - fp = fopen(infile.c_str(), "r"); - - if (fp == NULL) { - printf("ERROR: cant't open file %s\n", infile.c_str()); - return NULL; - } - uint8_t *memory = (uint8_t *)malloc((size) * sizeof(uint8_t)); - fread(memory, sizeof(uint8_t), (size), fp); - fclose(fp); - return memory; -} - -/// allclose -/// param: -/// a:compared file a -/// b:compared file b -/// count: the count size which will compare -/// rtol: -/// atol: -/// return: -/// true or false -bool allclose(float *a, float *b, uint64_t count, float rtol = 1e-05, float atol = 1e-08) { - uint32_t i = 0; - - for (; i < count; ++i) { - if (fabs(a[i] - b[i]) > (atol + rtol * fabs(b[i]))) { - printf("compara failed: i= %d, a[i]=%f, b[i]=%f,atol=%f,rtol=%f\n", i, a[i], b[i], atol, rtol); - return false; - } - } - - return true; -} - -/// compFp32WithTData: compare the data with the data in hisi .t file -/// param: -/// actual_output_data: the result of ge -/// expected_data_file: the path of hisi .t result file -/// rtol: -/// atol: -/// return: -/// true of false -bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol = 1e-05, float atol = 1e-08) { - std::vector dim_info; - float 
*expected_output_data = (float *)readTestDataFile(expected_data_file, dim_info); - - uint32_t i = 1; - uint64_t data_size = 1; - for (; i <= dim_info[0]; i++) { - data_size *= dim_info[i]; - } - return allclose(actual_output_data, expected_output_data, data_size, rtol, atol); -} - -int SwitchDatatype(DataType dt) { - int size = 1; - if (dt == ge::DT_FLOAT) size = 4; - if (dt == ge::DT_INT32) size = 4; - if (dt == ge::DT_FLOAT16) size = 2; - if (dt == ge::DT_INT64) size = 8; - return size; -} - -ge::Tensor genTensor(std::vector tensor_shape, Format format, DataType dt) { - int size = 1; - for (int i = 0; i < tensor_shape.size(); i++) { - size = size * tensor_shape[i]; - } - - int data_type_size = SwitchDatatype(dt); - - size = abs(size * data_type_size); - vector data_value; - - if (size == 0) { - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt); - input_tensor_desc.SetRealDimCnt(tensor_shape.size()); - Tensor gen_tensor = Tensor(input_tensor_desc, data_value); - return gen_tensor; - } - for (int i = 0; i < size; i++) { - data_value.push_back(1); - } - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt); - input_tensor_desc.SetRealDimCnt(tensor_shape.size()); - Tensor gen_tensor = Tensor(input_tensor_desc, data_value); - return gen_tensor; -} - -ge::Tensor genTensor_withVaule(std::vector tensor_shape, float value) { - int size = 1; - for (int i = 0; i < tensor_shape.size(); i++) { - size = size * tensor_shape[i]; - } - - float *data_value = new float[size]; - for (int i = 0; i < size; i++) { - *(data_value + i) = value; - } - Tensor gen_ge_tensor; - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), FORMAT_NCHW); - gen_ge_tensor.SetTensorDesc(input_tensor_desc); - gen_ge_tensor.SetData((uint8_t *)data_value, size * 4); - - return gen_ge_tensor; -} - -Tensor genTesnor_Shape_as_data(std::vector tensor_shape) { - Format format = FORMAT_NCHW; - DataType dt = DT_INT32; - int size = 
tensor_shape.size(); - int32_t *tensor_data = new int32_t[size]; - std::cout << "shape tensor size:" << size << endl; - for (int i = 0; i < size; i++) { - *(tensor_data + i) = tensor_shape[i]; - } - - Tensor gen_tensor; - TensorDesc input_tensor_desc = TensorDesc(ge::Shape({size}), FORMAT_NCHW, DT_INT32); - gen_tensor.SetData((uint8_t *)tensor_data, size * GetDatTypeSize(dt)); - gen_tensor.SetTensorDesc(input_tensor_desc); - - return gen_tensor; -} - -/// train_flag is 0 when infer; train_flag is 1 when train; train_flag is 0 default -/// run_mode_path is not 0,1,2 when TBE; run_mode_path is 1 when FE; run_mode_path is 0 default -/// run_mode_path is 2 now when AICPU, ge.enabledlocalFmkop is 1 -ge::Status GEInitialize_api(string train_flag, string run_mode_path) { - ge::Status ret; - if (run_mode_path == "0") { - const std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {"ge.aicpuFlag", "1"}, - {"ge.feFlag", "1"}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {"ge.soLoadPath", - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libaicpu_plugin.so"}}; - ret = ge::GEInitialize(config); - } else if (run_mode_path == "1") { - const std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {"ge.feFlag", "1"}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/bert"}, - {"ge.soLoadPath", "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so"}}; - ret = ge::GEInitialize(config); - } else if (run_mode_path == "2") { - const std::map config = {{"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {LOCAL_FMKOP_FLAG, "1"}}; - ret = ge::GEInitialize(config); - } else { - const std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from 
csa/paas"}, - {"ge.graphRunMode", train_flag}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + run_mode_path}}; - ret = ge::GEInitialize(config); - } - std::cout << "GEInitialize_ret is " << ret << std::endl; - - return ret; -} - -/// train_flag is infer default -/// run_mode: is multi group of [fe,aicpu,bert,deeplabv3,mobilenetv2,single_path_nas,ssd] -/// but bert,deeplabv3,mobilenetv2,single_path_nas,ssd can only set one value from array -/// eg:"fe,aicpu,bert" or "fe", default is “fe” -/// "fe,aicpu,bert" remain open fe aicpu and bert -ge::Status GEInitialize_api_new(string train_flag, string run_mode) { - ge::Status ret; - vector modes; - - char *strs = new char[run_mode.length() + 1]; - strcpy(strs, run_mode.c_str()); - const char *delim = ","; - char *p = strtok(strs, delim); - while (p) { - string s = p; // transform substr to string - modes.push_back(s); // save to result array - p = strtok(NULL, delim); - } - - std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {"ge.opsProtoLibPath", "/usr/local/HiAI/runtime/ops/op_proto/built-in/libopsproto.so"}}; - if (train_flag == "infer") - config.insert(pair("ge.graphRunMode", "0")); - else if (train_flag == "train") - config.insert(pair("ge.graphRunMode", "1")); - else - std::cout << "GeInitialize give the error param" << std::endl; - - for (int i = 0; i < modes.size(); i++) { - if (modes[i] == "fe") { - config.insert(pair("ge.feFlag", "1")); - if (config.find("ge.soLoadPath") != config.end()) { - config["ge.soLoadPath"] = - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/" - "runtime/lib64/plugin/opskernel/librts_engine.so"; - } else { - config.insert(pair( - "ge.soLoadPath", - 
"/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so")); - } - } else if (modes[i] == "aicpu") { - config.insert(pair("ge.aicpuFlag", "1")); - if (config.find("ge.soLoadPath") != config.end()) { - config["ge.soLoadPath"] = - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/" - "runtime/lib64/plugin/opskernel/librts_engine.so"; - } else { - config.insert(pair( - "ge.soLoadPath", - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/" - "opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so")); - } - } else if (modes[i] == "bert" || modes[i] == "deeplabv3" || modes[i] == "mobilenetv2" || - modes[i] == "single_path_nas" || modes[i] == "ssd") { - config.insert(pair(TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + modes[i])); - } else if (modes[i] == "plugin") { - - } else - std::cout << "GeInitialize give the error param" << std::endl; - } - ret = ge::GEInitialize(config); - - std::cout << "GEInitialize_ret is " << ret << std::endl; - - return ret; -} - -ge::Status GEFinalize_api() { - ge::Status ret = ge::GEFinalize(); - std::cout << "GEFinalize ret is " << ret << std::endl; - - return ret; -} - -/// set train_flag -/// if run_mode_path is "fe" remain FE process; "fe,plugin" is FE and TBE plugin process -/// "aicpu" is open aicpu plugin -int RunGraph_initData(Graph &graph, string op_name, map> attr_test, string train_flag, - string run_mode_path) { - std::map options = {{RUN_FLAG, "1"}}; - uint32_t graph_id = 0; - - ge::Status ret = GEInitialize_api_new(train_flag, run_mode_path); - EXPECT_EQ(ret, ge::SUCCESS); - - ge::Session *session = new 
Session(options); - ASSERT_TRUE(session != NULL); - - std::vector input; - if (attr_test.find("input1") != attr_test.end()) { - Tensor input_tensor = genTensor(attr_test["input1"]); - input.push_back(input_tensor); - } - if (attr_test.find("input2") != attr_test.end()) { - Tensor input_tensor = genTensor(attr_test["input2"]); - input.push_back(input_tensor); - } - if (attr_test.find("input3") != attr_test.end()) { - Tensor input_tensor = genTensor(attr_test["input3"]); - input.push_back(input_tensor); - } - std::vector output; - - ret = session->AddGraph(graph_id, graph); - EXPECT_EQ(ret, ge::SUCCESS); - if (train_flag == "1") { - setenv("GE_TRAIN", "1", true); - ret = session->RunGraph(graph_id, input, output); - setenv("GE_TRAIN", "0", true); - } else { - ret = session->RunGraph(graph_id, input, output); - } - delete session; - GEFinalize_api(); - - if (ret != ge::SUCCESS) { - std::cout << " run graph failed" << std::endl; - return -1; - } else { - return 0; - } -} - -ge::Status session_add_and_run_graph(ge::Session *session, uint32_t graph_id, Graph &graph, std::vector inputs, - std::vector &outputs) { - ge::Status ret = session->AddGraph(graph_id, graph); - EXPECT_EQ(ret, ge::SUCCESS); - ret = session->RunGraph(graph_id, inputs, outputs); - - return ret; -} - -ge::Session *create_session() { - // Init session - std::map options = {{"a", "b"}, {TRAIN_FLAG, "1"}}; - ge::Session *session = new Session(options); - ASSERT_TRUE(session != NULL); - - return session; -} - -ge::Session *create_aipp_session() { - // Init session - std::map options = {{"a", "b"}, {TRAIN_FLAG, "1"}, {"ge.insertOpFile", "/root/host/ge/aipp.cfg"}}; - ge::Session *session = new Session(options); - ASSERT_TRUE(session != NULL); - - return session; -} - -int buildCheckPointGraph(Graph &graph, map variables) { - std::vector inputs{}; - std::vector outputs{}; - - for (map::iterator it = variables.begin(); it != variables.end(); ++it) { - auto var = op::Variable(string(it->first)); - 
var.update_output_desc_y(it->second); - inputs.push_back(var); - graph.AddOp(var); - } - - auto save = op::Save().create_dynamic_input_tensors(inputs.size()); - for (int i = 0; i < inputs.size(); i++) { - save.set_dynamic_input_tensors(i, inputs[i]); - } - - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} - -int buildInitGraph(Graph &graph, std::vector desc_var, std::vector name_var, - std::vector values_var) { - std::vector inputs{}; - std::vector outputs{}; - - for (int i = 0; i < desc_var.size(); i++) { - desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum()); - auto tensor_data = genTensor_withVaule(desc_var[i].GetShape().GetDims(), values_var[i]); - auto var_constant = op::Constant().set_attr_value(tensor_data); - var_constant.update_output_desc_y(desc_var[i]); - - auto var_init = op::Variable(string(name_var[i])); - var_init.update_output_desc_y(desc_var[i]); - auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant); - inputs.push_back(var_init); - } - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} - -int buildInitGraph_other_dataType(Graph &graph, std::vector desc_var, std::vector name_var) { - std::vector inputs{}; - std::vector outputs{}; - - for (int i = 0; i < desc_var.size(); i++) { - desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum()); - auto tensor_data = genTensor(desc_var[i].GetShape().GetDims(), desc_var[i].GetFormat(), desc_var[i].GetDataType()); - auto var_constant = op::Constant().set_attr_value(tensor_data); - var_constant.update_output_desc_y(desc_var[i]); - - auto var_init = op::Variable(string(name_var[i])); - var_init.update_output_desc_y(desc_var[i]); - auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant); - inputs.push_back(var_init); - - graph.AddOp(var_constant); - graph.AddOp(var_init); - graph.AddOp(var_assign); - } - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} - -bool build_multi_input_multi_output_graph(Graph 
&graph) { - auto data1 = op::Data("Data1").set_attr_index(0); - auto data2 = op::Data("Data2").set_attr_index(1); - - vector dim_info; - - auto relu1 = op::Relu("Relu1").set_input_x(data1); - auto relu2 = op::Relu("Relu2").set_input_x(data2); - - auto eltwise = op::Eltwise("Eltwise") - .create_dynamic_input_x(2) - .set_dynamic_input_x(0, relu1) - .set_dynamic_input_x(1, relu2) - .set_attr_N(2) - .set_attr_mode(1) - .set_attr_coeff({1, 1}); - - auto eltwise1 = op::Eltwise("Eltwise1") - .create_dynamic_input_x(2) - .set_dynamic_input_x(0, eltwise) - .set_dynamic_input_x(1, eltwise) - .set_attr_N(2) - .set_attr_mode(1) - .set_attr_coeff({1, 1}); - - auto eltwise2 = op::Eltwise("Eltwise2") - .create_dynamic_input_x(2) - .set_dynamic_input_x(0, eltwise) - .set_dynamic_input_x(1, eltwise) - .set_attr_N(2) - .set_attr_mode(1) - .set_attr_coeff({1, 1}); - - std::vector inputs{data1, data2}; - std::vector outputs{eltwise1, eltwise2}; - graph.SetInputs(inputs).SetOutputs(outputs); - return true; -} - -void build_big_graph(Graph &graph, map> attr) { - auto data = op::Data("Data").set_attr_index(0); - auto weight = op::Const("weight1").set_attr_value(genTensor(attr["weight"])); - vector weight_shape(attr["weight"].begin(), attr["weight"].end()); - TensorDesc weight_desc(ge::Shape(weight_shape), FORMAT_NCHW, DT_FLOAT); - weight.update_output_desc_y(weight_desc); - auto conv_1 = op::Conv2D("conv1").set_input_x(data).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - - auto conv_2 = op::Conv2D("conv2").set_input_x(conv_1).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_3 = op::Conv2D("conv3").set_input_x(conv_2).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_4 = op::Conv2D("conv4").set_input_x(conv_3).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_5 = 
op::Conv2D("conv5").set_input_x(conv_4).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_6 = op::Conv2D("conv6").set_input_x(conv_5).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_7 = op::Conv2D("conv7").set_input_x(conv_6).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_8 = op::Conv2D("conv8").set_input_x(conv_7).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_9 = op::Conv2D("conv9").set_input_x(conv_8).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_10 = op::Conv2D("conv10").set_input_x(conv_9).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_11 = op::Conv2D("conv11").set_input_x(conv_10).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_12 = op::Conv2D("conv12").set_input_x(conv_11).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_13 = op::Conv2D("conv13").set_input_x(conv_12).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_14 = op::Conv2D("conv14").set_input_x(conv_13).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_15 = op::Conv2D("conv15").set_input_x(conv_14).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_16 = op::Conv2D("conv16").set_input_x(conv_15).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_17 = op::Conv2D("conv17").set_input_x(conv_16).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_18 = op::Conv2D("conv18").set_input_x(conv_17).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_19 = 
op::Conv2D("conv19").set_input_x(conv_18).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_20 = op::Conv2D("conv20").set_input_x(conv_19).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_21 = op::Conv2D("conv21").set_input_x(conv_20).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_22 = op::Conv2D("conv22").set_input_x(conv_21).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_23 = op::Conv2D("conv23").set_input_x(conv_22).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_24 = op::Conv2D("conv24").set_input_x(conv_23).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_25 = op::Conv2D("conv25").set_input_x(conv_24).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_26 = op::Conv2D("conv26").set_input_x(conv_25).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_27 = op::Conv2D("conv27").set_input_x(conv_26).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_28 = op::Conv2D("conv28").set_input_x(conv_27).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_29 = op::Conv2D("conv29").set_input_x(conv_28).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_30 = op::Conv2D("conv30").set_input_x(conv_29).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_31 = op::Conv2D("conv31").set_input_x(conv_30).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_32 = op::Conv2D("conv32").set_input_x(conv_31).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_33 = 
op::Conv2D("conv33").set_input_x(conv_32).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_34 = op::Conv2D("conv34").set_input_x(conv_33).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_35 = op::Conv2D("conv35").set_input_x(conv_34).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_36 = op::Conv2D("conv36").set_input_x(conv_35).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_37 = op::Conv2D("conv37").set_input_x(conv_36).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_38 = op::Conv2D("conv38").set_input_x(conv_37).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_39 = op::Conv2D("conv39").set_input_x(conv_38).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_40 = op::Conv2D("conv40").set_input_x(conv_39).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_41 = op::Conv2D("conv41").set_input_x(conv_40).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_42 = op::Conv2D("conv42").set_input_x(conv_41).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_43 = op::Conv2D("conv43").set_input_x(conv_42).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_44 = op::Conv2D("conv44").set_input_x(conv_43).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_45 = op::Conv2D("conv45").set_input_x(conv_44).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_46 = op::Conv2D("conv46").set_input_x(conv_45).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_47 = 
op::Conv2D("conv47").set_input_x(conv_46).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_48 = op::Conv2D("conv48").set_input_x(conv_47).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_49 = op::Conv2D("conv49").set_input_x(conv_48).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_50 = op::Conv2D("conv50").set_input_x(conv_49).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_51 = op::Conv2D("conv51").set_input_x(conv_50).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_52 = op::Conv2D("conv52").set_input_x(conv_51).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_53 = op::Conv2D("conv53").set_input_x(conv_52).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_54 = op::Conv2D("conv54").set_input_x(conv_53).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_55 = op::Conv2D("conv55").set_input_x(conv_54).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_56 = op::Conv2D("conv56").set_input_x(conv_55).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_57 = op::Conv2D("conv57").set_input_x(conv_56).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_58 = op::Conv2D("conv58").set_input_x(conv_57).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_59 = op::Conv2D("conv59").set_input_x(conv_58).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_60 = op::Conv2D("conv60").set_input_x(conv_59).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_61 = 
op::Conv2D("conv61").set_input_x(conv_60).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_62 = op::Conv2D("conv62").set_input_x(conv_61).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_63 = op::Conv2D("conv63").set_input_x(conv_62).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_64 = op::Conv2D("conv64").set_input_x(conv_63).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_65 = op::Conv2D("conv65").set_input_x(conv_64).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_66 = op::Conv2D("conv66").set_input_x(conv_65).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_67 = op::Conv2D("conv67").set_input_x(conv_66).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_68 = op::Conv2D("conv68").set_input_x(conv_67).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_69 = op::Conv2D("conv69").set_input_x(conv_68).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_70 = op::Conv2D("conv70").set_input_x(conv_69).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_71 = op::Conv2D("conv71").set_input_x(conv_70).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_72 = op::Conv2D("conv72").set_input_x(conv_71).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_73 = op::Conv2D("conv73").set_input_x(conv_72).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_74 = op::Conv2D("conv74").set_input_x(conv_73).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_75 = 
op::Conv2D("conv75").set_input_x(conv_74).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_76 = op::Conv2D("conv76").set_input_x(conv_75).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_77 = op::Conv2D("conv77").set_input_x(conv_76).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_78 = op::Conv2D("conv78").set_input_x(conv_77).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_79 = op::Conv2D("conv79").set_input_x(conv_78).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_80 = op::Conv2D("conv80").set_input_x(conv_79).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_81 = op::Conv2D("conv81").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_82 = op::Conv2D("conv82").set_input_x(conv_81).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_83 = op::Conv2D("conv83").set_input_x(conv_82).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_84 = op::Conv2D("conv84").set_input_x(conv_83).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_85 = op::Conv2D("conv85").set_input_x(conv_84).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_86 = op::Conv2D("conv86").set_input_x(conv_85).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_87 = op::Conv2D("conv87").set_input_x(conv_86).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_88 = op::Conv2D("conv88").set_input_x(conv_87).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_89 = 
op::Conv2D("conv89").set_input_x(conv_88).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_90 = op::Conv2D("conv90").set_input_x(conv_89).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_91 = op::Conv2D("conv91").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_92 = op::Conv2D("conv92").set_input_x(conv_91).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_93 = op::Conv2D("conv93").set_input_x(conv_92).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_94 = op::Conv2D("conv94").set_input_x(conv_93).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_95 = op::Conv2D("conv95").set_input_x(conv_94).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_96 = op::Conv2D("conv96").set_input_x(conv_95).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_97 = op::Conv2D("conv97").set_input_x(conv_96).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_98 = op::Conv2D("conv98").set_input_x(conv_97).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_99 = op::Conv2D("conv99").set_input_x(conv_98).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_100 = op::Conv2D("conv100").set_input_x(conv_99).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_101 = op::Conv2D("conv101").set_input_x(conv_100).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_102 = op::Conv2D("conv102").set_input_x(conv_101).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_103 = 
op::Conv2D("conv103").set_input_x(conv_102).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_104 = op::Conv2D("conv104").set_input_x(conv_103).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_105 = op::Conv2D("conv105").set_input_x(conv_104).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_106 = op::Conv2D("conv106").set_input_x(conv_105).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_107 = op::Conv2D("conv107").set_input_x(conv_106).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_108 = op::Conv2D("conv108").set_input_x(conv_107).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_109 = op::Conv2D("conv109").set_input_x(conv_108).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_110 = op::Conv2D("conv110").set_input_x(conv_109).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_111 = op::Conv2D("conv111").set_input_x(conv_110).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_112 = op::Conv2D("conv112").set_input_x(conv_111).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_113 = op::Conv2D("conv113").set_input_x(conv_112).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_114 = op::Conv2D("conv114").set_input_x(conv_113).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_115 = op::Conv2D("conv115").set_input_x(conv_114).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_116 = op::Conv2D("conv116").set_input_x(conv_115).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_117 = 
op::Conv2D("conv117").set_input_x(conv_116).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_118 = op::Conv2D("conv118").set_input_x(conv_117).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_119 = op::Conv2D("conv119").set_input_x(conv_118).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_120 = op::Conv2D("conv120").set_input_x(conv_119).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_121 = op::Conv2D("conv121").set_input_x(conv_120).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_122 = op::Conv2D("conv122").set_input_x(conv_121).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_123 = op::Conv2D("conv123").set_input_x(conv_122).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_124 = op::Conv2D("conv124").set_input_x(conv_123).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_125 = op::Conv2D("conv125").set_input_x(conv_124).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_126 = op::Conv2D("conv126").set_input_x(conv_125).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_127 = op::Conv2D("conv127").set_input_x(conv_126).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_128 = op::Conv2D("conv128").set_input_x(conv_127).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_129 = op::Conv2D("conv129").set_input_x(conv_128).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_130 = op::Conv2D("conv130").set_input_x(conv_129).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - - std::vector inputs{data}; - std::vector 
outputs{conv_130}; - graph.SetInputs(inputs).SetOutputs(outputs); -} - -int GetDatTypeSize(DataType dt) { - int dailation = 1; - if (dt == ge::DT_FLOAT) - dailation = 4; - else if (dt == ge::DT_FLOAT16) - dailation = 2; - else if (dt == ge::DT_INT16) - dailation = 2; - else if (dt == ge::DT_UINT16) - dailation = 2; - else if (dt == ge::DT_INT32) - dailation = 4; - else if (dt == ge::DT_UINT32) - dailation = 4; - else if (dt == ge::DT_INT64) - dailation = 8; - else if (dt == ge::DT_UINT64) - dailation = 8; - else if (dt == ge::DT_INT8) - dailation = 1; - - return dailation; -} - -int buildConvGraph_new(Graph &graph, std::vector desc_var, std::vector name_var, int flag, - Format format) { - auto data_x_shape = op::Data("xShape").set_attr_index(0); - auto var = op::Variable(name_var[0]); - auto var1 = op::Variable(name_var[1]); //add one seat of ApplyMomentum() - auto label1 = op::Variable(name_var[2]); //add one seat of ApplyMomentum() - auto conv2dgrad = op::Conv2DBackpropFilterD("output_1"); - auto test2 = op::ApplyMomentum(); - - var.update_output_desc_y(desc_var[0]); - var1.update_output_desc_y(desc_var[1]); - label1.update_output_desc_y(desc_var[2]); - - graph.AddOp(var); - graph.AddOp(var1); - graph.AddOp(label1); - - auto conv2d = op::Conv2D().set_input_x(data_x_shape).set_input_filter(var).set_attr_strides({1, 1, 1, 1}).set_attr_pads({0,0,0,0}); - update_op_format(conv2d, format); - ge::TensorDesc tensor_desc_w = conv2d.GetInputDesc("filter"); - tensor_desc_w.SetFormat(format); - conv2d.UpdateInputDesc("filter", tensor_desc_w); - - if (flag >= 1) { - conv2dgrad.set_input_x(data_x_shape) - .set_attr_filter_size(desc_var[0].GetShape().GetDims()) - .set_input_out_backprop(conv2d) - .set_attr_strides({1, 1, 1, 1}) - .set_attr_pads({0, 0, 0, 0}); - update_op_format(conv2dgrad, format); - graph.AddOp(conv2dgrad); - } - if (flag >= 2) { - // set conv2dgrad var - test2.set_input_accum(var1) - .set_input_grad(conv2dgrad) - .set_input_lr(label1) - 
.set_input_momentum(label1) - .set_input_var(var); - graph.AddOp(test2); - } - - std::vector inputs{data_x_shape}; // set all val - std::vector outputs{conv2d}; - graph.SetInputs(inputs).SetOutputs(outputs); - graph.AddOp(conv2d); - - return 0; -} - -/// load bin data_fail -/// input_path: path of bin data_file -/// shapes: the shape of Tensor -/// ft: the format of Tensor -/// dt: the dataType of Tensor -Tensor load_variable_input_data(string input_path, std::vector shapes, Format ft, DataType dt) { - vector dim_info1; - - uint8_t *input_data = (uint8_t *)readTestDataFile(input_path, dim_info1); // common.h - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(shapes), ft, dt); - input_tensor_desc.SetRealDimCnt(shapes.size()); - Tensor input_tensor = Tensor(input_tensor_desc, input_data, GetDatTypeSize(dt) * dim_info1[dim_info1[0] + 1]); - return input_tensor; -} diff --git a/tests/st/resnet50/common.h b/tests/st/resnet50/common.h deleted file mode 100644 index 75805db7..00000000 --- a/tests/st/resnet50/common.h +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ST_RESNET50_GE_COMMON_H_ -#define ST_RESNET50_GE_COMMON_H_ -#include "common/ge_inner_error_codes.h" -#include "utils/tensor_utils.h" - -#define MY_USER_GE_LOGI(...) GE_LOG_INFO(1, __VA_ARGS__) -#define MY_USER_GE_LOGW(...) GE_LOG_WARN(1, __VA_ARGS__) -#define MY_USER_GE_LOGE(...) 
GE_LOG_ERROR(1, 3, __VA_ARGS__) - -#ifndef USER_GE_LOGI -#define USER_GE_LOGI MY_USER_GE_LOGI -#endif // USER_GE_LOGI - -#ifndef USER_GE_LOGW -#define USER_GE_LOGW MY_USER_GE_LOGW -#endif // USER_GE_LOGW - -#ifndef USER_GE_LOGE -#define USER_GE_LOGE MY_USER_GE_LOGE -#endif // USER_GE_LOGE - -/// train_flag is 0 when infer, train_flag is 1 when train.this param is set for RunGranph_readData() and -/// RunGraph_initData() -#define TRAIN_FLAG_INFER "infer" -#define TRAIN_FLAG_TRAIN "train" - -#include -#include -#include -#include -#include -#include -#include - -#include "ge_api.h" -#include "graph.h" -#include "ptest.h" -#include "ops/all_ops.h" -using namespace std; -using namespace ge; - -// read bin file and compile result -void update_op_format(Operator ops, Format format = ge::FORMAT_NCHW); -void getDimInfo(FILE *fp, std::vector &dim_info); -void *readTestDataFile(std::string infile, std::vector &dim_info); -void *readUint8TestDataFile(std::string infile, int size); -bool allclose(float *a, float *b, uint64_t count, float rtol, float atol); -bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol, float atol); -Tensor load_variable_input_data(string input_path, std::vector shapes, Format ft = ge::FORMAT_NCHW, - DataType dt = ge::DT_FLOAT); -// constructor Tensor -int GetDatTypeSize(DataType dt); -ge::Tensor genTensor(std::vector tensor_shape, Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT); -ge::Tensor genTensor_withVaule(std::vector tensor_shape, float value = 1); -Tensor genTesnor_Shape_as_data(std::vector tensor_shape); -// Init GE -ge::Status GEInitialize_api(string train_flag = "0", string run_mode_path = "0"); -ge::Status GEInitialize_api_new(string train_flag = "infer", string run_mode = "fe"); -ge::Status GEFinalize_api(); -// constructor session and build graph -ge::Session *create_aipp_session(); -ge::Session *create_session(); -ge::Status session_add_and_run_graph(ge::Session *session, uint32_t 
graphId, Graph &graph, std::vector inputs, - std::vector &outputs); - -// common interface for infer -int RunGraph_initData(Graph &graph, string op_name, map> attr_test, - string train_flag = "infer", string run_mode_path = "fe"); -void Inputs_load_Data(string op_name, std::vector &input, map> attr_test, - Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT); -bool comparaData(std::vector &output, string op_name, map> attr_test); -int RunGraph_readData(Graph &graph, string op_name, map> attr_test, - string train_flag = "infer", string run_mode_path = "fe", Format format = ge::FORMAT_NCHW, - DataType dt = ge::DT_FLOAT); - -// common interface for train -int buildCheckPointGraph(Graph &graph, map variables); -int buildInitGraph(Graph &graph, std::vector desc_var, std::vector name_var, - std::vector values_var); -int buildInitGraph_other_dataType(Graph &graph, std::vector desc_var, std::vector name_var); - -bool build_multi_input_multi_output_graph(Graph &graph); -void build_big_graph(Graph &graph, map> attr); -int buildConvGraph_new(Graph &graph, std::vector desc_var, std::vector name_var, int flag = 2); - -#endif // ST_RESNET50_GE_COMMON_H_ diff --git a/tests/st/resnet50/ptest.h b/tests/st/resnet50/ptest.h deleted file mode 100644 index 568969f8..00000000 --- a/tests/st/resnet50/ptest.h +++ /dev/null @@ -1,225 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ST_RESNET50_PTEST_H_ -#define ST_RESNET50_PTEST_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace ptest { -class assertion_error : public std::exception { - public: - const char *what() const throw() { return "Assertion Exception"; } -}; - -class TestFixture { - public: - virtual void SetUp() {} - virtual void TearDown() {} - void Run() { _func(); } - void BindFunction(std::function function) { _func = function; } - void SetName(const std::string &name) { _name = name; } - std::string Name() const { return _name; } - virtual ~TestFixture() {} - - private: - std::function _func; - std::string _name; -}; - -enum TestResult { SUCCESS, FAILED, UNAVAILABLE, UNKNOWN, NOCASEFOUND }; - -class TestManager { - public: - static TestManager &GetSingleton() { - static TestManager instance; - return instance; - } - void RegisterTest(const std::string &name, TestFixture *fixture) { _testfixtures[name] = fixture; } - - const std::string GetRunningTestcaseName() const { return _running_testcase_name; } - - const std::list GetAllTestNames() const { - std::list result; - for (auto &t : _testfixtures) { - result.push_back(t.first); - } - return result; - } - - TestResult RunTest(const std::string &name) { - if (_testfixtures.find(name) == _testfixtures.end()) { - return NOCASEFOUND; - } - - _running_testcase_name = name; - - do { - SetTestResult(name, UNKNOWN); - _testfixtures[name]->SetUp(); - if (_testresults[name] == FAILED) { - _testresults[name] = UNAVAILABLE; - break; - } - SetTestResult(name, SUCCESS); - try { - _testfixtures[name]->Run(); - } catch (assertion_error &e) { - // Do nothing as the error has been handled by the TestManager. 
- } - _testfixtures[name]->TearDown(); - } while (0); - - return _testresults[name]; - } - void SetTestResult(const std::string &name, TestResult result) { _testresults[name] = result; } - TestResult GetTestResult(const std::string &name) { return _testresults[name]; } - - private: - std::map _testfixtures; - std::map _testresults; - std::string _running_testcase_name; -}; - -class TestFixtureRegister { - public: - TestFixtureRegister(const std::string &name, TestFixture *fixture, std::function function) { - fixture->BindFunction(function); - fixture->SetName(name); - TestManager::GetSingleton().RegisterTest(name, fixture); - } -}; -} // namespace ptest - -#define _STR(x) #x -#define _EMPTY_NAMESPACE - -#define _TEST(NAMESPACE, FIXTURECLASS, TESTNAME, CASENAME) \ - void g_func_##TESTNAME##_##CASENAME(void); \ - NAMESPACE::FIXTURECLASS g_fixture_##TESTNAME##_##CASENAME; \ - ptest::TestFixtureRegister g_register_##TESTNAME##_##CASENAME( \ - _STR(TESTNAME##_##CASENAME), &g_fixture_##TESTNAME##_##CASENAME, g_func_##TESTNAME##_##CASENAME); \ - void g_func_##TESTNAME##_##CASENAME(void) - -#define TEST(TESTNAME, CASENAME) _TEST(ptest, TestFixture, TESTNAME, CASENAME) - -#define TEST_F(TESTFIXTURE, CASENAME) _TEST(_EMPTY_NAMESPACE, TESTFIXTURE, TESTFIXTURE, CASENAME) - -#define EXPECT_TRUE(X) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << "Expectation Failed\n" \ - << "Testcase Name: " << test_name << "\n" \ - << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl; \ - } \ - } while (0); - -// With the macro definition ensures that the compiler can detect compiler warning. -#define Max_Log_Len 1024 -#define PRINT_ERR(lpszFormat, ...) 
\ - do { \ - char szTmpBuf[Max_Log_Len + 1] = {0}; \ - snprintf(szTmpBuf, Max_Log_Len, lpszFormat, ##__VA_ARGS__); \ - std::cerr << szTmpBuf << std::endl; \ - } while (0) - -// Increase the content of print error messages and error to facilitate rapid analysis -#define EXPECT_TRUE_C(X, ERR_TYPE, format, ...) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << " Expectation Failed." \ - << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \ - PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__); \ - } \ - } while (0) - -#define ASSERT_TRUE(X) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << "Assertion Failed\n" \ - << "Testcase Name: " << test_name << "\n" \ - << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl; \ - throw ptest::assertion_error(); \ - } \ - } while (0); - -// Add printing error information and error line content for quick analysis -#define ASSERT_TRUE_C(X, ERR_TYPE, format, ...) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << " Assertion Failed." 
\ - << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \ - PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__); \ - throw ptest::assertion_error(); \ - } \ - } while (0); - -#define CONFIG_ERR "CONFIG_ERR" -#define LOAD_MODEL_ERR "LOAD_MODEL_ERR" -#define FILE_READ_ERR "FILE_READ_ERR" -#define RUN_ERROR "RUN_ERROR" -#define MEM_ERROR "MEM_ERROR" -#define RESULT_ERR "RESULT_ERR" - -#define EXPECT_FALSE(X) EXPECT_TRUE(!(X)) -#define EXPECT_EQ(X, Y) EXPECT_TRUE(((X) == (Y))) -#define EXPECT_NE(X, Y) EXPECT_TRUE(((X) != (Y))) -#define EXPECT_GT(X, Y) EXPECT_TRUE(((X) > (Y))) -#define EXPECT_GE(X, Y) EXPECT_TRUE(((X) >= (Y))) -#define EXPECT_LT(X, Y) EXPECT_TRUE(((X) < (Y))) -#define EXPECT_LE(X, Y) EXPECT_TRUE(((X) <= (Y))) - -#define EXPECT_FALSE_C(X, ERR_TYPE, format, ...) EXPECT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_EQ_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_NE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_GT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_GE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_LT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_LE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__) - -#define ASSERT_FALSE(X) ASSERT_TRUE(!(X)) -#define ASSERT_EQ(X, Y) ASSERT_TRUE(((X) == (Y))) -#define ASSERT_NE(X, Y) ASSERT_TRUE(((X) != (Y))) -#define ASSERT_GT(X, Y) ASSERT_TRUE(((X) > (Y))) -#define ASSERT_GE(X, Y) ASSERT_TRUE(((X) >= (Y))) -#define ASSERT_LT(X, Y) ASSERT_TRUE(((X) < (Y))) -#define ASSERT_LE(X, Y) ASSERT_TRUE(((X) <= (Y))) - -#define ASSERT_FALSE_C(X, ERR_TYPE, format, ...) 
ASSERT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_EQ_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_NE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_GT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_GE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_LT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_LE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__) - -#endif // ST_RESNET50_PTEST_H_ diff --git a/tests/st/resnet50/resnet50_train.cc b/tests/st/resnet50/resnet50_train.cc deleted file mode 100644 index f1d1e58d..00000000 --- a/tests/st/resnet50/resnet50_train.cc +++ /dev/null @@ -1,852 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include "common.h" -#include "ge_api.h" -#include "graph.h" -#include "ops/all_ops.h" -#include "types.h" -#include "utils/tensor_utils.h" - -using namespace std; -using namespace ge; -using namespace op; - -typedef bool (*Func)(Graph &graph); - -#define PADDING_MODE 6 -#define GRAD_PADDING_MODE 3 -vector pad_1{1, 1, 1, 1}; -vector pad_0{0, 0, 0, 0}; -vector stride_1{1, 1}; -vector stride_2{2, 2}; - -// (int out_channels, int h, int w, vector stride{1,1}, vector pad{1,1,1,1}, op::Data() input) -#define GENERATE_CONV_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \ - auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \ - LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_weight = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight"); \ - LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - LAYER##_##BLK##_##OPNUM##_mom_weight.update_input_desc_x(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) << "'s weight shape is:" << in_channels << out_channels << h \ - << w << endl; \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) \ - << "'s input_x op's shape is:" << input.GetOutputDesc("y").GetShape().GetDim(2) << endl; \ - auto LAYER##_##BLK##_##OPNUM##_tmp_dims = input.GetOutputDesc("y").GetShape().GetDims(); \ - for (auto LAYER##_##BLK##_##OPNUM##_tmp_it = LAYER##_##BLK##_##OPNUM##_tmp_dims.begin(); \ - LAYER##_##BLK##_##OPNUM##_tmp_it != LAYER##_##BLK##_##OPNUM##_tmp_dims.end(); \ - LAYER##_##BLK##_##OPNUM##_tmp_it++) { \ - cout << 
*LAYER##_##BLK##_##OPNUM##_tmp_it; \ - } \ - cout << endl; \ - \ - auto LAYER##_##BLK##_##OPNUM = op::Conv2D(string(#LAYER) + string(#BLK) + string(#OPNUM)) \ - .set_input_x(input, "y") \ - .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \ - .set_attr_strides({1, 1, stride[0], stride[1]}) \ - .set_attr_pads(pad) \ - .set_attr_data_format("NCHW"); \ - update_op_format(LAYER##_##BLK##_##OPNUM); - -#define GENERATE_CONSTANT(LAYER, BLK, OPNUM, CONSTNAME) \ - Tensor LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor; \ - float *LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data = new float[LAYER##_##BLK##_##OPNUM##_size]; \ - for (int i = 0; i < (int)LAYER##_##BLK##_##OPNUM##_size; i++) { \ - *(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data + i) = 0.01; \ - } \ - LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetData((uint8_t *)LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data, \ - LAYER##_##BLK##_##OPNUM##_size * sizeof(float)); \ - LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetTensorDesc(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant = \ - op::Constant().set_attr_value(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor); \ - LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - delete[] LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data; - -#define GENERATE_CONV_VAR_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \ - uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \ - auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \ - LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_weight = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + 
"_mom_weight"); \ - LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, weight); \ - auto LAYER##_##BLK##_##OPNUM##_weight_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_weight) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_weight_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_weight); \ - auto LAYER##_##BLK##_##OPNUM##_mom_weight_assign = \ - op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_weight) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_weight_constant); \ - \ - input.push_back(LAYER##_##BLK##_##OPNUM##_weight); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mom_weight); - -// (int out_channels, Operator& input) -#define GENERATE_BN_VAR(LAYER, BLK, OPNUM, out_channels, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \ - auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \ - LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_scale = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \ - LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \ - LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \ - LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \ - 
LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - auto LAYER##_##BLK##_##OPNUM##_variance = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \ - LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM = op::FusedBatchNorm(string(#LAYER) + string(#BLK) + string(#OPNUM)) \ - .set_input_x(input, "y") \ - .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \ - .set_input_b(LAYER##_##BLK##_##OPNUM##_b) \ - .set_input_mean(LAYER##_##BLK##_##OPNUM##_mean) \ - .set_input_variance(LAYER##_##BLK##_##OPNUM##_variance) \ - .set_attr_mode(1) \ - .set_attr_epsilon(1e-5) \ - .set_attr_is_training(true); - -#define GENERATE_BN_VAR_VAR(LAYER, BLK, OPNUM, out_channels, input) \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \ - uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \ - auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \ - LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_scale = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \ - LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \ - LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \ - LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \ - 
LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - auto LAYER##_##BLK##_##OPNUM##_variance = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \ - LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, scale); \ - \ - auto LAYER##_##BLK##_##OPNUM##_scale_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_scale) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_scale_constant); \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_scale); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_scale_assign = \ - op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_scale) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_scale_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, b); \ - \ - auto LAYER##_##BLK##_##OPNUM##_b_assign = \ - op::Assign().set_input_ref(LAYER##_##BLK##_##OPNUM##_b).set_input_value(LAYER##_##BLK##_##OPNUM##_b_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_b); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_b_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_b) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_b_constant); \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mean); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mean_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mean) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mean_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, variance); \ - \ - auto LAYER##_##BLK##_##OPNUM##_variance_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_variance) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_variance_constant); \ - \ - input.push_back(LAYER##_##BLK##_##OPNUM##_scale); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mom_scale); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_b); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mom_b); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mean); \ - 
input.push_back(LAYER##_##BLK##_##OPNUM##_variance); - -// (int out_channels, Operator& input) -#define GENERATE_RELU_VAR(LAYER, BLK, OPNUM, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - auto LAYER##_##BLK##_##OPNUM = op::Relu(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x(input, "y"); - -// (int out_channels, Operator& input) -#define GENERATE_MAXPOOL_VAR(LAYER, BLK, OPNUM, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - \ - auto LAYER##_##BLK##_##OPNUM = op::MaxPoolWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM)) \ - .set_input_x(input, "y") \ - .set_attr_ksize({1, 3, 3, 1}) \ - .set_attr_padding("SAME") \ - .set_attr_strides({1, 2, 2, 1}); - -// (int out_channels, Operator& input) -#define GENERATE_ADD_VAR(LAYER, BLK, OPNUM, input_x1, input_x2) \ - auto LAYER##_##BLK##_##OPNUM = \ - op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x1(input_x1, "y").set_input_x2(input_x2, "y"); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_RESIDUAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \ - auto &LAYER##_##BLK##_input = input; \ - auto &LAYER##_##BLK##_stride = stride; \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \ - GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - LAYER##_##BLK##_relu1); \ - GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \ - GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \ - LAYER##_##BLK##_relu2); \ - GENERATE_BN_VAR(LAYER, BLK, bn3, 
out_channels, LAYER##_##BLK##_conv3); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR(LAYER, BLK, bn4, out_channels, LAYER##_##BLK##_conv4); \ - \ - GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, LAYER##_##BLK##_bn4); \ - GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \ - \ - auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \ - auto &LAYER##_##BLK##_output_label = "y"; - -#define MAKE_RESIDUAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn4, out_channels, input); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_NORMAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \ - auto &LAYER##_##BLK##_input = input; \ - auto &LAYER##_##BLK##_stride = stride; \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \ - GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, 
LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - LAYER##_##BLK##_relu1); \ - GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \ - GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \ - LAYER##_##BLK##_relu2); \ - GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3); \ - \ - GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, input); \ - GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \ - \ - auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \ - auto &LAYER##_##BLK##_output_label = "y"; - -#define MAKE_NORMAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_RESIDUAL_LAYER(LAYER, in_channels, out_channels, stride, input) \ - MAKE_RESIDUAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \ - \ - auto &LAYER##_output = LAYER##_blk1_output; \ - auto &LAYER##_output_label = LAYER##_blk1_output_label; - -#define MAKE_RESIDUAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \ - MAKE_RESIDUAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) 
-#define MAKE_NORMAL_LAYER(LAYER, in_channels, out_channels, stride, input) \ - MAKE_NORMAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \ - \ - auto &LAYER##_output = LAYER##_blk1_output; \ - auto &LAYER##_output_label = LAYER##_blk1_output_label; - -#define MAKE_NORMAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \ - MAKE_NORMAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input); - -#define MAKE_RESNET50(input) \ - MAKE_RESIDUAL_LAYER(layer1, 64, 256, stride_1, input) \ - MAKE_NORMAL_LAYER(layer2, 256, 256, stride_1, layer1_output) \ - MAKE_NORMAL_LAYER(layer3, 256, 256, stride_1, layer2_output) \ - MAKE_RESIDUAL_LAYER(layer4, 256, 512, stride_2, layer3_output) \ - MAKE_NORMAL_LAYER(layer5, 512, 512, stride_1, layer4_output) \ - MAKE_NORMAL_LAYER(layer6, 512, 512, stride_1, layer5_output) \ - MAKE_NORMAL_LAYER(layer7, 512, 512, stride_1, layer6_output) \ - MAKE_RESIDUAL_LAYER(layer8, 512, 1024, stride_2, layer7_output) \ - MAKE_NORMAL_LAYER(layer9, 1024, 1024, stride_1, layer8_output) \ - MAKE_NORMAL_LAYER(layer10, 1024, 1024, stride_1, layer9_output) \ - MAKE_NORMAL_LAYER(layer11, 1024, 1024, stride_1, layer10_output) \ - MAKE_NORMAL_LAYER(layer12, 1024, 1024, stride_1, layer11_output) \ - MAKE_NORMAL_LAYER(layer13, 1024, 1024, stride_1, layer12_output) \ - MAKE_RESIDUAL_LAYER(layer14, 1024, 2048, stride_2, layer13_output) \ - MAKE_NORMAL_LAYER(layer15, 2048, 2048, stride_1, layer14_output) \ - MAKE_NORMAL_LAYER(layer16, 2048, 2048, stride_1, layer15_output) \ - \ - auto &resnet50_output = layer16_output; \ - auto &resnet50_output_label = layer16_output_label; - -#define MAKE_RESNET50_VAR(inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer1, 64, 256, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer2, 256, 256, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer3, 256, 256, stride_1, inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer4, 256, 512, stride_2, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer5, 512, 512, stride_1, inputs) \ - 
MAKE_NORMAL_LAYER_VAR(layer6, 512, 512, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer7, 512, 512, stride_1, inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer8, 512, 1024, stride_2, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer9, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer10, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer11, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer12, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer13, 1024, 1024, stride_1, inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer14, 1024, 2048, stride_2, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer15, 2048, 2048, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer16, 2048, 2048, stride_1, inputs) \ -//--------------------------------------------------------------------------------------------- - -// (Operator& input) -#define GENERATE_BIASADD_GRAD(LAYER, BLK, OPNUM, input) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::BiasAddGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_x(input, input.name_out_dx()); - -// (Operator& input) -#define GENERATE_MATMUL_GRAD(LAYER, BLK, OPNUM, input) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::MatMul(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_x1(input); - -// (Operator& input) -#define GENERATE_RESHAPE_GRAD(LAYER, BLK, OPNUM, input) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::Reshape(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_tensor(input); - -// (Operator& input_grad, Operator& input_maxpool) -#define GENERATE_MAXPOOL_GRAD(LAYER, BLK, OPNUM, input_grad, input_maxpool) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::MaxPoolGradWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ - .set_input_grad(input_grad) \ - .set_input_argmax(input_maxpool, input_maxpool.name_out_argmax()) \ - .set_attr_ksize({1, 1, 3, 3}) \ - .set_attr_strides({1, 1, 
2, 2}) \ - .set_attr_padding("SAME"); - -// (Operator& input_dy) -#define GENERATE_RELU_GRAD(LAYER, BLK, OPNUM, input_dy, dy_label) \ - auto LAYER##_##BLK##_##OPNUM##_grad = op::ReluGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_gradients(input_dy, dy_label) \ - .set_input_features(LAYER##_##BLK##_##OPNUM, "y"); - -// (Operator& input_dy) -#define GENERATE_BN_GRAD(LAYER, BLK, OPNUM, input_dy) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::FusedBatchNormGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_dy(input_dy, "backprops") \ - .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ - .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \ - .set_input_save_mean(LAYER##_##BLK##_##OPNUM, "save_mean") \ - .set_input_save_inv_variance(LAYER##_##BLK##_##OPNUM, "save_inv_variance") \ - .set_attr_epsilon(0.0001); \ - \ - auto LAYER##_##BLK##_##OPNUM##_momentum_scale = \ - op::ApplyMomentum() \ - .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_scale) \ - .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_scale()) \ - .set_input_lr(label1) \ - .set_input_momentum(label1) \ - .set_input_var(LAYER##_##BLK##_##OPNUM##_scale); \ - \ - auto LAYER##_##BLK##_##OPNUM##_momentum_b = \ - op::ApplyMomentum() \ - .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_b) \ - .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_bias()) \ - .set_input_lr(label1) \ - .set_input_momentum(label1) \ - .set_input_var(LAYER##_##BLK##_##OPNUM##_b); - -// (Operator& input) -#define GENERATE_CONV_PROP_FILTER(LAYER, BLK, OPNUM, input_bngrad, stride) \ - auto LAYER##_##BLK##_##OPNUM##_propfilter = \ - op::Conv2DBackpropFilterD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propfilter")) \ - .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ - .set_attr_filter_size(LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetDims()) \ - 
.set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \ - .set_attr_strides(stride) \ - .set_attr_pads({1, 1, 1, 1}); \ - \ - update_op_format(LAYER##_##BLK##_##OPNUM##_propfilter); \ - auto LAYER##_##BLK##_##OPNUM##_momentum_weight = op::ApplyMomentum() \ - .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_weight) \ - .set_input_grad(LAYER##_##BLK##_##OPNUM##_propfilter) \ - .set_input_lr(label1) \ - .set_input_momentum(label1) \ - .set_input_var(LAYER##_##BLK##_##OPNUM##_weight); - -///.set_attr_input_size({input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(0),LAYER##_##BLK##_##OPNUM##_weight.GetOutputDesc().GetShape().GetDim(1), -///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(2)*stride[2], -///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(3)*stride[3]}) -#define GENERATE_CONV_PROP_INPUT(LAYER, BLK, OPNUM, input_bngrad, stride) \ - auto LAYER##_##BLK##_##OPNUM##_propinput = \ - op::Conv2DBackpropInputD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propinput")) \ - .set_attr_input_size(LAYER##_##BLK##_##OPNUM##_input.GetOutputDesc("y").GetShape().GetDims()) \ - .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \ - .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \ - .set_attr_strides(stride) \ - .set_attr_pads({1, 1, 1, 1}); \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \ - << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(3) * stride[3] << endl; \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \ - << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(2) * stride[2] << endl; \ - \ - update_op_format(LAYER##_##BLK##_##OPNUM##_propinput); \ - auto &LAYER##_##BLK##_##OPNUM##_propinput_label = "y" - -// (int out_channels, Operator& input) -#define GENERATE_ADD_GRAD(LAYER, BLK, OPNUM, input_x1, input_x1_label, input_x2, input_x2_label) \ - auto 
LAYER##_##BLK##_##OPNUM##_grad = op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_x1(input_x1, input_x1_label) \ - .set_input_x2(input_x2, input_x2_label); - -// (Operator& input) -#define MAKE_RESIDUAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \ - GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); \ - \ - GENERATE_BN_GRAD(LAYER, BLK, bn4, LAYER##_##BLK##_relu5_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \ - \ - GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - \ - GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \ - LAYER##_##BLK##_conv4_propinput, LAYER##_##BLK##_conv4_propinput_label); \ - \ - auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \ - auto &LAYER##_##BLK##_grad_output_label = "y" - -// (Operator& input) -#define MAKE_NORMAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \ - GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); 
\ - \ - GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - \ - GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \ - input_dy, dy_label); \ - \ - auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \ - auto &LAYER##_##BLK##_grad_output_label = "y" - -// (Operator& input_dy) -#define MAKE_RESIDUAL_LAYER_GRAD(LAYER, input_dy, dy_label) \ - MAKE_RESIDUAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \ - \ - auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \ - auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label; - -// (Operator& input_dy) -#define MAKE_NORMAL_LAYER_GRAD(LAYER, input_dy, dy_label) \ - MAKE_NORMAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \ - \ - auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \ - auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label; - -#define MAKE_RESNET50_GRAD(input_dy, dy_label) \ - MAKE_NORMAL_LAYER_GRAD(layer16, input_dy, dy_label) \ - MAKE_NORMAL_LAYER_GRAD(layer15, layer16_grad_output, layer16_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer14, layer15_grad_output, 
layer15_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer13, layer14_grad_output, layer14_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer12, layer13_grad_output, layer13_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer11, layer12_grad_output, layer12_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer10, layer11_grad_output, layer11_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer9, layer10_grad_output, layer10_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer8, layer9_grad_output, layer9_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer7, layer8_grad_output, layer8_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer6, layer7_grad_output, layer7_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer5, layer6_grad_output, layer6_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer4, layer5_grad_output, layer5_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer3, layer4_grad_output, layer4_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer2, layer3_grad_output, layer3_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer1, layer2_grad_output, layer2_grad_output_label) \ - \ - auto &resnet50_grad_output = layer1_grad_output; \ - auto &resnet50_grad_output_label = layer1_grad_output_label; - -bool resnet50(Graph &graph) { - auto data = op::Data().set_attr_index(0); - auto data1 = op::Data().set_attr_index(1); - TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT); - data.update_output_desc_y(shape_desc); - - TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT); - - auto var = op::Variable("conv2d_var"); - var.update_output_desc_y(desc); - var.update_input_desc_x(desc); - - auto varw1 = op::Variable("conv2d_varw1"); - varw1.update_output_desc_y(desc); - - auto conv2d = op::Conv2D("Translate") - .set_input_x(data) - .set_input_filter(var) - .set_attr_strides({1, 1, 2, 2}) - .set_attr_pads({2, 3, 2, 3}) - .set_attr_data_format("NCHW"); - TensorDesc desc_y; - desc_y.SetFormat(FORMAT_NCHW); // shape: 32 64 112 112 - 
conv2d.update_output_desc_y(desc_y); - - TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT); - auto var1 = op::Variable("bn_var1"); - var1.update_output_desc_y(desc1); - - auto var2 = op::Variable("bn_var2"); - var2.update_output_desc_y(desc1); - - auto var3 = op::Variable("bn_var3"); - var3.update_output_desc_y(desc1); - - auto var4 = op::Variable("bn_var4"); - var4.update_output_desc_y(desc1); - - TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT); - - auto var5 = op::Variable("var5"); - var5.update_output_desc_y(desc2); - - auto var6 = op::Variable("var6"); - var6.update_output_desc_y(desc2); - - TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - - auto label1 = op::Variable("label1"); - label1.update_output_desc_y(desclabel); - - TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - auto matvar = op::Variable("matvar"); - matvar.update_output_desc_y(descmatlabel); - - auto matvar1 = op::Variable("matvar1"); - matvar1.update_output_desc_y(descmatlabel); - - auto bn = op::FusedBatchNorm() - .set_input_x(conv2d, "y") - .set_input_scale(var1) - .set_input_b(var2) - .set_input_mean(var3) - .set_input_variance(var4) - .set_attr_mode(1) - .set_attr_epsilon(1e-5) - .set_attr_is_training(true) - .set_attr_is_training_fusion(true) - .set_attr_moving_average_fraction(994352128); - - auto relu = op::Relu().set_input_x(bn, "y"); - - auto maxpool = op::MaxPoolWithArgmax() - .set_input_x(relu, "y") - .set_attr_ksize({1, 3, 3, 1}) - .set_attr_padding("SAME") - .set_attr_strides({1, 2, 2, 1}); - - MAKE_RESNET50(maxpool); - std::vector inputs{data}; //,var,var1,layer1_blk1_bn1_b,var3,var4}; - std::vector outputs{}; - - graph.SetInputs(inputs).SetOutputs(outputs); - return true; -} - -#define GENERATE_CONSTANT_USE_DESC(OPNUM, desc, val) \ - uint32_t OPNUM##_size = desc.GetShape().GetShapeSize(); \ - Tensor OPNUM##_tensor; \ - OPNUM##_tensor.SetTensorDesc(desc); \ - if (desc.GetDataType() == DT_FLOAT) { \ 
- float *OPNUM##_data = new float[OPNUM##_size]; \ - for (int i = 0; i < (int)OPNUM##_size; i++) { \ - *(OPNUM##_data + i) = val; \ - } \ - OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(float)); \ - delete[] OPNUM##_data; \ - } \ - if (desc.GetDataType() == DT_INT64) { \ - int64_t *OPNUM##_data = new int64_t[OPNUM##_size]; \ - for (int i = 0; i < (int)OPNUM##_size; i++) { \ - *(OPNUM##_data + i) = val; \ - } \ - OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(int64_t)); \ - delete[] OPNUM##_data; \ - } \ - auto OPNUM##_constant = op::Constant().set_attr_value(OPNUM##_tensor); \ - OPNUM##_constant.update_output_desc_y(desc); - -#define GENERATE_VAR_LAYER(OPNUM, desc, input) \ - auto OPNUM##_weight = op::Variable(string(#OPNUM)); \ - OPNUM##_weight.update_output_desc_y(desc); \ - auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \ - \ - input.push_back(OPNUM##_weight); - -#define GENERATE_VAR_LAYER_1(OPNUM, desc, var_format, input, name) \ - auto OPNUM##_weight = op::Variable(string(name)); \ - OPNUM##_weight.update_output_desc_y(desc); \ - auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \ - \ - input.push_back(OPNUM##_weight); - -int BuildInitVarGraph(Graph &graph) { - std::vector inputs{}; - std::vector outputs{}; - - TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(conv2d_var, desc, 0.01); - GENERATE_VAR_LAYER(conv2d_var, desc, inputs); - - GENERATE_CONSTANT_USE_DESC(conv2d_varw1, desc, 0.01); - GENERATE_VAR_LAYER(conv2d_varw1, desc, inputs); - - TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(bn_var1, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var1, desc1, inputs); - GENERATE_CONSTANT_USE_DESC(bn_var2, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var2, desc1, inputs); - GENERATE_CONSTANT_USE_DESC(bn_var3, desc1, 0.01); - 
GENERATE_VAR_LAYER(bn_var3, desc1, inputs); - GENERATE_CONSTANT_USE_DESC(bn_var4, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var4, desc1, inputs); - - TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(var5, desc2, 0.01); - GENERATE_VAR_LAYER(var5, desc2, inputs); - GENERATE_CONSTANT_USE_DESC(var6, desc2, 0.01); - GENERATE_VAR_LAYER(var6, desc2, inputs); - - TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(label1, desclabel, 0.1); - GENERATE_VAR_LAYER(label1, desclabel, inputs); - - TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(matvar, descmatlabel, 0.01); - GENERATE_VAR_LAYER(matvar, descmatlabel, inputs); - GENERATE_CONSTANT_USE_DESC(matvar1, descmatlabel, 0.01); - GENERATE_VAR_LAYER(matvar1, descmatlabel, inputs); - - MAKE_RESNET50_VAR(inputs); - - TensorDesc ctrl(ge::Shape({1, 1, 1, 1}), FORMAT_NCHW, DT_INT64); - - GENERATE_CONSTANT_USE_DESC(iterations_per_loop, ctrl, 100); - GENERATE_VAR_LAYER_1(iterations_per_loop, ctrl, "4D", inputs, "npu_runconfig/iterations_per_loop"); - GENERATE_CONSTANT_USE_DESC(loop_cond, ctrl, 0); - GENERATE_VAR_LAYER_1(loop_cond, ctrl, "4D", inputs, "npu_runconfig/loop_cond"); - GENERATE_CONSTANT_USE_DESC(one, ctrl, 1); - GENERATE_VAR_LAYER_1(one, ctrl, "4D", inputs, "npu_runconfig/one"); - GENERATE_CONSTANT_USE_DESC(zero, ctrl, 0); - GENERATE_VAR_LAYER_1(zero, ctrl, "4D", inputs, "npu_runconfig/zero"); - - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} -int TestBuildGraphTest(Func fun, Graph &graph, vector &inputs, vector &outputs) { - bool graph_ret = fun(graph); - ge::Tensor shapeTensor; - TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT); - uint32_t sizeshape = shape_desc.GetShape().GetShapeSize(); - printf("[test] desc size filter shape:%u\n", sizeshape); - shapeTensor.SetTensorDesc(shape_desc); - vector dataValuec; - for (int i = 0; i < 
sizeshape; i++) { - dataValuec.push_back(1); - } - - shapeTensor.SetData((uint8_t *)dataValuec.data(), 4 * sizeshape); - inputs.push_back(shapeTensor); - - ge::Tensor shapeTensor1; - TensorDesc shape_desc1(ge::Shape({1, 32, 1, 1}), FORMAT_NCHW, DT_FLOAT); - uint32_t sizeshape1 = shape_desc1.GetShape().GetShapeSize(); - printf("[test] desc size filter shape:%u\n", sizeshape1); - shapeTensor1.SetTensorDesc(shape_desc1); - vector dataValuec1; - for (int i = 0; i < sizeshape1; i++) { - dataValuec1.push_back(1); - } - - shapeTensor1.SetData((uint8_t *)dataValuec1.data(), 4 * sizeshape1); - - return 0; -} -int runTrainGraph(Func fun, int loopCount) { - printf("GE BBIT begin...\n"); - std::chrono::system_clock::time_point start = std::chrono::system_clock::now(); - - std::map ge_options = { - {"device_id", "0"}, {"rank_table_file", ""}, {"graphType", "1"}, {"ge.graphRunMode", "2"}}; - - std::map session_options = {{"a", "b"}, {TRAIN_FLAG, "1"}}; - - ge::Status ret; - - // init ge - ret = GEInitialize_api_new("train", "fe,plugin"); - printf("ge::GEInitialize ret:%d\n", ret); - - // init session - ge::Session session(session_options); - - int graphId_initvar = 1; - ge::Graph graph_initvar("initVarGraph"); - bool graph_ret = BuildInitVarGraph(graph_initvar); - - // session addgraph - int graphId = 0; - - // build graph - ge::Graph graph("bigGraph"); - std::vector inputs; - ge::Tensor outputTensor; - std::vector outputs; - graph_ret = TestBuildGraphTest(fun, graph, inputs, outputs); - printf("TestReluGrad ret:%d\n", graph_ret); - - ret = session.AddGraph(graphId_initvar, graph_initvar); - printf("session.AddVarGraph ret:%d\n", ret); - if (ret) return ret; - - ret = session.AddGraph(graphId, graph); - printf("session.AddGraph ret:%d\n", ret); - if (ret) return ret; - - std::vector inputs1; - std::vector outputs1; - ret = session.RunGraph(graphId_initvar, inputs1, outputs1); - - if (ret != SUCCESS) { - return ret; - } - // add loop for test of stabilty: - for (int i = 0; i < 
loopCount; i++) { - // session rungraph - printf("loopCount:%d\n", loopCount); - ret = session.RunGraph(graphId, inputs, outputs); - printf("session.RunGraph ret:%d\n", ret); - if (ret) return ret; - - // define 99999 as loop forever - if (loopCount == 99999) i = 0; - } - std::chrono::system_clock::time_point end = std::chrono::system_clock::now(); - auto millisecondsduration = std::chrono::duration_cast(end - start); - auto ms = millisecondsduration.count(); - std::stringstream ss; - ss << ms << "ms"; - std::string run_time = ss.str(); - printf("run time is : %s \n", run_time.c_str()); - - return 0; -} - -int main(int argc, char *argv[]) { - // add loop for test of stabilty: - int loopCount = 1; - if (argc >= 2) loopCount = atoi(argv[1]); - - Status ret = SUCCESS; - ret = runTrainGraph(resnet50, loopCount); - if (ret == SUCCESS) { - std::cout << "[train resnet50 success]" << std::endl; - } else { - std::cout << "!!! train resnet50 fail !!!" << std::endl; - } - return ret; -} diff --git a/tests/st/test_ge_st.py b/tests/st/test_ge_st.py deleted file mode 100644 index b5479cfc..00000000 --- a/tests/st/test_ge_st.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -ge st test. 
-""" -import pytest -import subprocess -import os - -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_card -@pytest.mark.component_ge -def test_resnet50_train(): - ge_st_dir=os.environ.get('GE_ST_DIR', - '/home/jenkins/workspace/release_pkg/gate/graphengine_lib') - ge_lib_dir=os.environ.get('GRAPHENGINE_LIB', '/home/jenkins/workspace/release_pkg/gate/graphengine_lib') - - real_pythonpath=os.environ.get('REAL_PYTHONPATH') - pythonpath=os.environ.get('PYTHONPATH') - if real_pythonpath: - if pythonpath: - os.environ['PYTHONPATH']=real_pythonpath+':'+pythonpath - else: - os.environ['PYTHONPATH']=real_pythonpath - print('PYTHONPATH: '+os.environ.get('PYTHONPATH')) - - os.environ['ASCEND_OPP_PATH']='/usr/local/Ascend/opp' - os.environ['ASCEND_ENGINE_PATH']='/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:' \ - '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:' \ - '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/librts_engine.so:'+ \ - ge_lib_dir + '/libge_local_engine.so' - print('ASCEND_OPP_PATH: '+os.environ.get('ASCEND_OPP_PATH')) - print('ASCEND_ENGINE_PATH: '+os.environ.get('ASCEND_ENGINE_PATH')) - print('LD_LIBRARY_PATH: '+os.environ.get('LD_LIBRARY_PATH')) - - cmd=ge_st_dir + '/st_resnet50_train' - print('cmd: '+cmd) - os.environ['SLOG_PRINT_TO_STDOUT']="1" - ret=subprocess.call([cmd], shell=True) - assert ret==0 - From e9e5dd7b9dcaf898a5d4e942a50ac0f2deb05bf6 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Mon, 7 Dec 2020 17:14:14 +0800 Subject: [PATCH 029/127] fix geruntime missing files and error codes --- ge/ge_runtime/CMakeLists.txt | 3 +++ ge/ge_runtime/runtime_model.cc | 4 ++-- ge/ge_runtime/task/task.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index 42d3b344..ce1b89ea 100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -13,6 +13,9 @@ 
set(GE_SRC_LIST "task/hccl_task.cc" "task/memcpy_async_task.cc" "task/profiler_task.cc" + "task/label_goto_task.cc" + "task/label_set_task.cc" + "task/label_switch_task.cc" ) add_library(ge_runtime SHARED ${GE_SRC_LIST}) diff --git a/ge/ge_runtime/runtime_model.cc b/ge/ge_runtime/runtime_model.cc index 0ff56ef1..fb0f3e85 100644 --- a/ge/ge_runtime/runtime_model.cc +++ b/ge/ge_runtime/runtime_model.cc @@ -307,8 +307,8 @@ bool RuntimeModel::Run() { ret = rtStreamSynchronize(rt_model_stream_); if (ret != RT_ERROR_NONE) { - if (ret == RT_ERROR_END_OF_SEQUENCE) { - GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret); + if (ret == ACL_ERROR_RT_END_OF_SEQUENCE) { + GELOGI("Model stream ACL_ERROR_RT_END_OF_SEQUENCE signal received, ret = 0x%X", ret); return true; } GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret); diff --git a/ge/ge_runtime/task/task.h b/ge/ge_runtime/task/task.h index 6c4df248..c255fd22 100644 --- a/ge/ge_runtime/task/task.h +++ b/ge/ge_runtime/task/task.h @@ -24,6 +24,7 @@ #include "runtime/rt_model.h" #include "ge_runtime/model_context.h" #include "ge_runtime/task_info.h" +#include "external/runtime/rt_error_codes.h" namespace ge { namespace model_runner { From b8e82bb16e1b72d5aee31a6f7354d4efa0b4f4e3 Mon Sep 17 00:00:00 2001 From: lichun Date: Mon, 7 Dec 2020 19:57:03 +0800 Subject: [PATCH 030/127] inference supports dynamic shape --- ge/common/ge/op_tiling_manager.cc | 4 + ge/common/ge/op_tiling_manager.h | 1 + ge/executor/CMakeLists.txt | 94 ++++++++- ge/executor/ge_executor.cc | 73 ++++++- ge/executor/module.mk | 84 +++++++- ge/ge_local_engine/CMakeLists.txt | 2 +- ge/ge_local_engine/engine/host_cpu_engine.cc | 10 +- ge/ge_local_engine/engine/host_cpu_engine.h | 2 +- ge/graph/build/graph_builder.cc | 51 +++++ ge/graph/load/graph_loader.cc | 7 +- ge/graph/load/graph_loader.h | 3 +- .../load/new_model_manager/davinci_model.cc | 88 ++++---- .../load/new_model_manager/davinci_model.h | 7 +- 
.../load/new_model_manager/model_manager.cc | 48 ++++- .../load/new_model_manager/model_manager.h | 5 +- ge/graph/partition/dynamic_shape_partition.cc | 47 ++++- ge/graph/partition/dynamic_shape_partition.h | 1 + ge/graph/passes/pass_utils.cc | 4 - .../passes/transop_breadth_fusion_pass.cc | 2 +- ge/host_cpu_engine/CMakeLists.txt | 6 +- ge/host_kernels/floordiv_kernel.cc | 4 +- ge/host_kernels/floordiv_kernel.h | 4 - ge/host_kernels/ssd_prior_box_kernel.cc | 6 +- ge/hybrid/executor/hybrid_execution_context.h | 2 +- .../executor/hybrid_model_async_executor.cc | 38 ++++ .../executor/hybrid_model_async_executor.h | 5 + ge/hybrid/executor/hybrid_profiler.h | 2 +- ge/hybrid/executor/node_state.h | 2 +- ge/hybrid/hybrid_davinci_model.cc | 79 ++++++++ ge/hybrid/hybrid_davinci_model.h | 21 ++ ge/hybrid/hybrid_davinci_model_stub.cc | 32 +++ ge/hybrid/model/hybrid_model.cc | 188 +++++++++++++++++- ge/hybrid/model/hybrid_model.h | 26 +++ ge/hybrid/model/hybrid_model_builder.cc | 31 ++- .../node_executor/aicore/aicore_op_task.cc | 56 ++++++ .../node_executor/aicore/aicore_op_task.h | 1 + .../aicore/aicore_task_compiler.h | 2 +- .../node_executor/aicpu/aicpu_node_executor.h | 2 + .../controlop/control_op_executor.h | 1 + .../ge_local/ge_local_node_executor.cc | 2 +- .../host_cpu/kernel/assign_kernel.cc | 1 - ge/hybrid/node_executor/node_executor.cc | 1 - .../partitioned_call_node_executor.h | 1 - ge/hybrid/node_executor/task_context.h | 2 +- inc/framework/executor/ge_executor.h | 16 ++ 45 files changed, 976 insertions(+), 88 deletions(-) diff --git a/ge/common/ge/op_tiling_manager.cc b/ge/common/ge/op_tiling_manager.cc index 9b5ba2d7..db959368 100644 --- a/ge/common/ge/op_tiling_manager.cc +++ b/ge/common/ge/op_tiling_manager.cc @@ -88,4 +88,8 @@ void OpTilingManager::LoadSo() { } } +OpTilingManager &OpTilingManager::GetInstance() { + static OpTilingManager instance; + return instance; +} } // namespace ge diff --git a/ge/common/ge/op_tiling_manager.h 
b/ge/common/ge/op_tiling_manager.h index d4e7f34e..17761969 100644 --- a/ge/common/ge/op_tiling_manager.h +++ b/ge/common/ge/op_tiling_manager.h @@ -25,6 +25,7 @@ using SoToHandleMap = std::map; class OpTilingManager { public: OpTilingManager() = default; + static OpTilingManager &GetInstance(); ~OpTilingManager(); void LoadSo(); diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index de8025f3..d7dfdc84 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -72,7 +72,89 @@ set(SRC_LIST "../single_op/task/tbe_task_builder.cc" "../single_op/task/aicpu_task_builder.cc" "../single_op/task/aicpu_kernel_task_builder.cc" - "../hybrid/hybrid_davinci_model_stub.cc" + "../hybrid/common/tensor_value.cc" + "../hybrid/common/npu_memory_allocator.cc" + "../hybrid/executor/rt_callback_manager.cc" + "../hybrid/executor/node_state.cc" + "../hybrid/executor/node_done_manager.cc" + "../hybrid/executor/hybrid_profiler.cc" + "../hybrid/executor/hybrid_model_executor.cc" + "../hybrid/executor/hybrid_model_async_executor.cc" + "../hybrid/executor/hybrid_execution_context.cc" + "../hybrid/executor/subgraph_context.cc" + "../hybrid/executor/subgraph_executor.cc" + "../hybrid/executor/worker/task_compile_engine.cc" + "../hybrid/executor/worker/shape_inference_engine.cc" + "../hybrid/executor/worker/execution_engine.cc" + "../hybrid/model/hybrid_model.cc" + "../hybrid/model/hybrid_model_builder.cc" + "../hybrid/model/node_item.cc" + "../hybrid/model/graph_item.cc" + "../hybrid/node_executor/aicore/aicore_node_executor.cc" + "../hybrid/node_executor/aicore/aicore_op_task.cc" + "../hybrid/node_executor/aicore/aicore_task_builder.cc" + "../hybrid/node_executor/aicpu/aicpu_node_executor.cc" + "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc" + "../hybrid/node_executor/ge_local/ge_local_node_executor.cc" + "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" + "../hybrid/node_executor/host_cpu/kernel_factory.cc" + 
"../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "../hybrid/node_executor/controlop/control_op_executor.cc" + "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" + "../hybrid/node_executor/rts/rts_node_executor.cc" + "../hybrid/node_executor/node_executor.cc" + "../hybrid/node_executor/task_context.cc" + "../hybrid/hybrid_davinci_model.cc" + "../ge_local_engine/engine/host_cpu_engine.cc" + "../graph/common/omg_util.cc" + "../graph/manager/host_mem_manager.cc" + "../graph/build/memory/var_mem_assign_util.cc" + "../host_kernels/transpose_kernel.cc" + "../host_kernels/add_kernel.cc" + "../host_kernels/broadcast_args_kernel.cc" + "../host_kernels/broadcast_gradient_args_kernel.cc" + "../host_kernels/cast_kernel.cc" + "../host_kernels/concat_offset_kernel.cc" + "../host_kernels/concat_v2_kernel.cc" + "../host_kernels/dynamic_stitch_kernel.cc" + "../host_kernels/identity_kernel.cc" + "../host_kernels/empty_kernel.cc" + "../host_kernels/expanddims_kernel.cc" + "../host_kernels/fill_kernel.cc" + "../host_kernels/floordiv_kernel.cc" + "../host_kernels/floormod_kernel.cc" + "../host_kernels/gather_v2_kernel.cc" + "../host_kernels/greater_kernel.cc" + "../host_kernels/kernel_utils.cc" + "../host_kernels/maximum_kernel.cc" + "../host_kernels/mul_kernel.cc" + "../host_kernels/pack_kernel.cc" + "../host_kernels/permute_kernel.cc" + "../host_kernels/range_kernel.cc" + "../host_kernels/rank_kernel.cc" + "../host_kernels/reduce_prod_kernel.cc" + "../host_kernels/reshape_kernel.cc" + "../host_kernels/rsqrt_kernel.cc" + "../host_kernels/shape_kernel.cc" + "../host_kernels/shape_n_kernel.cc" + "../host_kernels/size_kernel.cc" + "../host_kernels/slice_d_kernel.cc" + "../host_kernels/slice_kernel.cc" + "../host_kernels/squeeze_kernel.cc" + 
"../host_kernels/unsqueeze_kernel.cc" + "../host_kernels/ssd_prior_box_kernel.cc" + "../host_kernels/strided_slice_kernel.cc" + "../host_kernels/sub_kernel.cc" + "../host_kernels/transdata_kernel.cc" + "../host_kernels/unpack_kernel.cc" + "../graph/passes/pass_utils.cc" + "../graph/common/bcast.cc" + "../common/fp16_t.cc" + "../common/formats/format_transfers/format_transfer_transpose.cc" + "../common/formats/utils/formats_trans_utils.cc" ) ######## libge_executor.a ######## @@ -105,9 +187,9 @@ target_include_directories(ge_executor PRIVATE ${CMAKE_BINARY_DIR}/proto/ge #### yellow zone #### ${GE_CODE_DIR}/../inc - ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../inc/cce #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc ) target_link_libraries(ge_executor PRIVATE @@ -147,9 +229,9 @@ target_include_directories(ge_executor_shared PRIVATE ${CMAKE_BINARY_DIR}/proto/ge #### yellow zone #### ${GE_CODE_DIR}/../inc - ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../inc/cce #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc ) target_link_libraries(ge_executor_shared PRIVATE @@ -158,7 +240,7 @@ target_link_libraries(ge_executor_shared PRIVATE -Wl,--no-as-needed ge_common runtime - slog + slog mmpa graph register diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index add95372..3e916916 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -39,6 +39,8 @@ #include "graph/manager/graph_var_manager.h" #include "graph/load/new_model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" +#include "graph/opsproto_manager.h" +#include "ge_local_engine/engine/host_cpu_engine.h" using std::string; using std::vector; @@ -221,6 +223,33 @@ class ModelListenerAdapter : public ModelListener { std::shared_ptr listener; }; +static void InitOpsProtoManger() { + string opsproto_path; + const char *path_env = 
std::getenv("ASCEND_OPP_PATH"); + if (path_env != nullptr) { + string path = path_env; + string file_path = RealPath(path.c_str()); + if (file_path.empty()) { + GELOGE(FAILED, "File path %s is invalid.", path.c_str()); + return; + } + opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); + GELOGI("Get opsproto so path from env : %s", path.c_str()); + } else { + string path_base = PluginManager::GetPath(); + GELOGI("path_base is %s", path_base.c_str()); + path_base = path_base.substr(0, path_base.rfind('/')); + path_base = path_base.substr(0, path_base.rfind('/') + 1); + opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); + } + + GELOGI("Get opsproto path is %s", opsproto_path.c_str()); + OpsProtoManager *manager = OpsProtoManager::Instance(); + map option_tmp; + option_tmp.emplace(std::pair(string("ge.opsProtoLibPath"), opsproto_path)); + (void)manager->Initialize(option_tmp); +} + GeExecutor::GeExecutor() {} Status GeExecutor::Initialize() { @@ -230,6 +259,16 @@ Status GeExecutor::Initialize() { return ge::SUCCESS; } + OpTilingManager::GetInstance().LoadSo(); + + Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize(); + if (initHostCpuEngineStatus != SUCCESS) { + GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine"); + return initHostCpuEngineStatus; + } + + InitOpsProtoManger(); + std::vector mem_type(1, RT_MEMORY_HBM); mem_type.push_back(RT_MEMORY_P2P_DDR); auto ret = MemManager::Instance().Initialize(mem_type); @@ -600,10 +639,16 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { return ACL_ERROR_GE_INTERNAL_ERROR; } - std::shared_ptr davinci_model = ModelManager::GetInstance()->GetModel(model_id); - if (davinci_model != nullptr) { - uint64_t session_id = davinci_model->GetSessionId(); + std::shared_ptr hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + uint64_t session_id = 
hybrid_davinci_model->GetSessionId(); VarManagerPool::Instance().RemoveVarManager(session_id); + } else { + std::shared_ptr davinci_model = ModelManager::GetInstance()->GetModel(model_id); + if (davinci_model != nullptr) { + uint64_t session_id = davinci_model->GetSessionId(); + VarManagerPool::Instance().RemoveVarManager(session_id); + } } ret = GraphLoader::UnloadModel(model_id); if (ret != SUCCESS) { @@ -933,6 +978,26 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat */ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, ge::RunModelData &run_output_data, bool async_mode) { + std::vector input_desc = {}; + std::vector output_desc = {}; + return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode); +} + +/** +* @ingroup ge +* @brief Synchronous execution of offline model(Do not create thread) +* @param [in] uint32_t model_id: Model ID to execute + void* stream: stream to execute + const domi::InputData *input_data: Model input data + const std::vector &input_desc: Description of model input data + bool async_mode: is asynchronize mode +* @param [out] domi::OutputData *output_data: Model output data +* @param [out] std::vector &output_desc: Description of model output data +* @return SUCCESS handle successfully / others handle failed +*/ +Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, + const std::vector &input_desc, ge::RunModelData &run_output_data, + std::vector &output_desc, bool async_mode) { if (!isInit_) { GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return ACL_ERROR_GE_EXEC_NOT_INIT; @@ -957,7 +1022,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel } } - return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, output_data); + return GraphLoader::ExecuteModel(model_id, stream, async_mode, 
input_data, input_desc, output_data, output_desc); } /** diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 4a0188be..9566ca64 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -61,9 +61,91 @@ local_ge_executor_src_files := \ ../single_op/task/tbe_task_builder.cc \ ../single_op/task/aicpu_task_builder.cc \ ../single_op/task/aicpu_kernel_task_builder.cc \ - ../hybrid/hybrid_davinci_model_stub.cc\ ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \ ../graph/common/local_context.cc \ + ../hybrid/common/tensor_value.cc \ + ../hybrid/common/npu_memory_allocator.cc \ + ../hybrid/executor/rt_callback_manager.cc \ + ../hybrid/executor/node_state.cc \ + ../hybrid/executor/node_done_manager.cc \ + ../hybrid/executor/hybrid_profiler.cc \ + ../hybrid/executor/hybrid_model_executor.cc \ + ../hybrid/executor/hybrid_model_async_executor.cc \ + ../hybrid/executor/hybrid_execution_context.cc \ + ../hybrid/executor/subgraph_context.cc \ + ../hybrid/executor/subgraph_executor.cc \ + ../hybrid/executor/worker/task_compile_engine.cc \ + ../hybrid/executor/worker/shape_inference_engine.cc \ + ../hybrid/executor/worker/execution_engine.cc \ + ../hybrid/model/hybrid_model.cc \ + ../hybrid/model/hybrid_model_builder.cc \ + ../hybrid/model/node_item.cc \ + ../hybrid/model/graph_item.cc \ + ../hybrid/node_executor/aicore/aicore_node_executor.cc \ + ../hybrid/node_executor/aicore/aicore_op_task.cc \ + ../hybrid/node_executor/aicore/aicore_task_builder.cc \ + ../hybrid/node_executor/aicpu/aicpu_node_executor.cc \ + ../hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ + ../hybrid/node_executor/ge_local/ge_local_node_executor.cc \ + ../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \ + ../hybrid/node_executor/host_cpu/kernel_factory.cc \ + ../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ + 
../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + ../hybrid/node_executor/controlop/control_op_executor.cc \ + ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ + ../hybrid/node_executor/rts/rts_node_executor.cc \ + ../hybrid/node_executor/node_executor.cc \ + ../hybrid/node_executor/task_context.cc \ + ../hybrid/hybrid_davinci_model.cc \ + ../ge_local_engine/engine/host_cpu_engine.cc \ + ../graph/common/omg_util.cc \ + ../graph/manager/host_mem_manager.cc \ + ../graph/build/memory/var_mem_assign_util.cc \ + ../host_kernels/transpose_kernel.cc \ + ../host_kernels/add_kernel.cc \ + ../host_kernels/broadcast_args_kernel.cc \ + ../host_kernels/broadcast_gradient_args_kernel.cc \ + ../host_kernels/cast_kernel.cc \ + ../host_kernels/concat_offset_kernel.cc \ + ../host_kernels/concat_v2_kernel.cc \ + ../host_kernels/dynamic_stitch_kernel.cc \ + ../host_kernels/identity_kernel.cc \ + ../host_kernels/empty_kernel.cc \ + ../host_kernels/expanddims_kernel.cc \ + ../host_kernels/fill_kernel.cc \ + ../host_kernels/floordiv_kernel.cc \ + ../host_kernels/floormod_kernel.cc \ + ../host_kernels/gather_v2_kernel.cc \ + ../host_kernels/greater_kernel.cc \ + ../host_kernels/kernel_utils.cc \ + ../host_kernels/maximum_kernel.cc \ + ../host_kernels/mul_kernel.cc \ + ../host_kernels/pack_kernel.cc \ + ../host_kernels/permute_kernel.cc \ + ../host_kernels/range_kernel.cc \ + ../host_kernels/rank_kernel.cc \ + ../host_kernels/reduce_prod_kernel.cc \ + ../host_kernels/reshape_kernel.cc \ + ../host_kernels/rsqrt_kernel.cc \ + ../host_kernels/shape_kernel.cc \ + ../host_kernels/shape_n_kernel.cc \ + ../host_kernels/size_kernel.cc \ + ../host_kernels/slice_d_kernel.cc \ + ../host_kernels/slice_kernel.cc \ + ../host_kernels/squeeze_kernel.cc \ + ../host_kernels/unsqueeze_kernel.cc \ + ../host_kernels/ssd_prior_box_kernel.cc \ + ../host_kernels/strided_slice_kernel.cc \ + ../host_kernels/sub_kernel.cc \ + ../host_kernels/transdata_kernel.cc \ + 
../host_kernels/unpack_kernel.cc \ + ../graph/passes/pass_utils.cc \ + ../graph/common/bcast.cc \ + ../common/fp16_t.cc \ + ../common/formats/format_transfers/format_transfer_transpose.cc \ + ../common/formats/utils/formats_trans_utils.cc \ local_ge_executor_c_include := \ proto/insert_op.proto \ diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt index 76590172..615a968f 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -195,7 +195,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES ) ############ libge_local_opskernel_builder.a ############ -add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) +add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) target_compile_options(ge_local_opskernel_builder_static PRIVATE -Werror diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index b14cbb3d..c836d4d6 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -95,8 +95,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { void HostCpuEngine::CloseSo() { for (auto handle : lib_handles_) { - if (dlclose(handle) != 0) { - GELOGW("failed to close handle, message: %s", dlerror()); + if (mmDlclose(handle) != 0) { + GELOGW("failed to close handle, message: %s", mmDlerror()); } } lib_handles_.clear(); @@ -322,13 +322,13 @@ Status HostCpuEngine::LoadLibs(std::vector &lib_paths) { Status HostCpuEngine::LoadLib(const std::string &lib_path) { GELOGI("To invoke dlopen on lib: %s", lib_path.c_str()); - auto handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL); + auto handle = mmDlopen(lib_path.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); if (handle == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. 
path = %s, error = %s", lib_path.c_str(), dlerror()); + GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), mmDlerror()); return INTERNAL_ERROR; } - auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize"); + auto initialize = (Status (*)(const HostCpuContext &))mmDlsym(handle, "Initialize"); if (initialize != nullptr) { GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str()); if (initialize(HostCpuContext()) != SUCCESS) { diff --git a/ge/ge_local_engine/engine/host_cpu_engine.h b/ge/ge_local_engine/engine/host_cpu_engine.h index cc6b578c..0b99ecac 100644 --- a/ge/ge_local_engine/engine/host_cpu_engine.h +++ b/ge/ge_local_engine/engine/host_cpu_engine.h @@ -20,7 +20,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "graph/node.h" #include "graph/operator.h" -#include "register/register.h" +#include "external/../register/register.h" namespace ge { class HostCpuEngine { diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 0fa1e1ee..19c0083c 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -30,6 +30,7 @@ #include "model/ge_model.h" #include "graph/ge_context.h" #include "opskernel_manager/ops_kernel_builder_manager.h" +#include "graph/utils/op_desc_utils.h" using domi::BuildMode; @@ -311,6 +312,53 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); } +static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, + const std::vector &in_anchors, const std::string &name) { + GE_CHECK_NOTNULL(out_anchor); + NodePtr in_node = out_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(in_node); + OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC); + OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) + .AddOutput("y", 
in_node->GetOpDesc()->GetOutputDesc(0)) + .Build(); + (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); + if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); + return FAILED; + } + return SUCCESS; +} + +static Status GenerateTaskForConstant(const std::shared_ptr &graph) { + for (auto &node : graph->GetDirectNode()) { + // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + auto op_type = op_desc->GetType(); + if (op_type == NETOUTPUT) { + for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { + const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + NodePtr in_node = peer_out_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(in_node); + + std::string in_node_op_type = in_node->GetType(); + if (in_node_op_type == CONSTANT) { + GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); + std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; + if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { + GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str()); + return FAILED; + } + } + } + } + } + return SUCCESS; +} + Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, @@ -332,6 +380,9 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { continue; } + + 
GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed."); + if (sub_graph->GetGraphUnknownFlag()) { // unknown shape build flow GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 2eeecc0f..aa825a5d 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -274,13 +274,16 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da /// @param [in] stream stream to execute model on /// @param [in] async_mode is asynchronize mode. /// @param [in] input_data model input data +/// @param [in] input_desc description of model input data /// @param [out] output_data model output data +/// @param [out] output_desc description of model output data /// Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data) { + const std::vector &input_desc, OutputData &output_data, + std::vector &output_desc) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, output_data); + Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); if (ret != SUCCESS) { GELOGE(ret, "Execute model failed, model_id:%u.", model_id); return ret; diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h index b581f2fa..974af5c1 100755 --- a/ge/graph/load/graph_loader.h +++ b/ge/graph/load/graph_loader.h @@ -65,7 +65,8 @@ class GraphLoader { const std::vector &output_queue_ids); static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data); + const std::vector &input_desc, OutputData &output_data, + std::vector &output_desc); static Status 
DestroyAicpuKernel(uint64_t session_id, uint32_t model_id); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index c660f797..37b1fb4f 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -117,7 +117,8 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrGetWeight(); std::size_t weights_size = weights.GetSize(); GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE); - if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { - GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); + if ((weight_ptr != nullptr) && (weight_size < weights_size)) { + GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size); return FAILED; } - if ((weight_ptr != nullptr) && (weight_size < weights_size)) { - GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size); + weights_mem_base_ = static_cast(dev_ptr); + is_inner_weight_base_ = false; + + if (weights_size != 0) { + weights_mem_base_ = static_cast(weight_ptr); + is_inner_weight_base_ = false; + if (weight_ptr == nullptr) { + weights_mem_base_ = MallocWeightsMem(weights_size); + if (weights_mem_base_ == nullptr) { + GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. 
size: %zu", weights_size); + return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; + } + is_inner_weight_base_ = true; + } + GELOGI("[IMAS]InitWeightMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + weights_mem_base_, weights_size); + GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); + GELOGI("copy weights data to device"); + } + + runtime_param_.weight_base = weights_mem_base_; + return SUCCESS; +} + + +Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { + if (is_feature_map_mem_has_inited_) { + GELOGE(FAILED, "call InitFeatureMapMem more than once ."); + return FAILED; + } + is_feature_map_mem_has_inited_ = true; + + std::size_t data_size = TotalMemSize(); + std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; + + if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { + GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); return FAILED; } mem_base_ = static_cast(dev_ptr); p2p_mem_base_ = static_cast(dev_ptr); - weights_mem_base_ = static_cast(dev_ptr); is_inner_mem_base_ = false; - is_inner_weight_base_ = false; if (TotalMemSize() && mem_base_ == nullptr) { mem_base_ = MallocFeatureMapMem(data_size); @@ -298,12 +330,14 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. 
size: %zu", data_size); return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; } - GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, mem_base_, data_size); - weights_mem_base_ = mem_base_; + if (!is_inner_weight_base_) { + weights_mem_base_ = mem_base_; + is_inner_weight_base_ = true; + } is_inner_mem_base_ = true; - is_inner_weight_base_ = true; } if (p2p_data_size != 0) { @@ -312,27 +346,11 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); return GE_EXEC_ALLOC_P2P_MEM_FAILED; } - GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, p2p_mem_base_, p2p_data_size); is_inner_p2p_mem_base_ = true; } - if (weights_size != 0) { - weights_mem_base_ = static_cast(weight_ptr); - is_inner_weight_base_ = false; - if (weight_ptr == nullptr) { - weights_mem_base_ = MallocWeightsMem(weights_size); - if (weights_mem_base_ == nullptr) { - GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. 
size: %zu", weights_size); - return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; - } - is_inner_weight_base_ = true; - } - GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - weights_mem_base_, weights_size); - GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); - } - GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; @@ -642,8 +660,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GE_TIMESTAMP_START(InitModelMem); GELOGD("Known node is %d", known_node_); + GE_CHK_STATUS_RET_NOLOG(InitWeightMem(dev_ptr, weight_ptr, weight_size)); if (!known_node_) { - GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size)); + GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size)); data_inputer_ = new (std::nothrow) DataInputer(); GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr."); } @@ -1140,6 +1159,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); } + return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 893c3d49..650f19eb 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -584,7 +584,8 @@ class DavinciModel { Status SyncVarData(); - Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize); + Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size); + Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size); void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, 
InputOutputDescInfo &input); @@ -850,7 +851,9 @@ class DavinciModel { Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node); Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc); - bool is_model_has_inited_; + bool is_weight_mem_has_inited_; + bool is_feature_map_mem_has_inited_; + uint32_t model_id_; uint32_t runtime_model_id_; string name_; diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 080ca889..6f20f63d 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -31,6 +31,7 @@ #include "model/ge_root_model.h" #include "graph/common/local_context.h" #include "common/formats/utils/formats_trans_utils.h" +#include "hybrid/hybrid_davinci_model.h" namespace ge { thread_local uint32_t device_count = 0; @@ -204,6 +205,13 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { std::lock_guard lock(map_mutex_); + auto hybrid_davinci_model = hybrid_model_map_.find(model_id); + if (hybrid_davinci_model != hybrid_model_map_.end()) { + uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); + DestroyAicpuSession(session_id); + return SUCCESS; + } + auto it = model_map_.find(model_id); if (it == model_map_.end()) { GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); @@ -925,6 +933,12 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &output_desc, std::vector &inputFormats, std::vector &outputFormats, bool new_model_desc) { + std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + hybrid_davinci_model->SetModelDescVersion(new_model_desc); + return hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); + } + std::shared_ptr davinci_model = 
GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); @@ -943,6 +957,11 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector> &batch_info, int32_t &dynamic_type) { + std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); + } + std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetDynamicBatchInfo failed, Invalid model id %u!", model_id); @@ -975,6 +994,12 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector &user_input_shape_order) { + auto hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order); + return SUCCESS; + } + auto davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id) @@ -990,6 +1015,12 @@ Status ModelManager::GetCurShape(const uint32_t model_id, std::vector & } Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { + std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info); + return SUCCESS; + } + std::shared_ptr davinci_model = GetModel(model_id); GE_CHECK_NOTNULL(davinci_model); davinci_model->GetModelAttr(dynamic_output_shape_info); @@ -1201,10 +1232,25 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d /// @param [in] stream model stream /// @param [in] async_mode is asynchronize mode. 
/// @param [in] input_data input data +/// @param [in] input_desc description of input data /// @param [out] output_data output data +/// @param [out] output_desc description of output data /// Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data) { + const std::vector &input_desc, OutputData &output_data, + std::vector &output_desc) { + std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + auto inputs = input_data.blobs; + auto outputs = output_data.blobs; + + Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream); + if (status == SUCCESS) { + GELOGI("Execute model %u success.", model_id); + } + return status; + } + std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 9821a4ab..e3780d5b 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -148,10 +148,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// @param [in] stream model stream /// @param [in] async_mode is asynchronize mode. 
/// @param [in] input_data model input data + /// @param [in] input_desc description of model input data /// @param [out] output_data model output data + /// @param [out] output_desc description of model output data /// ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data); + const std::vector &input_desc, OutputData &output_data, + std::vector &output_desc); ge::Status SyncExecuteModel(uint32_t model_id, const std::vector &inputs, std::vector &outputs); diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 87fac994..95f13b6f 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -26,6 +26,7 @@ #include #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "framework/common/types.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" @@ -72,7 +73,7 @@ Status DynamicShapePartitioner::Partition() { } REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true), "Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str()); - + REQUIRE_SUCCESS(CtrlEdgeTransfer(), "Failed do ctrl edge transfer!"); DumpGraph("_Before_DSP"); auto status = PartitionImpl(); GELOGD("%s.", DebugString().c_str()); @@ -86,6 +87,50 @@ Status DynamicShapePartitioner::Partition() { return status; } +Status DynamicShapePartitioner::CtrlEdgeTransfer() { + GELOGD("Do ctrl edge transfer start!"); + GE_CHECK_NOTNULL(root_graph_); + + bool is_dynamic_shape = false; + (void)AttrUtils::GetBool(root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); + if (!is_dynamic_shape) { + return SUCCESS; + } + for (auto &subgraph : root_graph_->GetAllSubgraphs()) { + for (ge::NodePtr &n : subgraph->GetDirectNode()) { + auto op_desc = n->GetOpDesc(); + if 
(op_desc == nullptr) { + continue; + } + auto op_type = op_desc->GetType(); + if (op_type == CONSTANT || op_type == CONSTANTOP) { + if (n->GetInAllNodes().empty()) { + GELOGD("[CtrlEdgeTransferPass] node [%s] in nodes is empty", n->GetName().c_str()); + continue; + } + + GELOGD("start to tranfer ctrl edge for const node [%s]", n->GetName().c_str()); + + for (auto &in_control_node : n->GetInControlNodes()) { + GE_CHECK_NOTNULL(in_control_node); + GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(), + n->GetInControlAnchor()), "remove edge failed"); + for (auto &out_node : n->GetOutNodes()) { + if (out_node == nullptr) { + continue; + } + GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(), + out_node->GetInControlAnchor()), "add edge failed."); + } + } + } + } + } + + GELOGD("Do ctrl edge transfer end!"); + return SUCCESS; +} + Status DynamicShapePartitioner::PartitionImpl() { REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed."); REQUIRE_SUCCESS(InitClusters(), "Failed init cluster nodes."); diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index b0477ae8..9772615e 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -151,6 +151,7 @@ class DynamicShapePartitioner { Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow); Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow); bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor); + Status CtrlEdgeTransfer(); ge::ComputeGraphPtr root_graph_; // The original graph to partition std::unordered_map> node_2_cluster_; // Record nodes and the cluster it belongs to // topological sorted clusters, this field will change with the splitting. 
diff --git a/ge/graph/passes/pass_utils.cc b/ge/graph/passes/pass_utils.cc index 5359ff63..3adfbde3 100644 --- a/ge/graph/passes/pass_utils.cc +++ b/ge/graph/passes/pass_utils.cc @@ -37,10 +37,6 @@ #include "graph/utils/type_utils.h" namespace ge { -namespace { -const uint32_t kShapeDimSize = 1; -const uint32_t DIM_SIZE_TWO = 2; -} // namespace Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector &data, std::vector &v_output, const bool scalar_output) { diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc index 21fb1eaf..689510f0 100644 --- a/ge/graph/passes/transop_breadth_fusion_pass.cc +++ b/ge/graph/passes/transop_breadth_fusion_pass.cc @@ -63,7 +63,7 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, GELOGE(FAILED, "node is null"); return ""); if (node->GetType() == CAST) { trans_data_type = true; - } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED) { + } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED || node->GetType() == EXPANDDIMS) { trans_format = true; trans_shape = true; } else if (node->GetType() == TRANSDATA) { diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt index 02b5f996..97b5a0f5 100644 --- a/ge/host_cpu_engine/CMakeLists.txt +++ b/ge/host_cpu_engine/CMakeLists.txt @@ -8,7 +8,7 @@ set(SRC_LIST "engine/host_cpu_engine.cc" "ops_kernel_store/host_cpu_ops_kernel_info.cc" "ops_kernel_store/op/op_factory.cc" - "ops_kernel_store/op/host_op.cc" + "ops_kernel_store/op/host_op.cc" ) set(CPU_OPS_KERNEL_LIST @@ -98,7 +98,7 @@ target_link_libraries(atc_host_cpu_engine PRIVATE set_target_properties(atc_host_cpu_engine PROPERTIES OUTPUT_NAME host_cpu_engine - LIBRARY_OUTPUT_DIRECTORY atclib + LIBRARY_OUTPUT_DIRECTORY atclib ) ############ libhost_cpu_opskernel_builder.so ############ @@ -185,7 
+185,7 @@ set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES ) ############ libhost_cpu_opskernel_builder.a ############ -add_library(host_cpu_opskernel_builder_static SHARED ${CPU_OPS_KERNEL_LIST}) +add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) target_compile_options(host_cpu_opskernel_builder_static PRIVATE -Werror diff --git a/ge/host_kernels/floordiv_kernel.cc b/ge/host_kernels/floordiv_kernel.cc index e254af09..df381212 100644 --- a/ge/host_kernels/floordiv_kernel.cc +++ b/ge/host_kernels/floordiv_kernel.cc @@ -112,8 +112,8 @@ void FloorDivKernel::ShapeCal(const std::vector &input, Ge template T FloorDivKernel::DivCal(const T &x_i, const T &y_i) { if ((x_i < static_cast(0)) != (y_i < static_cast(0))) { - T abs_x_i = std::abs(x_i); - T abs_y_i = std::abs(y_i); + T abs_x_i = x_i < 0 ? -x_i : x_i; + T abs_y_i = y_i < 0 ? -y_i : y_i; return static_cast(static_cast(-(abs_x_i + abs_y_i - 1) / abs_y_i)); } else { return static_cast(static_cast(x_i / y_i)); diff --git a/ge/host_kernels/floordiv_kernel.h b/ge/host_kernels/floordiv_kernel.h index d3dc3ff7..b8f6dd12 100755 --- a/ge/host_kernels/floordiv_kernel.h +++ b/ge/host_kernels/floordiv_kernel.h @@ -40,10 +40,6 @@ class FloorDivKernel : public Kernel { template Status DataCal(const std::vector &input, ge::GeTensorPtr output_ptr); Status ComputeByDataType(DataType data_type, const std::vector &input, GeTensorPtr output_ptr); - - int64_t axis_dim_; - int64_t head_dim_; - int64_t end_dim_; }; } // namespace ge diff --git a/ge/host_kernels/ssd_prior_box_kernel.cc b/ge/host_kernels/ssd_prior_box_kernel.cc index b93a4047..57af4026 100644 --- a/ge/host_kernels/ssd_prior_box_kernel.cc +++ b/ge/host_kernels/ssd_prior_box_kernel.cc @@ -187,7 +187,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uin return PARAM_INVALID; } - uint tmp_value = aspect_ratios_size * min_sizes_size; + uint32_t tmp_value = aspect_ratios_size * min_sizes_size; if 
(ge::CheckUint32AddOverflow(tmp_value, max_sizes_size) != SUCCESS) { GELOGW("Failed to get list param."); return PARAM_INVALID; @@ -199,7 +199,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uin return PARAM_INVALID; } num_priors = static_cast(tmp_value); - + if (ge::CheckIntMulOverflow(layer_width, layer_height) != SUCCESS) { GELOGW("Failed to get list param."); return PARAM_INVALID; @@ -288,7 +288,7 @@ std::unique_ptr SsdPriorboxKernel::BoundaryCalulate(int dim_size, int l } } - return std::move(output_data); + return output_data; } Status SsdPriorboxKernel::Compute(const NodePtr &node, std::vector &v_output) { diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 0910d2c7..0fa5a5d7 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -77,7 +77,7 @@ do { \ RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__) #define RECORD_CALLBACK_EVENT(context, name, fmt, ...) 
\ - RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACKS, fmt, "Callback", name, ##__VA_ARGS__) } // namespace hybrid } // namespace ge #endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_ diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 468a7014..91996ab3 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -353,6 +353,44 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a return SUCCESS; } +Status HybridModelAsyncExecutor::Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc) { + GELOGI("Start to execute model."); + + HybridModelExecutor::ExecuteArgs args; + args.inputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); ++i) { + TensorValue tensor_value(inputs[i].data, inputs[i].length); + args.inputs[i] = tensor_value; + } + GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); + for (const auto &output_tensor_desc : args.output_desc) { + output_desc.emplace_back(*output_tensor_desc); + } + + for (size_t i = 0; i < args.outputs.size(); ++i) { + int64_t output_real_size = 0; + ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[i], output_real_size); + if (graph_status != GRAPH_SUCCESS) { + GELOGE(FAILED, "Get tensor size in bytes failed."); + return FAILED; + } + if (output_real_size > 0) { + if (outputs[i].length < static_cast(output_real_size)) { + GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the real size of output[%ld]", + i, outputs[i].length, output_real_size); + return FAILED; + } + GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, 
RT_MEMCPY_DEVICE_TO_DEVICE)); + } + outputs[i].length = output_real_size; + } + + return SUCCESS; +} + Status HybridModelAsyncExecutor::Execute(const vector &inputs, vector &outputs) { GELOGD("Start to execute model."); // prepare inputs diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 8de2beb6..21833b0b 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -35,6 +35,11 @@ class HybridModelAsyncExecutor { Status Init(); + Status Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc); + Status Execute(const vector &inputs, vector &outputs); Status Start(const std::shared_ptr &listener); diff --git a/ge/hybrid/executor/hybrid_profiler.h b/ge/hybrid/executor/hybrid_profiler.h index f6027a0b..94a042e4 100644 --- a/ge/hybrid/executor/hybrid_profiler.h +++ b/ge/hybrid/executor/hybrid_profiler.h @@ -33,7 +33,7 @@ class HybridProfiler { SHAPE_INFERENCE, COMPILE, EXECUTION, - CALLBACK + CALLBACKS }; struct Event { diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 48b2ed72..04f1ee4b 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -27,7 +27,7 @@ namespace ge { namespace hybrid { class NodeTask; -class GraphExecutionContext; +struct GraphExecutionContext; class SubgraphContext; class ShapeFuture { diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc index b6f5bb84..7009331c 100755 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -38,6 +38,14 @@ class HybridDavinciModel::Impl { return SUCCESS; } + Status Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc, + rtStream_t stream) { + return executor_.Execute(inputs, input_desc, outputs, output_desc); + } + Status Execute(const vector 
&inputs, vector &outputs) { return executor_.Execute(inputs, outputs); } @@ -68,6 +76,33 @@ class HybridDavinciModel::Impl { executor_.SetDeviceId(device_id); } + uint64_t GetSessionId() { + return model_.GetSessionId(); + } + + Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { + return model_.GetDynamicBatchInfo(batch_info, dynamic_type); + } + + void GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { + model_.GetUserDesignateShapeOrder(user_input_shape_order); + } + + void GetModelAttr(std::vector &dynamic_output_shape_info) { + model_.GetModelAttr(dynamic_output_shape_info); + } + + Status GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats) { + return model_.GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats); + } + + void SetModelDescVersion(bool is_new_model_desc) { + model_.SetModelDescVersion(is_new_model_desc); + } + private: std::shared_ptr listener_; HybridModel model_; @@ -95,6 +130,14 @@ Status HybridDavinciModel::Init() { return impl_->Init(); } +Status HybridDavinciModel::Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc, rtStream_t stream) { + GE_CHECK_NOTNULL(impl_); + return impl_->Execute(inputs, input_desc, outputs, output_desc, stream); +} + Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { GE_CHECK_NOTNULL(impl_); return impl_->Execute(inputs, outputs); @@ -132,5 +175,41 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) { impl_->SetDeviceId(device_id); } } + +Status HybridDavinciModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { + GE_CHECK_NOTNULL(impl_); + return impl_->GetDynamicBatchInfo(batch_info, dynamic_type); +} + +void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { + if (impl_ != nullptr) { + 
impl_->GetUserDesignateShapeOrder(user_input_shape_order); + } +} + +void HybridDavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { + if (impl_ != nullptr) { + impl_->GetModelAttr(dynamic_output_shape_info); + } +} + +Status HybridDavinciModel::GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats) { + GE_CHECK_NOTNULL(impl_); + return impl_->GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats); +} + +void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) { + if (impl_ != nullptr) { + impl_->SetModelDescVersion(is_new_model_desc); + } +} + +uint64_t HybridDavinciModel::GetSessionId() { + GE_CHECK_NOTNULL(impl_); + return impl_->GetSessionId(); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h index 00a48c1e..5349390c 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -37,6 +37,12 @@ class HybridDavinciModel { Status Init(); + Status Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc, + rtStream_t stream); + Status Execute(const vector &inputs, vector &outputs); Status ModelRunStart(); @@ -51,6 +57,21 @@ class HybridDavinciModel { void SetDeviceId(uint32_t device_id); + uint64_t GetSessionId(); + + Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type); + + void GetUserDesignateShapeOrder(std::vector &user_input_shape_order); + + void GetModelAttr(std::vector &dynamic_output_shape_info); + + Status GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats); + + void SetModelDescVersion(bool is_new_model_desc); + private: HybridDavinciModel() = default; class Impl; diff --git a/ge/hybrid/hybrid_davinci_model_stub.cc b/ge/hybrid/hybrid_davinci_model_stub.cc index 
b95b9efc..366845c5 100644 --- a/ge/hybrid/hybrid_davinci_model_stub.cc +++ b/ge/hybrid/hybrid_davinci_model_stub.cc @@ -28,6 +28,14 @@ Status HybridDavinciModel::Init() { return UNSUPPORTED; } +Status HybridDavinciModel::Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc, + rtStream_t stream) { + return UNSUPPORTED; +} + Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { return UNSUPPORTED; } @@ -52,5 +60,29 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) { void HybridDavinciModel::SetDeviceId(uint32_t device_id) { } + +uint64_t HybridDavinciModel::GetSessionId() { + return 0; +} + +Status HybridDavinciModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { + return UNSUPPORTED; +} + +void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { +} + +void HybridDavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { +} + +Status HybridDavinciModel::GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats) { + return UNSUPPORTED; +} + +void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) { +} } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index 59c7be9a..c319b06b 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -21,12 +21,18 @@ #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/model/hybrid_model_builder.h" #include "hybrid/node_executor/node_executor.h" +#include "common/op/ge_op_utils.h" namespace ge { namespace hybrid { +namespace { +const int64_t kMemSizeUnknownShape = -1; // Unknown shape mem size +} + 
HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) { } @@ -128,7 +134,187 @@ const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) c } const string &HybridModel::GetModelName() const { - return model_name_; + return model_name_; +} + +Status HybridModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { + // dynamic shape do not need dynamic batch + batch_info = {}; + dynamic_type = -1; + return SUCCESS; +} + +void HybridModel::GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { + // dynamic shape do not need dynamic batch + user_input_shape_order = {}; +} + +void HybridModel::GetModelAttr(std::vector &dynamic_output_shape_info) { + dynamic_output_shape_info = {}; +} + +Status HybridModel::GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats) { + auto node_item_list = root_graph_item_->GetInputNodes(); + if (node_item_list.empty()) { + GELOGE(FAILED, "node item list is empty!"); + return FAILED; + } + + GE_CHECK_NOTNULL(node_item_list[0]->node); + GE_CHECK_NOTNULL(node_item_list[0]->node->GetOpDesc()); + if (node_item_list[0]->node->GetOpDesc()->GetInputsSize() != 1) { + GELOGE(FAILED, "input size of op is not 1!"); + return FAILED; + } + + GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); + + return SUCCESS; +} + +void HybridModel::SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, + InputOutputDescInfo &input) { + for (auto model_input_dim : model_input_dims) { + input.shape_info.dims.push_back(model_input_dim); + } + input.shape_info.shape_ranges = shape_ranges; + return; +} + +void HybridModel::CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input) { + std::vector> shape_ranges; + if 
(is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { + // When static aipp is set, need to get the model input dims which processed by aipp + vector model_input_dims; + (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); + SetInputDimsAndShapeRangesInfo(model_input_dims, shape_ranges, input); + return; + } + // judge if this data is linked dynamic aipp first, multiply batch has been considered + if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { + vector dynamic_aipp_input_dims; + (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); + SetInputDimsAndShapeRangesInfo(dynamic_aipp_input_dims, shape_ranges, input); + return; + } else { + vector input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims(); + op_desc->GetInputDescPtr(0)->GetShapeRange(shape_ranges); + SetInputDimsAndShapeRangesInfo(input_dims, shape_ranges, input); + return; + } +} + +Status HybridModel::GetInputDescInfo(vector &input_desc, std::vector &formats) { + auto node_item_list = root_graph_item_->GetInputNodes(); + for (auto &node_item : node_item_list) { + InputOutputDescInfo input; + + GE_CHECK_NOTNULL(node_item->node); + auto op_desc = node_item->node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); + + Format format = op_desc->GetInputDescPtr(0)->GetFormat(); + input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); + input.name = op_desc->GetName(); + + int64_t input_size = 0; + GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); + + // support dynamic shape + if (input_size < 0) { + GELOGD("dynamic shape scene, input size is unknown. " + "format=%d, data_type=%d, input_size=%ld", + format, input.data_type, input_size); + input_size = kMemSizeUnknownShape; // -1 + } + + // not support dynamic shape input for now, so input_size here will be not less than zero. 
+ input.size = input_size; + + CreateInputDimsInfo(op_desc, input); + + formats.push_back(format); + input_desc.push_back(input); + } + is_new_model_desc_ = false; + return SUCCESS; +} + +void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output_desc_info, uint32_t &format_result) { + GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return ); + Format format = output_desc->GetFormat(); + GeShape shape = output_desc->GetShape(); + std::vector> shape_ranges; + output_desc->GetShapeRange(shape_ranges); + DataType data_type = output_desc->GetDataType(); + format_result = format; + if (format == FORMAT_FRACTAL_Z) { // FraczToHWCK + int64_t k = shape.GetDim(0); // 0: first dim + int64_t c = shape.GetDim(1); // 1: second dim + int64_t h = shape.GetDim(2); // 2: third dim + int64_t w = shape.GetDim(3); // 3: forth dim + output_desc_info.shape_info.dims.push_back(h); + output_desc_info.shape_info.dims.push_back(w); + output_desc_info.shape_info.dims.push_back(c); + output_desc_info.shape_info.dims.push_back(k); + if (shape_ranges.size() == 4) { // 4 dims + output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[2]); // h:2 + output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[3]); // w:3 + output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[1]); // c:1 + output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[0]); // k:0 + } + format_result = FORMAT_HWCN; + } else { + for (size_t j = 0; j < shape.GetDimNum(); j++) { + output_desc_info.shape_info.dims.push_back(shape.GetDim(j)); + } + output_desc_info.shape_info.shape_ranges = shape_ranges; + } + int64_t tensor_size = 0; + (void)TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); + output_desc_info.size = static_cast(tensor_size); + output_desc_info.data_type = output_desc->GetDataType(); +} + +Status HybridModel::GetOutputDescInfo(vector &output_desc, std::vector &formats) { + std::vector 
output_desc_list; + GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); // output_desc_list contains vaild input desc + + vector out_node_names; + (void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names); + + GE_CHECK_NOTNULL(root_graph_item_->GetOutputNode()); + auto op_desc = root_graph_item_->GetOutputNode()->op_desc; + GE_CHECK_NOTNULL(op_desc); + + auto out_size = static_cast(op_desc->GetInputsSize()); + GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size()); + + for (uint32_t index = 0; index < out_size; ++index) { + string output_name; + std::vector src_name = op_desc->GetSrcName(); + std::vector src_index = op_desc->GetSrcIndex(); + if (out_size == out_node_names.size()) { + bool contains_colon = out_node_names[index].find(":") != std::string::npos; + output_name = contains_colon ? 
out_node_names[index] : out_node_names[index] + ":" + std::to_string(src_index[index]); + } else { + output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); + } + + InputOutputDescInfo output_desc_info; + output_desc_info.name = output_name; + + uint32_t format_result; + CreateOutput(output_desc_list[index], output_desc_info, format_result); + output_desc.push_back(output_desc_info); + formats.push_back(format_result); + } + return SUCCESS; } } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 11311968..1bc08053 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -83,6 +83,30 @@ class HybridModel { const string &GetModelName() const; + Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type); + + void GetUserDesignateShapeOrder(std::vector &user_input_shape_order); + + void GetModelAttr(std::vector &dynamic_output_shape_info); + + Status GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &outputFormats); + + Status GetInputDescInfo(vector &input_desc, std::vector &formats); + + void CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output, uint32_t &format_result); + + Status GetOutputDescInfo(vector &output_desc, std::vector &formats); + + void CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input); + + void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } + + void SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, + InputOutputDescInfo &input); + private: friend class HybridModelBuilder; friend class HybridModelAsyncExecutor; @@ -101,6 +125,8 @@ class HybridModel { std::map> subgraph_items_; std::map> node_items_; + bool is_new_model_desc_ = false; // support aipp + // runtime fields uint32_t 
device_id_ = 0; uint32_t model_id_ = 0; diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index cd4c0a83..d519c35b 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -27,16 +27,41 @@ #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" +#include "framework/common/debug/ge_log.h" +#include "graph/utils/attr_utils.h" namespace ge { namespace hybrid { namespace { const uint32_t kSubgraphIndex = 0U; const uint32_t kVarOutputIndex = 0U; -const uint32_t kAlignment = 32; const int kBytes = 8; const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; +Status SetOutputNameAttr(ComputeGraph &graph) { + vector output_names; + for (const auto &node : graph.GetDirectNode()) { + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + auto op_type = op_desc->GetType(); + if (op_type == NETOUTPUT) { + for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { + const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + NodePtr in_node = peer_out_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(in_node); + output_names.push_back(in_node->GetName()); + } + } + } + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names), + GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed."); + return FAILED); + return SUCCESS; +} + int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { int64_t var_size = 0; auto data_type = desc.GetDataType(); @@ -939,6 +964,10 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr Status HybridModelBuilder::IndexTaskDefs() { const auto &root_graph = ge_root_model_->GetRootGraph(); + if (SetOutputNameAttr(*root_graph) != SUCCESS) { + GELOGW("Set output name attr failed."); + } + for (auto &it : 
ge_root_model_->GetSubgraphInstanceNameToModel()) { auto &name = it.first; auto &ge_model = it.second; diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 998afd02..80ea579b 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -19,6 +19,7 @@ #include "framework/common/debug/log.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" +#include "graph/load/new_model_manager/tbe_handle_store.h" using optiling::OpRunInfo; @@ -36,6 +37,58 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) return SUCCESS; } +Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { + auto op_desc_ptr = std::make_shared(op_desc); + GE_CHECK_NOTNULL(op_desc_ptr); + auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); + if (tbe_kernel == nullptr) { + GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); + return INTERNAL_ERROR; + } + TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); + rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); + if (rt_ret != RT_ERROR_NONE) { + void *bin_handle = nullptr; + if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { + GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); + rtDevBinary_t binary; + std::string json_string; + GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), + GELOGI("Get original type of session_graph_id.")); + if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; + } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF; + } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; + } else { + 
GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); + return PARAM_INVALID; + } + binary.version = 0; + binary.data = tbe_kernel->GetBinData(); + binary.length = tbe_kernel->GetBinDataSize(); + GELOGI("TBE: binary.length: %lu", binary.length); + GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); + std::string meta_data; + GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data), + GELOGI("Get original type of json_string")); + GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); + GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); + kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); + } else { + GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); + kernel_store.ReferTBEHandle(stub_name_.c_str()); + } + std::string kernel_name; + GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name), + GELOGI("Get original type of kernel_name")); + GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); + GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); + } + return SUCCESS; +} + Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET(ValidateTaskDef(task_def), "[%s] Failed to validate task def: [%s]", @@ -45,6 +98,9 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef const domi::KernelDef &kernel_def = task_def.kernel(); const domi::KernelContext &context = kernel_def.context(); stub_name_ = kernel_def.stub_func(); + + GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); + GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); args_size_ = kernel_def.args_size(); block_dim_ = kernel_def.block_dim(); diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h 
b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 0447ade7..5818f384 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -62,6 +62,7 @@ class AiCoreOpTask { static Status ValidateTaskDef(const domi::TaskDef &task_def); Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); Status InitTilingInfo(const OpDesc &op_desc); + Status RegisterTbeHandle(const OpDesc &op_desc); std::string stub_name_; void *stub_func_ = nullptr; diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h index bf948349..b6dfd82b 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h @@ -26,7 +26,7 @@ namespace hybrid { class AiCoreTaskCompiler : public TaskCompiler { public: AiCoreTaskCompiler() = default; - ~AiCoreTaskCompiler() = default; + ~AiCoreTaskCompiler() override = default; Status CompileOp(const NodePtr &node, std::vector &tasks) override; Status Initialize() override; diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index b984cc86..1205b190 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -37,6 +37,8 @@ class AicpuNodeTaskBase : public NodeTask { ~AicpuNodeTaskBase() override = default; + using NodeTask::Init; + virtual Status Init(const HybridModel &model) = 0; Status UpdateArgs(TaskContext &context) override; diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.h b/ge/hybrid/node_executor/controlop/control_op_executor.h index 7520afd1..3becfaaa 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.h +++ b/ge/hybrid/node_executor/controlop/control_op_executor.h @@ -25,6 +25,7 @@ namespace ge { namespace hybrid { class ControlOpNodeTask : public NodeTask { public: + using 
NodeTask::Init; virtual Status Init(const NodePtr &node, const HybridModel &model) = 0; Status UpdateArgs(TaskContext &context) override; diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc index 7a83641d..a52e5670 100755 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc @@ -68,7 +68,7 @@ Status RefInputTask::RefOneByOne(TaskContext &context) { node_name_.c_str(), node_type_.c_str(), output_num, input_num); return INTERNAL_ERROR; } - for (uint32_t out_index = 0; out_index < output_num; ++out_index) { + for (uint32_t out_index = 0; out_index < static_cast(output_num); ++out_index) { auto input = context.GetInput(out_index); GE_CHECK_NOTNULL(input); GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 3bf71013..01fd391d 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -20,7 +20,6 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" namespace { -const size_t kAssignInputNum = 2; const size_t kAssignRefInputIndex = 0; const size_t kAssignValueInputIndex = 1; const size_t kAssignRefOutputIndex = 0; diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index e577f09b..95e50c31 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -34,7 +34,6 @@ const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; const char *const kEngineNameHccl = "ops_kernel_info_hccl"; const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; -const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; } Status NodeExecutor::PrepareTask(NodeTask 
&task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); diff --git a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h index 9ea544a1..73873002 100644 --- a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h +++ b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h @@ -41,7 +41,6 @@ class PartitionedCallNodeTask : public NodeTask { const GraphItem *graph_item_; std::unique_ptr subgraph_executor_; - GraphExecutionContext *context_ = nullptr; }; class PartitionedCallNodeExecutor : public NodeExecutor { diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 2cff0536..0549a1dc 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -29,7 +29,7 @@ namespace ge { namespace hybrid { -class GraphExecutionContext; +struct GraphExecutionContext; class SubgraphContext; class TaskContext { diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 17dbf928..5a73126f 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -234,6 +234,22 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data, ge::RunModelData &output_data, bool async_mode = false); + /// + /// @ingroup ge + /// @brief Synchronous execution of offline model(Do not create thread) + /// @param [in] uint32_t model_id: Model ID to execute + /// @param [in] void* stream: stream to execute + /// @param [in] bool async_mode: is asynchronize mode. 
+ /// @param [in] const domi::InputData *input_data: Model input data + /// @param [in] const std::vector &input_desc: description of model input data + /// @param [out] domi::OutputData *output_data: Model output data + /// @param [out] std::vector &output_desc: description of model output data + /// @return SUCCESS handle successfully / others handle failed + /// + ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, + const std::vector &input_desc, ge::RunModelData &run_output_data, + std::vector &output_desc, bool async_mode = false); + /// /// @ingroup ge /// @brief Get weight memory size from model file From c588b7029c088c0c488c23dbdea47f81a2e4fa0e Mon Sep 17 00:00:00 2001 From: taoxudonghaha Date: Mon, 7 Dec 2020 20:18:32 +0800 Subject: [PATCH 031/127] modify fwk_atc.bin --- ge/offline/CMakeLists.txt | 4 ++-- ge/offline/module.mk | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index b3a0d53c..2f9195bc 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -183,11 +183,11 @@ target_link_libraries(fwk_atc.bin PRIVATE c_sec graph error_manager - ge_compiler + ge_runner parser_common gflags json - runtime_compile + runtime slog static_mmpa -lrt diff --git a/ge/offline/module.mk b/ge/offline/module.mk index c14be50f..8018266a 100755 --- a/ge/offline/module.mk +++ b/ge/offline/module.mk @@ -149,8 +149,8 @@ LOCAL_SHARED_LIBRARIES := \ libgraph \ libregister \ liberror_manager \ - libge_compiler \ - libruntime_compile \ + libge_runner \ + libruntime \ libparser_common \ liberror_manager \ From 26723f70b1cc80942eeb5e6a284ff9e264bcc89a Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Mon, 7 Dec 2020 20:34:46 +0800 Subject: [PATCH 032/127] unify normal and non-normal process in subgraph --- ge/graph/manager/graph_manager.cc | 63 ++++--------------------------- 1 file changed, 7 insertions(+), 56 deletions(-) diff --git 
a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 5b194c44..9ce68d76 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -651,62 +651,13 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) { GE_CHECK_NOTNULL(compute_graph); auto sub_graph_map = partitioner.GetSubGraphMap(); - std::string buffer_optimize; - graphStatus graph_status = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); - bool need_lx_fusion = (graph_status == GRAPH_SUCCESS) && (buffer_optimize != kOffOptimize); - if (options_.build_mode.empty() && need_lx_fusion) { - GELOGI("Enter normal mode with buffer_optimize:%s.", buffer_optimize.c_str()); - /// 1. Copy subgraph for buffer optimize while lx fusion failed. - /// 2. Set graph with attr "lx_fusion" for fusion optimize. - std::unordered_map copy_graphs; - GE_TIMESTAMP_START(CopySubGraphAndMarkFusion); - Status ret = CopySubGraphAndMarkFusion(compute_graph, sub_graph_map, copy_graphs); - GE_TIMESTAMP_EVENT_END(CopySubGraphAndMarkFusion, "SetSubgraph:CopySubGraphAndMarkFusion"); - if (ret != SUCCESS) { - GELOGE(ret, "CopySubGraphAndMarkFusion failed."); - return ret; - } - - // Multiply optimize subgraph with lx fusion - ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); - if (ret != SUCCESS) { - GELOGE(ret, "Multiply optimize subgraph with lx fusion failed."); - return ret; - } - - // Check whether all subgraph lx fusion success - GE_TIMESTAMP_START(CheckAllFusionOptimizeSuccess); - if (CheckAllFusionOptimizeSuccess(compute_graph, sub_graph_map)) { - GE_TIMESTAMP_EVENT_END(CheckAllFusionOptimizeSuccess, "SetSubgraph:CheckAllFusionOptimizeSuccess"); - return SUCCESS; - } - - // Replace subgraph with original graph for lx buffer - ret = ReplaceSubgraphWithOriGraph(compute_graph, sub_graph_map, 
copy_graphs); - if (ret != SUCCESS) { - GELOGE(ret, "Replace subgraph with original graph failed."); - return ret; - } - - // Multiply optimize subgraph with lx buffer - ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); - if (ret != SUCCESS) { - GELOGE(ret, "Multiply optimize subgraph with lx buffer failed."); - return ret; - } - } else { - /// Multiply optimize subgraph: - /// 1. run lx buffer while build_mode is normal and buffer_optimize is empty or "off_optimize"; - /// 2. run lx fusion or buffer according build_mode and build_step in fe. - GELOGD("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.", - options_.build_mode.c_str(), - options_.build_step.c_str(), - buffer_optimize.c_str()); - Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); - if (ret != SUCCESS) { - GELOGE(ret, "Multiply optimize subgraph with lx buffer"); - return ret; - } + GELOGD("Directly optimize subgraph with build mode:%s, and step:%s.", + options_.build_mode.c_str(), + options_.build_step.c_str()); + Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); + if (ret != SUCCESS) { + GELOGE(ret, "Multiply optimize subgraph failed"); + return ret; } return SUCCESS; } From 7fa1ca9237d095b57f05ac39870fcbd051d2460e Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Mon, 7 Dec 2020 20:39:54 +0800 Subject: [PATCH 033/127] unify mutex for model_aicpu_kernel_ in different func --- ge/graph/load/new_model_manager/model_manager.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 080ca889..74c37a1b 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -216,7 +216,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { ge::Status 
ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); - std::lock_guard lock(sess_ids_mutex_); + std::lock_guard lock(map_mutex_); std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); @@ -229,7 +229,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ } ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { - std::lock_guard lock(sess_ids_mutex_); + std::lock_guard lock(map_mutex_); std::vector v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { From 881a165c5d1808b699a4327761d743bb33f20fc6 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Mon, 7 Dec 2020 21:02:16 +0800 Subject: [PATCH 034/127] Feature: Get default from ge ir graph while no user input shape --- ge/ir_build/ge_ir_build.cc | 48 ++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 77d5be51..e997d922 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -225,7 +225,8 @@ class Impl { ~Impl() { (void)generator_.Finalize(); }; graphStatus CheckOptions(const std::map &options); graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs); - graphStatus Init(const std::map &options); + graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape); + graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, ModelBufferData &ge_models); graphStatus 
InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, @@ -278,7 +279,41 @@ graphStatus Impl::CheckOptions(const std::map &options return GRAPH_SUCCESS; } -graphStatus Impl::Init(const std::map &options) { +graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) { + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(input_node); + ge::OpDescPtr op = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == DATA) { + string data_op_name = op->GetName(); + GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size()); + ge::GeTensorDesc tensor = op->GetInputDesc(0); + ge::GeShape data_shape = tensor.GetShape(); + GELOGD("Data op get shape from InputDesc in ge ir graph."); + + string tmp_shape_str; + std::vector tmp_shape = data_shape.GetDims(); + if (tmp_shape.size() == 0) { + GELOGE(GRAPH_PARAM_INVALID, "Data op: %s has zero shapr dims!", data_op_name.c_str()); + return GRAPH_PARAM_INVALID; + } + + tmp_shape_str += data_op_name + ":"; + for (auto tmp_dim : tmp_shape) { + tmp_shape_str += to_string((long)tmp_dim) + ","; + } + tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); + tmp_shape_str += ";"; + default_shape += tmp_shape_str(); + GELOGD("Data op name: %s, data shape: %s", data_op_name.c_str(), tmp_shape_str.c_str()); + } + } + GELOGI("Get default data op shape from ge ir graph: %s", default_shape.c_str()); +} + +graphStatus Impl::Init(const Graph &graph, const std::map &options) { // 1. 
check options graphStatus ret = CheckOptions(options); if (ret != GRAPH_SUCCESS) { @@ -296,7 +331,12 @@ graphStatus Impl::Init(const std::map &options) { GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); options_[ge::ir_option::LOG_LEVEL] = log; - string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"]; + string input_shape; + if (options_.find("input_shape") == options_.end()) { + GE_CHK_BOOL_RET_STATUS_NOLOG(GetDefaultInputShape(graph, input_shape), GRAPH_PARAM_INVALID); + } else { + input_shape = options_["input_shape"]; + } string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"]; string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end() @@ -416,7 +456,7 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector &options, ModelBufferData &model) { // 1. 
init GeGenerator with user optios - graphStatus ret = Init(options); + graphStatus ret = Init(graph, options); if (ret != GRAPH_SUCCESS) { GELOGE(ret, "Build ir model Init failed!"); return ret; From b0b8ca9d35811e05ba9208c5abc8c306aed77ff5 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Mon, 7 Dec 2020 21:05:36 +0800 Subject: [PATCH 035/127] Feature: Get default from ge ir graph while no user input shape --- ge/ir_build/ge_ir_build.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index e997d922..22a0165a 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -306,7 +306,7 @@ graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape } tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); tmp_shape_str += ";"; - default_shape += tmp_shape_str(); + default_shape += tmp_shape_str; GELOGD("Data op name: %s, data shape: %s", data_op_name.c_str(), tmp_shape_str.c_str()); } } From 261a6004fa8c0e71c81eae959756f553cc9c430b Mon Sep 17 00:00:00 2001 From: l00444296 Date: Tue, 8 Dec 2020 14:00:22 +0800 Subject: [PATCH 036/127] Feature: Get default from ge ir graph while no user input shape --- ge/ir_build/ge_ir_build.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 22a0165a..b0f1b25e 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -311,6 +311,7 @@ graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape } } GELOGI("Get default data op shape from ge ir graph: %s", default_shape.c_str()); + return GRAPH_SUCCESS; } graphStatus Impl::Init(const Graph &graph, const std::map &options) { From cdc0c42a67a35d5c96f3850249e10c0070a866a3 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Tue, 8 Dec 2020 14:15:10 +0800 Subject: [PATCH 037/127] bugfix for attr update failed --- ge/graph/build/graph_builder.cc | 1 - 1 file changed, 1 
deletion(-) diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 0fa1e1ee..e742a520 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -461,7 +461,6 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { auto input_desc = node_op_desc->MutableInputDesc(in_data_anchor->GetIdx()); GE_CHECK_NOTNULL(input_desc); (void) ge::TensorUtils::SetSize(*input_desc, size); - GE_CHK_STATUS_RET(node_op_desc->UpdateInputDesc(in_data_anchor->GetIdx(), *input_desc)); GELOGD("%s input desc, dim_size: %zu, mem_size: %ld, format: %s, type: %s.", node_ptr->GetName().c_str(), input_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str()); From 07800ed03c601c28edc79a97543f1133177d5f60 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Tue, 8 Dec 2020 14:26:01 +0800 Subject: [PATCH 038/127] Feature: Get default from ge ir graph while no user input shape --- ge/ir_build/ge_ir_build.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index b0f1b25e..fbacf527 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -334,7 +334,8 @@ graphStatus Impl::Init(const Graph &graph, const std::map Date: Tue, 8 Dec 2020 14:56:26 +0800 Subject: [PATCH 039/127] Feature: Get default from ge ir graph while no user input shape --- ge/ir_build/ge_ir_build.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index fbacf527..2035dbe0 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -310,6 +310,7 @@ graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape GELOGD("Data op name: %s, data shape: %s", data_op_name.c_str(), tmp_shape_str.c_str()); } } + default_shape = (default_shape.empty() ? 
default_shape : default_shape.substr(0, default_shape.size() - 1)); GELOGI("Get default data op shape from ge ir graph: %s", default_shape.c_str()); return GRAPH_SUCCESS; } From a2da82c86fd78c0e199d87fdbf4137b370b53dcd Mon Sep 17 00:00:00 2001 From: l00444296 Date: Tue, 8 Dec 2020 15:33:42 +0800 Subject: [PATCH 040/127] Feature: Get default from ge ir graph while no user input shape --- ge/ir_build/ge_ir_build.cc | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 2035dbe0..518ab49b 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -36,6 +36,7 @@ #include "model/ge_model.h" #include "graph/shape_refiner.h" #include "graph/opsproto_manager.h" +#include "graph/utils/type_utils.h" using std::string; using namespace std; @@ -225,7 +226,7 @@ class Impl { ~Impl() { (void)generator_.Finalize(); }; graphStatus CheckOptions(const std::map &options); graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs); - graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape); + graphStatus GetDefaultInputShapeAndFormat(const Graph &graph, string &default_shape, string &input_format); graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, ModelBufferData &ge_models); @@ -279,7 +280,7 @@ graphStatus Impl::CheckOptions(const std::map &options return GRAPH_SUCCESS; } -graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) { +graphStatus Impl::GetDefaultInputShapeAndFormat(const Graph &graph, string &default_shape, string &input_format) { auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { @@ -307,7 +308,11 @@ graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape tmp_shape_str = 
tmp_shape_str.substr(0, tmp_shape_str.size() - 1); tmp_shape_str += ";"; default_shape += tmp_shape_str; - GELOGD("Data op name: %s, data shape: %s", data_op_name.c_str(), tmp_shape_str.c_str()); + + ge::Format data_format = tensor.GetFormat(); + input_format.assign(ge::TypeUtils::FormatToSerialString(data_format)); + GELOGD("Data op name: %s, data shape: %s, data format: %s.", data_op_name.c_str(), tmp_shape_str.c_str(), + input_format.c_str()); } } default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1)); @@ -334,13 +339,14 @@ graphStatus Impl::Init(const Graph &graph, const std::map Date: Sat, 5 Dec 2020 15:07:17 +0800 Subject: [PATCH 041/127] fix cust aicpu --- ge/graph/load/new_model_manager/model_manager.cc | 14 ++++++++++---- ge/graph/load/new_model_manager/model_manager.h | 2 +- .../task_info/kernel_task_info.cc | 4 +++- .../task_info/super_kernel/super_kernel.cc | 2 +- .../node_executor/aicpu/aicpu_node_executor.cc | 8 ++++++-- ge/single_op/task/aicpu_kernel_task_builder.cc | 8 ++++++-- 6 files changed, 27 insertions(+), 11 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 74c37a1b..5d9b6e65 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1243,8 +1243,8 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { return SUCCESS; } -Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) { - GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); +Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded) { + GELOGD("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, 
CustAICPUKernelPtr()); if (aicpu_kernel == nullptr) { @@ -1267,18 +1267,24 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_ std::map new_so_name; new_so_name.insert({so_name, aicpu_kernel}); cust_aicpu_so_[resource_id] = new_so_name; - GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id); + loaded = false; + GELOGD("LoadCustAicpuSo new aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); return SUCCESS; } auto it_so_name = it->second.find(so_name); if (it_so_name == it->second.end()) { it->second.insert({so_name, aicpu_kernel}); - GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id); + loaded = false; + GELOGD("LoadCustAicpuSo add aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); + return SUCCESS; } + loaded = true; + GELOGD("LoadCustAicpuSo so name %s has been loaded.", so_name.c_str()); return SUCCESS; } Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { + GELOGD("Aicpu kernel launch task in, kernel name %s.", kernel_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); if (cust_aicpu_so_.size() == 0) return SUCCESS; // get current context diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 9821a4ab..c1faed82 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -286,7 +286,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); - ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name); + ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded); ge::Status LaunchCustAicpuSo(); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 3e3a715d..7b11c53e 100755 --- 
a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -875,7 +875,9 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } if (kernel_type_ == ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded), + "launch cust aicpu so failed"); } // copy args to new host memory diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index e94fa425..a4d14fb0 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -25,7 +25,7 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { const void *args[] = {this->GetNavTablePtr(), reinterpret_cast(static_cast(this->GetNavTableSize()))}; - rtError_t rt_ret = rtMalloc(reinterpret_cast(device_args_addr_), sizeof(args), RT_MEMORY_HBM); + rtError_t rt_ret = rtMalloc(reinterpret_cast(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. 
error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(reinterpret_cast(device_args_addr_), sizeof(args), (void *)args, sizeof(args), diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 573739bc..38407160 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -644,8 +644,12 @@ Status AicpuNodeTask::Init(const HybridModel &model) { const auto &context = kernel_def.context(); auto kernel_type = static_cast(context.kernel_type()); if (kernel_type == ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed."); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded), + "load cust aicpu so failed."); + if (!loaded) { + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + } } GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 600c9c29..0b459e7a 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -62,8 +62,12 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { if (kernel_type == ccKernelType::CUST_AI_CPU) { task.is_custom_ = true; task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded), + 
"launch cust aicpu so failed"); + if (!loaded) { + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); + } } task.num_inputs_ = op_desc_->GetInputsSize(); From 0dc6b0c629c34a0cd2446488b91e157be96c35fd Mon Sep 17 00:00:00 2001 From: l00444296 Date: Tue, 8 Dec 2020 17:32:32 +0800 Subject: [PATCH 042/127] Feature: Get default from ge ir graph while no user input shape --- ge/ir_build/ge_ir_build.cc | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 518ab49b..06954ed5 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -297,17 +297,16 @@ graphStatus Impl::GetDefaultInputShapeAndFormat(const Graph &graph, string &defa string tmp_shape_str; std::vector tmp_shape = data_shape.GetDims(); if (tmp_shape.size() == 0) { - GELOGE(GRAPH_PARAM_INVALID, "Data op: %s has zero shapr dims!", data_op_name.c_str()); - return GRAPH_PARAM_INVALID; - } - - tmp_shape_str += data_op_name + ":"; - for (auto tmp_dim : tmp_shape) { - tmp_shape_str += to_string((long)tmp_dim) + ","; + GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str()); + } else { + tmp_shape_str += data_op_name + ":"; + for (auto tmp_dim : tmp_shape) { + tmp_shape_str += to_string((long)tmp_dim) + ","; + } + tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); + tmp_shape_str += ";"; + default_shape += tmp_shape_str; } - tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); - tmp_shape_str += ";"; - default_shape += tmp_shape_str; ge::Format data_format = tensor.GetFormat(); input_format.assign(ge::TypeUtils::FormatToSerialString(data_format)); @@ -316,7 +315,7 @@ graphStatus Impl::GetDefaultInputShapeAndFormat(const Graph &graph, string &defa } } default_shape = (default_shape.empty() ? 
default_shape : default_shape.substr(0, default_shape.size() - 1)); - GELOGI("Get default data op shape from ge ir graph: %s", default_shape.c_str()); + GELOGI("Get default data op shape: %s, format: %s from ge ir graph.", default_shape.c_str(), input_format.c_str()); return GRAPH_SUCCESS; } From 3a8999a78a9ac85a31551f36e1577bad0897b332 Mon Sep 17 00:00:00 2001 From: dongduo Date: Tue, 8 Dec 2020 18:16:17 +0800 Subject: [PATCH 043/127] Fix code check --- ge/common/helper/model_helper.cc | 2 +- ge/graph/load/new_model_manager/davinci_model.cc | 2 +- ge/graph/load/new_model_manager/model_manager.cc | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 67c4a80e..efb93d8a 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -124,7 +124,7 @@ Status ModelHelper::SaveModelTbeKernel(std::shared_ptr &om_fil ModelPartitionType::TBE_KERNELS, ge_model->GetTBEKernelStore().Data(), ge_model->GetTBEKernelStore().DataSize(), model_index), - "Add tbe kernel partition failed"); + "Add tbe kernel partition failed"); } // no need to check value, DATA->NetOutput (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index ba7ca226..c6634f4c 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2676,7 +2676,7 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b cur_dynamic_dims_.clear(); cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); GE_CHK_RT_RET(rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), - netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST)); + netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST)); } GELOGD("Cur dynamic dims 
is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); if (GenOutputTensorInfo(op_desc, data_index, output_data, outputs) != SUCCESS) { diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 080ca889..3d8a561c 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1055,8 +1055,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, - ACL_ERROR_GE_PARAM_INVALID, - "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); + ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is invalid, %s", + model.key.c_str(), strerror(errno)); GenModelId(&model_id); shared_ptr davinci_model = nullptr; From 4ddc7323a314efbb5dde8eb02a532b474f87605d Mon Sep 17 00:00:00 2001 From: dongduo Date: Tue, 8 Dec 2020 20:36:20 +0800 Subject: [PATCH 044/127] Fix code check --- ge/graph/load/new_model_manager/model_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 1ba0cfcc..145afa6d 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1086,7 +1086,7 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, - ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is invalid, %s", + ACL_ERROR_GE_PARAM_INVALID, "input 
key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); GenModelId(&model_id); From 4382d783928727ba800e7b2b69d653f46dca0a73 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Tue, 8 Dec 2020 20:39:36 +0800 Subject: [PATCH 045/127] Feature: Get default from ge ir graph while no user input shape --- ge/ir_build/ge_ir_build.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 06954ed5..a206a164 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -295,8 +295,8 @@ graphStatus Impl::GetDefaultInputShapeAndFormat(const Graph &graph, string &defa GELOGD("Data op get shape from InputDesc in ge ir graph."); string tmp_shape_str; - std::vector tmp_shape = data_shape.GetDims(); - if (tmp_shape.size() == 0) { + const std::vector &tmp_shape = data_shape.GetDims(); + if (tmp_shape.empty()) { GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str()); } else { tmp_shape_str += data_op_name + ":"; @@ -341,7 +341,7 @@ graphStatus Impl::Init(const Graph &graph, const std::map Date: Wed, 9 Dec 2020 09:56:06 +0800 Subject: [PATCH 046/127] ir build optimize --- ge/ir_build/ge_ir_build.cc | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index a206a164..c3e557c8 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -385,7 +385,6 @@ graphStatus Impl::Init(const Graph &graph, const std::map(string(IR_OPTION_MODE), to_string(0))); - options_.insert(std::pair(string(IR_OPTION_TARGET), "mini")); options_.insert(std::pair(string(ge::RUN_FLAG), to_string(0))); options_.insert(std::pair(string(ge::TRAIN_FLAG), to_string(0))); options_.insert(std::pair(string(ge::SAVE_ORIGINAL_MODEL), to_string(0))); @@ -425,39 +424,52 @@ void Impl::UpdateThreadContext() { graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs) { auto 
compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); - int64_t index = 0; for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { GE_CHECK_NOTNULL(input_node); ge::OpDescPtr op = input_node->GetOpDesc(); GE_CHECK_NOTNULL(op); if (op->GetType() == DATA) { - (void)AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); GELOGD("Data op inputDesc size: %zu", op->GetAllInputsDesc().size()); - ge::GeTensorDesc tensor = op->GetInputDesc(0); + auto tensor = op->MutableInputDesc(0); string data_op_name = op->GetName(); GELOGD("Data op name: %s", data_op_name.c_str()); ge::GeShape data_shape; auto iter = omg_context_.input_dims.find(data_op_name); if (iter != omg_context_.input_dims.end()) { data_shape = ge::GeShape(iter->second); - GELOGD("Data op get shape from Context."); + tensor->SetShape(data_shape); + GELOGD("Data op get shape from Context and update [%s] shape info", data_op_name.c_str()); } else { - data_shape = tensor.GetShape(); + data_shape = tensor->GetShape(); GELOGD("Data op get shape from InputDesc in ge ir graph."); } // If user point input format, do work for all data ops; else do according to tensor_desc auto data_format = omg_context_.format != domi::DOMI_TENSOR_ND ? 
- ge::TypeUtils::DomiFormatToFormat(omg_context_.format) : tensor.GetFormat(); - ge::DataType data_type = tensor.GetDataType(); + ge::TypeUtils::DomiFormatToFormat(omg_context_.format) : tensor->GetFormat(); + ge::DataType data_type = tensor->GetDataType(); string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type); GELOGD("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str()); ge::GeTensor inputTensor; ge::GeTensorDesc desc(data_shape, ge::Format(data_format), data_type); inputTensor.SetTensorDesc(desc); - inputs.push_back(inputTensor); + int64_t index = 0; + if (AttrUtils::GetInt(op, ATTR_NAME_INDEX, index)) { + AttrUtils::SetInt(desc, ATTR_NAME_INDEX, index); + } else { + GELOGE(GRAPH_PARAM_INVALID, "Get attr name idx failed!"); + return GRAPH_PARAM_INVALID; + } + inputs.emplace_back(inputTensor); } } + std::sort(inputs.begin(), input.end(), [](ge::GeTensor &a, ge::GeTensor &b) { + int64_t data_idx_a = 0; + int64_t data_idx_b = 0; + AttrUtils::GetInt(a.MutableTensorDesc(), ATTR_NAME_INDEX, data_idx_a); + AttrUtils::GetInt(b.MutableTensorDesc(), ATTR_NAME_INDEX, data_idx_b); + return data_idx_a <= data_idx_b; + }); GELOGD("CreateInputsForIRBuild, inputs size: %zu", inputs.size()); return GRAPH_SUCCESS; } From a7afa5683bb5e487351659d04f7ce80ed44823d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=8A=A2?= Date: Tue, 8 Dec 2020 10:11:46 +0800 Subject: [PATCH 047/127] cpplint magic num & define macro --- ge/common/auth/file_saver.cc | 4 +- ge/common/base64.h | 51 ++++++----- .../format_transfer_fractal_nz.cc | 68 +++++++++------ .../format_transfer_fractal_zz.cc | 87 +++++++++++-------- .../format_transfer_transpose.cc | 25 +++--- ge/common/formats/utils/formats_definitions.h | 9 ++ ge/common/ge/tbe_plugin_manager.cc | 4 +- ge/common/util.cc | 83 +++++++++--------- ge/ge_runtime/runtime_model.cc | 3 +- .../load/new_model_manager/model_manager.cc | 13 +-- .../task_info/kernel_task_info.cc | 19 ++-- 
.../super_kernel/super_kernel_factory.cc | 18 ++-- ge/graph/load/new_model_manager/ts_mem_mall.h | 2 +- ge/graph/manager/graph_caching_allocator.cc | 12 +-- ge/graph/manager/graph_caching_allocator.h | 11 ++- ge/graph/manager/graph_var_manager.cc | 4 +- ge/graph/manager/graph_var_manager.h | 1 + ge/graph/optimize/mem_rw_conflict_optimize.cc | 19 ++-- ge/graph/passes/data_pass.cc | 3 +- ge/graph/passes/for_pass.cc | 3 +- ge/graph/passes/mark_agnostic_pass.cc | 4 +- ge/graph/passes/merge_pass.cc | 6 +- ge/host_kernels/gather_v2_kernel.cc | 12 ++- ge/host_kernels/range_kernel.cc | 9 +- ge/hybrid/common/npu_memory_allocator.cc | 4 +- ge/hybrid/executor/node_done_manager.cc | 2 +- ge/offline/main.cc | 2 +- ge/session/omg.cc | 9 +- ge/single_op/single_op.cc | 3 +- inc/framework/common/fmk_error_codes.h | 8 +- 30 files changed, 302 insertions(+), 196 deletions(-) diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc index 91fae074..e708653a 100755 --- a/ge/common/auth/file_saver.cc +++ b/ge/common/auth/file_saver.cc @@ -54,8 +54,8 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) { Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size == 0 || data == nullptr, return PARAM_INVALID); mmSsize_t write_count; - uint32_t size_2g = ((uint32_t) 0x1 << 31); - uint32_t size_1g = ((uint32_t) 0x1 << 30); + uint32_t size_2g = 2147483648; // 0x1 << 31 + uint32_t size_1g = 1073741824; // 0x1 << 30 // Write data if (size > size_2g) { auto seek = reinterpret_cast(const_cast(data)); diff --git a/ge/common/base64.h b/ge/common/base64.h index fb6c1870..a537e585 100644 --- a/ge/common/base64.h +++ b/ge/common/base64.h @@ -25,32 +25,38 @@ namespace ge { namespace { -const char* kBase64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; +const char *kBase64Chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; const char 
kEqualSymbol = '='; const size_t kBase64CharsNum = 64; const size_t kThreeByteOneGroup = 3; const size_t kFourByteOneGroup = 4; -} +const size_t kThreeByteOneGroupIndex0 = 0; +const size_t kThreeByteOneGroupIndex1 = 1; +const size_t kThreeByteOneGroupIndex2 = 2; +const size_t kFourByteOneGroupIndex0 = 0; +const size_t kFourByteOneGroupIndex1 = 1; +const size_t kFourByteOneGroupIndex2 = 2; +const size_t kFourByteOneGroupIndex3 = 3; +} // namespace namespace base64 { -static inline bool IsBase64Char(const char &c) { - return (isalnum(c) || (c == '+') || (c == '/')); -} +static inline bool IsBase64Char(const char &c) { return (isalnum(c) || (c == '+') || (c == '/')); } static std::string EncodeToBase64(const std::string &raw_data) { size_t encode_length = raw_data.size() / kThreeByteOneGroup * kFourByteOneGroup; encode_length += raw_data.size() % kThreeByteOneGroup == 0 ? 0 : kFourByteOneGroup; - size_t raw_data_index = 0 ; + size_t raw_data_index = 0; size_t encode_data_index = 0; std::string encode_data; encode_data.resize(encode_length); for (; raw_data_index + kThreeByteOneGroup <= raw_data.size(); raw_data_index += kThreeByteOneGroup) { auto char_1 = static_cast(raw_data[raw_data_index]); - auto char_2 = static_cast(raw_data[raw_data_index + 1]); - auto char_3 = static_cast(raw_data[raw_data_index + 2]); + auto char_2 = static_cast(raw_data[raw_data_index + kThreeByteOneGroupIndex1]); + auto char_3 = static_cast(raw_data[raw_data_index + kThreeByteOneGroupIndex2]); encode_data[encode_data_index++] = kBase64Chars[char_1 >> 2u]; encode_data[encode_data_index++] = kBase64Chars[((char_1 << 4u) & 0x30) | (char_2 >> 4u)]; encode_data[encode_data_index++] = kBase64Chars[((char_2 << 2u) & 0x3c) | (char_3 >> 6u)]; @@ -80,8 +86,7 @@ static std::string EncodeToBase64(const std::string &raw_data) { #pragma GCC diagnostic ignored "-Wunused-function" static Status DecodeFromBase64(const std::string &base64_data, std::string &decode_data) { if (base64_data.size() % 
kFourByteOneGroup != 0) { - GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu", - base64_data.size()); + GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu", base64_data.size()); return PARAM_INVALID; } decode_data.clear(); @@ -92,10 +97,10 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco return static_cast(std::distance(kBase64Chars, char_pos)) & 0xff; }; - for (std::size_t input_data_index = 0; input_data_index < base64_data_len; input_data_index += 4) { + for (std::size_t input_data_index = 0; input_data_index < base64_data_len; input_data_index += kFourByteOneGroup) { for (size_t i = 0; i < kFourByteOneGroup; ++i) { if (base64_data[input_data_index + i] == kEqualSymbol && - input_data_index >= base64_data_len - 4 && i > 1) { + input_data_index >= base64_data_len - kFourByteOneGroup && i > 1) { byte_4[i] = kBase64CharsNum; } else if (IsBase64Char(base64_data[input_data_index + i])) { byte_4[i] = FindCharInBase64Chars(base64_data[input_data_index + i]); @@ -104,19 +109,23 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco return PARAM_INVALID; } } - decode_data += static_cast((byte_4[0] << 2u) + ((byte_4[1] & 0x30) >> 4u)); - if (byte_4[2] >= kBase64CharsNum){ + decode_data += + static_cast((byte_4[kFourByteOneGroupIndex0] << 2u) + ((byte_4[kFourByteOneGroupIndex1] & 0x30) >> 4u)); + if (byte_4[kFourByteOneGroupIndex2] >= kBase64CharsNum) { break; - } else if (byte_4[3] >= kBase64CharsNum) { - decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); + } else if (byte_4[kFourByteOneGroupIndex3] >= kBase64CharsNum) { + decode_data += static_cast(((byte_4[kFourByteOneGroupIndex1] & 0x0f) << 4u) + + ((byte_4[kFourByteOneGroupIndex2] & 0x3c) >> 2u)); break; } - decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); - decode_data += static_cast(((byte_4[2] & 
0x03) << 6u) + byte_4[3]); + decode_data += static_cast(((byte_4[kFourByteOneGroupIndex1] & 0x0f) << 4u) + + ((byte_4[kFourByteOneGroupIndex2] & 0x3c) >> 2u)); + decode_data += + static_cast(((byte_4[kFourByteOneGroupIndex2] & 0x03) << 6u) + byte_4[kFourByteOneGroupIndex3]); } return SUCCESS; } #pragma GCC diagnostic pop -} +} // namespace base64 } // namespace ge #endif // GE_COMMON_BASE64_H_ \ No newline at end of file diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc index ed1c6941..cb528453 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc @@ -23,12 +23,30 @@ #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" +#include "framework/common/types.h" #include "graph/utils/type_utils.h" namespace ge { namespace formats { namespace { const int kDimSize4D = 4; + +const size_t kSingleDim = 1; + +const size_t kNdDimIndexN = 0; +const size_t kNdDimIndexH = 1; +const size_t kNdDimIndexW = 2; + +const size_t kDimDValueBNdFNz = 2; // dim d-value between Nd and FractalZz + +const size_t kNdDimCountBackwardsW = 1; +const size_t kNdDimCountBackwardsWH = 2; + +const size_t kFNzDimCountBackwardsW0 = 1; +const size_t kFNzDimCountBackwardsW0H0 = 2; +const size_t kFNzDimCountBackwardsW0H0H1 = 3; +const size_t kFNzDimCountBackwardsW0H0H1W1 = 4; + bool IsDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_type) > 0; } using ShapeVector = std::vector; @@ -60,14 +78,14 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap auto w0 = GetCubeSizeByDataType(data_type); int64_t h0 = kCubeSize; switch (src_shape.size()) { - case 1: - dst_shape.push_back(Ceil(src_shape[0], w0)); - dst_shape.push_back(1); + case kSingleDim: + dst_shape.push_back(Ceil(src_shape[kNdDimIndexN], 
w0)); + dst_shape.push_back(DIM_DEFAULT_VALUE); dst_shape.push_back(h0); dst_shape.push_back(w0); - hw_shape.push_back(1); - hw_shape.push_back(1); - hw_shape.push_back(src_shape[0]); + hw_shape.push_back(DIM_DEFAULT_VALUE); + hw_shape.push_back(DIM_DEFAULT_VALUE); + hw_shape.push_back(src_shape[kNdDimIndexN]); if (!IsShapeValid(dst_shape)) { GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); return PARAM_INVALID; @@ -76,17 +94,17 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap default: auto size = src_shape.size(); int64_t times = 1; - for (size_t i = 0; i != size - 2; i++) { + for (size_t i = 0; i != size - kDimDValueBNdFNz; i++) { dst_shape.push_back(src_shape[i]); times *= src_shape[i]; } - dst_shape.push_back(Ceil(src_shape[size - 1], w0)); - dst_shape.push_back(Ceil(src_shape[size - 2], h0)); + dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsW], w0)); + dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsWH], h0)); dst_shape.push_back(h0); dst_shape.push_back(w0); hw_shape.push_back(times); - hw_shape.push_back(src_shape[size - 2]); - hw_shape.push_back(src_shape[size - 1]); + hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); + hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); if (!IsShapeValid(dst_shape)) { GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); return PARAM_INVALID; @@ -128,16 +146,16 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con } // src&dst_shape can be written as times*H*W & times*W1*H1*H0*W0, respectively. 
dst_shape_size >= kDimNum4D - auto times = hw_shape.at(0); - auto h = hw_shape.at(1); - auto w = hw_shape.at(2); + auto times = hw_shape.at(kNdDimIndexN); + auto h = hw_shape.at(kNdDimIndexH); + auto w = hw_shape.at(kNdDimIndexW); auto hw = h * w; auto shape_size = args.dst_shape.size(); - auto w1 = args.dst_shape[shape_size - 4]; - auto h1 = args.dst_shape[shape_size - 3]; - auto h0 = args.dst_shape[shape_size - 2]; - auto w0 = args.dst_shape[shape_size - 1]; + auto w1 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0H0H1W1]; + auto h1 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0H0H1]; + auto h0 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0H0]; + auto w0 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0]; auto h1h0 = h1 * h0; auto h1h0w0 = h1h0 * w0; auto w1h1h0w0 = w1 * h1h0w0; @@ -198,16 +216,16 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con return OUT_OF_MEMORY; } - auto times = dst_hw_shape.at(0); - auto h = dst_hw_shape.at(1); - auto w = dst_hw_shape.at(2); + auto times = dst_hw_shape.at(kNdDimIndexN); + auto h = dst_hw_shape.at(kNdDimIndexH); + auto w = dst_hw_shape.at(kNdDimIndexW); auto hw = h * w; auto shape_size = args.src_shape.size(); - auto w1 = args.src_shape[shape_size - 4]; - auto h1 = args.src_shape[shape_size - 3]; - auto h0 = args.src_shape[shape_size - 2]; - auto w0 = args.src_shape[shape_size - 1]; + auto w1 = args.src_shape[shape_size - kFNzDimCountBackwardsW0H0H1W1]; + auto h1 = args.src_shape[shape_size - kFNzDimCountBackwardsW0H0H1]; + auto h0 = args.src_shape[shape_size - kFNzDimCountBackwardsW0H0]; + auto w0 = args.src_shape[shape_size - kFNzDimCountBackwardsW0]; auto h1h0 = h1 * h0; auto h1h0w0 = h1h0 * w0; auto w1h1h0w0 = w1 * h1h0w0; diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc index d890e681..88603d5c 100755 --- 
a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc @@ -23,12 +23,29 @@ #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" +#include "framework/common/types.h" #include "graph/utils/type_utils.h" namespace ge { namespace formats { namespace { const int kDimSize4D = 4; + +const size_t kSingleDim = 1; + +const size_t kNdDimIndexN = 0; +const size_t kNdDimIndexH = 1; +const size_t kNdDimIndexW = 2; + +const size_t kDimDValueBNdFZz = 2; // dim d-value between Nd and FractalZz + +const size_t kNdDimCountBackwardsW = 1; +const size_t kNdDimCountBackwardsWH = 2; + +const size_t kFZzDimCountBackwardsW0 = 1; +const size_t kFZzDimCountBackwardsW0H0 = 2; +const size_t kFZzDimCountBackwardsW0H0W1 = 3; +const size_t kFZzDimCountBackwardsW0H0W1H1 = 4; bool IsDataTypeSupport(DataType d_type) { return GetSizeByDataType(d_type) > 0; } using ShapeVector = std::vector; @@ -40,8 +57,8 @@ bool CheckShape(Format format, const ShapeVector &shape) { case FORMAT_NHWC: return CheckShapeValid(shape, kDimSize4D); default: - std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) + - " and FORMAT_FRACTAL_ZZ is not supported."; + std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) + + " and FORMAT_FRACTAL_ZZ is not supported."; GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return false; } @@ -60,14 +77,14 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap auto w0 = GetCubeSizeByDataType(data_type); auto h0 = GetCubeSizeByDataType(data_type); switch (src_shape.size()) { - case 1: - dst_shape.push_back(1); - dst_shape.push_back(Ceil(src_shape[0], w0)); + case kSingleDim: + dst_shape.push_back(DIM_DEFAULT_VALUE); + dst_shape.push_back(Ceil(src_shape[kNdDimIndexN], w0)); dst_shape.push_back(h0); 
dst_shape.push_back(w0); - hw_shape.push_back(1); - hw_shape.push_back(1); - hw_shape.push_back(src_shape[0]); + hw_shape.push_back(DIM_DEFAULT_VALUE); + hw_shape.push_back(DIM_DEFAULT_VALUE); + hw_shape.push_back(src_shape[kNdDimIndexN]); if (!IsShapeValid(dst_shape)) { GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); return PARAM_INVALID; @@ -76,17 +93,17 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap default: auto size = src_shape.size(); int64_t times = 1; - for (size_t i = 0; i != size - 2; i++) { + for (size_t i = 0; i != size - kDimDValueBNdFZz; i++) { dst_shape.push_back(src_shape[i]); times *= src_shape[i]; } - dst_shape.push_back(Ceil(src_shape[size - 2], h0)); - dst_shape.push_back(Ceil(src_shape[size - 1], w0)); + dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsWH], h0)); + dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsW], w0)); dst_shape.push_back(h0); dst_shape.push_back(w0); hw_shape.push_back(times); - hw_shape.push_back(src_shape[size - 2]); - hw_shape.push_back(src_shape[size - 1]); + hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); + hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); if (!IsShapeValid(dst_shape)) { GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); return PARAM_INVALID; @@ -127,16 +144,16 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con return OUT_OF_MEMORY; } // The src&dst_shape can be written as times*H*W & times*H1*W1*H0*W0, respectively. 
dst_shape_size >= kDimNum4D - auto times = hw_shape.at(0); - auto h = hw_shape.at(1); - auto w = hw_shape.at(2); + auto times = hw_shape.at(kNdDimIndexN); + auto h = hw_shape.at(kNdDimIndexH); + auto w = hw_shape.at(kNdDimIndexW); auto hw = h * w; auto shape_size = args.dst_shape.size(); - auto h1 = args.dst_shape[shape_size - 4]; - auto w1 = args.dst_shape[shape_size - 3]; - auto h0 = args.dst_shape[shape_size - 2]; - auto w0 = args.dst_shape[shape_size - 1]; + auto h1 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0H0W1H1]; + auto w1 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0H0W1]; + auto h0 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0H0]; + auto w0 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0]; auto h0w0 = h0 * w0; auto w1h0w0 = w1 * h0w0; auto h1w1h0w0 = h1 * w1h0w0; @@ -155,8 +172,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con auto src_offset = (src_h_head + w1_idx * w0) * size; auto dst_offset = (h0_head + w1_idx * h0w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -171,8 +188,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con auto src_offset = (src_h_head + src_w_idx) * size; auto dst_offset = (w0_head + w0_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -205,16 +222,16 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con } // The src&dst_shape can be written as times*H*W & times*H1*W1*H0*W0, respectively. dst_shape_size >= kDimNum4D - auto times = dst_hw_shape.at(0); - auto h = dst_hw_shape.at(1); - auto w = dst_hw_shape.at(2); + auto times = dst_hw_shape.at(kNdDimIndexN); + auto h = dst_hw_shape.at(kNdDimIndexH); + auto w = dst_hw_shape.at(kNdDimIndexW); auto hw = h * w; auto shape_size = args.src_shape.size(); - auto h1 = args.src_shape[shape_size - 4]; - auto w1 = args.src_shape[shape_size - 3]; - auto h0 = args.src_shape[shape_size - 2]; - auto w0 = args.src_shape[shape_size - 1]; + auto h1 = args.src_shape[shape_size - kFZzDimCountBackwardsW0H0W1H1]; + auto w1 = args.src_shape[shape_size - kFZzDimCountBackwardsW0H0W1]; + auto h0 = args.src_shape[shape_size - kFZzDimCountBackwardsW0H0]; + auto w0 = args.src_shape[shape_size - kFZzDimCountBackwardsW0]; auto h0w0 = h0 * w0; auto w1h0w0 = w1 * h0w0; auto h1w1h0w0 = h1 * w1h0w0; @@ -233,8 +250,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con auto src_offset = (h0_head + w1_idx * h0w0) * size; auto dst_offset = (dst_h_head + w1_idx * w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -249,8 +266,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con auto dst_w_idx = w1_head + w0_idx; auto dst_offset = (dst_h_head + dst_w_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.cc b/ge/common/formats/format_transfers/format_transfer_transpose.cc index e623d9e7..9be74b1f 100755 --- a/ge/common/formats/format_transfers/format_transfer_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_transpose.cc @@ -19,6 +19,7 @@ #include #include +#include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" @@ -29,21 +30,21 @@ namespace formats { namespace { std::map>> perm_args{ {FORMAT_NCHW, - {{FORMAT_NHWC, std::vector({0, 2, 3, 1})}, - {FORMAT_HWCN, std::vector({2, 3, 1, 0})}, - {FORMAT_CHWN, std::vector({1, 2, 3, 0})}}}, + {{FORMAT_NHWC, std::vector({kNchwN, kNchwH, kNchwW, kNchwC})}, + {FORMAT_HWCN, std::vector({kNchwH, kNchwW, kNchwC, kNchwN})}, + {FORMAT_CHWN, std::vector({kNchwC, kNchwH, kNchwW, kNchwN})}}}, {FORMAT_NHWC, - {{FORMAT_NCHW, std::vector({0, 3, 1, 2})}, - {FORMAT_CHWN, std::vector({3, 1, 2, 0})}, - {FORMAT_HWCN, std::vector({1, 2, 3, 0})}}}, + {{FORMAT_NCHW, std::vector({kNhwcN, kNhwcC, kNhwcH, kNhwcW})}, + {FORMAT_CHWN, std::vector({kNhwcC, kNhwcH, kNhwcW, kNhwcN})}, + {FORMAT_HWCN, 
std::vector({kNhwcH, kNhwcW, kNhwcC, kNhwcN})}}}, {FORMAT_HWCN, - {{FORMAT_NCHW, std::vector({3, 2, 0, 1})}, - {FORMAT_NHWC, std::vector({3, 0, 1, 2})}, - {FORMAT_CHWN, std::vector({2, 0, 1, 3})}}}, + {{FORMAT_NCHW, std::vector({kHwcnN, kHwcnC, kHwcnH, kHwcnW})}, + {FORMAT_NHWC, std::vector({kHwcnN, kHwcnH, kHwcnW, kHwcnC})}, + {FORMAT_CHWN, std::vector({kHwcnC, kHwcnH, kHwcnW, kHwcnN})}}}, {FORMAT_CHWN, - {{FORMAT_NCHW, std::vector({3, 0, 1, 2})}, - {FORMAT_NHWC, std::vector({3, 1, 2, 0})}, - {FORMAT_HWCN, std::vector({1, 2, 0, 3})}}}, + {{FORMAT_NCHW, std::vector({kChwnN, kChwnC, kChwnH, kChwnW})}, + {FORMAT_NHWC, std::vector({kChwnN, kChwnH, kChwnW, kChwnC})}, + {FORMAT_HWCN, std::vector({kChwnH, kChwnW, kChwnC, kChwnN})}}}, }; bool IsShapeArgValid(const std::vector &src_shape, const std::vector &perm_arg) { diff --git a/ge/common/formats/utils/formats_definitions.h b/ge/common/formats/utils/formats_definitions.h index 7f873f1b..25f36d6a 100755 --- a/ge/common/formats/utils/formats_definitions.h +++ b/ge/common/formats/utils/formats_definitions.h @@ -23,6 +23,7 @@ static const int kCubeSize = 16; static const int kNiSize = 16; static const int64_t kShapeItemNumMAX = 1024UL * 1024UL * 1024UL * 1024UL; + enum NchwDimIndex { kNchwN, kNchwC, @@ -47,6 +48,14 @@ enum HwcnDimIndex { kHwcnDimsNum }; +enum ChwnDimIndex { + kChwnC, + kChwnH, + kChwnW, + kChwnN, + kChwnDimsNum +}; + enum Nc1hwc0DimIndex { kNc1hwc0N, kNc1hwc0C1, diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc index b91f1204..44199c32 100755 --- a/ge/common/ge/tbe_plugin_manager.cc +++ b/ge/common/ge/tbe_plugin_manager.cc @@ -37,6 +37,8 @@ #include "graph/utils/type_utils.h" namespace ge { +const int kBaseInt = 10; + std::map TBEPluginManager::options_ = {}; // Get Singleton Instance @@ -155,7 +157,7 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { domi::FrameworkType type = domi::TENSORFLOW; auto it = options_.find(FRAMEWORK_TYPE); if (it != 
options_.end()) { - type = static_cast(std::strtol(it->second.c_str(), nullptr, 10)); + type = static_cast(std::strtol(it->second.c_str(), nullptr, kBaseInt)); } fmk_type = ge::TypeUtils::FmkTypeToSerialString(type); GELOGI("Framework type is %s.", fmk_type.c_str()); diff --git a/ge/common/util.cc b/ge/common/util.cc index 480be3c1..0a343a83 100644 --- a/ge/common/util.cc +++ b/ge/common/util.cc @@ -51,14 +51,15 @@ namespace { * If such an exception is encountered during operation, * the proto file can be divided into several small files or the limit value can be increased. */ -const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. -const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M +const int kFileSizeOutLimitedOrOpenFailed = -1; +const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. +const int kWarningThreshold = 1073741824; // 536870912 * 2 536870912 represent 512M /// The maximum length of the file. -const uint32_t kMaxFileSizeLimit = UINT32_MAX; // 4G for now +const uint32_t kMaxFileSizeLimit = UINT32_MAX; // 4G for now const int kMaxBuffSize = 256; const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' 
'_' and chinese character"; -constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024; +constexpr uint32_t kMaxConfigFileByte = 10485760; // 10 * 1024 * 1024 } // namespace namespace ge { @@ -76,7 +77,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(co std::string real_path = RealPath(file); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "pb file path '%s' not valid", file); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == -1, return false, "file size not valid."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == kFileSizeOutLimitedOrOpenFailed, return false, + "file size not valid."); std::ifstream fs(real_path, std::ifstream::in | std::ifstream::binary); if (!fs.is_open()) { @@ -118,20 +120,20 @@ long GetFileLength(const std::string &input_file) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str()); unsigned long long file_length = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, - ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)}); - return -1, "Open file[%s] failed. %s", input_file.c_str(), strerror(errno)); + mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, + ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)}); + return kFileSizeOutLimitedOrOpenFailed, "Open file[%s] failed. 
%s", input_file.c_str(), strerror(errno)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), ErrorManager::GetInstance().ATCReportErrMessage("E19015", {"filepath"}, {input_file}); return -1, "File[%s] size is 0, not valid.", input_file.c_str()); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > kMaxFileSizeLimit, - ErrorManager::GetInstance().ATCReportErrMessage( - "E19016", {"filepath", "filesize", "maxlen"}, - {input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)}); - return -1, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length, - kMaxFileSizeLimit); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage( + "E19016", {"filepath", "filesize", "maxlen"}, + {input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)}); + return kFileSizeOutLimitedOrOpenFailed, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length, + kMaxFileSizeLimit); return static_cast(file_length); } @@ -187,7 +189,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co std::streamsize size = file.tellg(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((size <= 0), file.close(); return false, "file length <= 0, not valid."); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > static_cast(kMaxFileSizeLimit), file.close(); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > static_cast(kMaxFileSizeLimit), file.close(); return false, "file size %ld is out of limit: %d.", size, kMaxFileSizeLimit); file.seekg(0, std::ios::beg); // [no need to check value] @@ -210,8 +212,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std:: GE_CHK_BOOL_EXEC(!directory_path.empty(), return -1, "directory path is empty."); auto dir_path_len = directory_path.length(); if (dir_path_len >= MMPA_MAX_PATH) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19002", {"filepath", "size"}, {directory_path, std::to_string(MMPA_MAX_PATH)}); + 
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, + {directory_path, std::to_string(MMPA_MAX_PATH)}); GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), MMPA_MAX_PATH); return -1; } @@ -224,8 +226,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std:: if (ret != 0) { if (errno != EEXIST) { ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path}); - GELOGW("Can not create directory %s. Make sure the directory exists and writable.", - directory_path.c_str()); + GELOGW("Can not create directory %s. Make sure the directory exists and writable.", directory_path.c_str()); return ret; } } @@ -265,7 +266,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromText(const ch std::string real_path = RealPath(file); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), ErrorManager::GetInstance().ATCReportErrMessage( - "E19000", {"path", "errmsg"}, {file, strerror(errno)}); + "E19000", {"path", "errmsg"}, {file, strerror(errno)}); return false, "Path[%s]'s realpath is empty, errmsg[%s]", file, strerror(errno)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == -1, return false, "file size not valid."); @@ -301,13 +302,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromMem(const cha google::protobuf::io::IstreamInputStream input(&fs); bool ret = google::protobuf::TextFormat::Parse(&input, message); GE_IF_BOOL_EXEC( - !ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); + !ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); return ret; } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() { - mmTimeval tv {}; + mmTimeval tv{}; int ret = mmGetTimeOfDay(&tv, nullptr); GE_LOGE_IF(ret != EN_OK, "Func gettimeofday may failed: ret=%d", ret); auto total_use_time = tv.tv_usec + 
tv.tv_sec * 1000000; // 1000000: seconds to microseconds @@ -315,7 +316,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint32_t GetCurrentSecondTimestap() { - mmTimeval tv {}; + mmTimeval tv{}; int ret = mmGetTimeOfDay(&tv, nullptr); GE_LOGE_IF(ret != EN_OK, "Func gettimeofday may failed: ret=%d", ret); auto total_use_time = tv.tv_sec; // seconds @@ -350,8 +351,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int6 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char *path) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path == nullptr, return "", "path pointer is NULL."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(path) >= MMPA_MAX_PATH, - ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(MMPA_MAX_PATH)}); - return "", "Path[%s] len is too long, it must be less than %d", path, MMPA_MAX_PATH); + ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, + {path, std::to_string(MMPA_MAX_PATH)}); + return "", "Path[%s] len is too long, it must be less than %d", path, MMPA_MAX_PATH); // Nullptr is returned when the path does not exist or there is no permission // Return absolute path when path is accessible @@ -385,16 +387,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) 
#ifdef __GNUC__ - std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; + std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; #else - std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; + std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; #endif GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - !ValidateStr(real_path, mode), - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, real_path, kPathValidReason}); - return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); + !ValidateStr(real_path, mode), + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {atc_param, real_path, kPathValidReason}); + return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); // The absolute path points to a file that is not readable if (mmAccess2(real_path.c_str(), M_R_OK) != EN_OK) { @@ -416,24 +418,25 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const } GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path.c_str()) >= MMPA_MAX_PATH, - ErrorManager::GetInstance().ATCReportErrMessage( - "E19002", {"filepath", "size"}, {file_path, std::to_string(MMPA_MAX_PATH)}); - return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), MMPA_MAX_PATH); + ErrorManager::GetInstance().ATCReportErrMessage( + "E19002", {"filepath", "size"}, {file_path, std::to_string(MMPA_MAX_PATH)}); + return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), + MMPA_MAX_PATH); // A regular matching expression to verify the validity of the input file path // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) 
#ifdef __GNUC__ - std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; + std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; #else - std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; + std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; #endif GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - !ValidateStr(file_path, mode), - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, kPathValidReason}); - return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason); + !ValidateStr(file_path, mode), + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {atc_param, file_path, kPathValidReason}); + return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason); std::string real_path = RealPath(file_path.c_str()); // Can get absolute path (file exists) diff --git a/ge/ge_runtime/runtime_model.cc b/ge/ge_runtime/runtime_model.cc index fb0f3e85..8baa5b05 100644 --- a/ge/ge_runtime/runtime_model.cc +++ b/ge/ge_runtime/runtime_model.cc @@ -28,6 +28,7 @@ namespace ge { namespace model_runner { +const int kOffsetUnit = 8; RuntimeModel::~RuntimeModel() { GELOGI("RuntimeModel destructor start"); @@ -495,7 +496,7 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model return false; } uint64_t *buff = reinterpret_cast(const_cast(constant->weight_data.data())); - int64_t offset = elem_num * 8; + int64_t offset = elem_num * kOffsetUnit; uintptr_t hbm_raw_data_base_addr = reinterpret_cast(constant->output_addrs[0]) + offset; for (int64_t i = elem_num - 1; i >= 0; --i) { buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 080ca889..682f11eb 100755 --- 
a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -50,6 +50,9 @@ const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe"; const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe"; const char *const kBatchLoadBuf = "batchLoadsoFrombuf"; const char *const kDeleteCustOp = "deleteCustOp"; +const int kTimeSpecNano = 1000000000; +const int kTimeSpecMiro = 1000000; +const int kSessionMaxBias = 100; struct CustAicpuSoBuf { uint64_t kernelSoBuf; uint32_t kernelSoBufLen; @@ -337,7 +340,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrSetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * kTimeSpecNano + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond davinci_model->SetProfileTime(MODEL_LOAD_END); } while (0); @@ -1041,12 +1044,12 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { GELOGE(INTERNAL_ERROR, "Failed to get current time."); return INTERNAL_ERROR; } - session_id = static_cast(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us + session_id = static_cast(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us session_id_bias_++; // max bais 100. 
- session_id_bias_ = session_id_bias_ % 100; - session_id = session_id * 100 + session_id_bias_; + session_id_bias_ = session_id_bias_ % kSessionMaxBias; + session_id = session_id * kSessionMaxBias + session_id_bias_; GELOGD("Generate new session id: %lu.", session_id); return SUCCESS; @@ -1117,7 +1120,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model GELOGI("Parse model %u success.", model_id); - davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * kTimeSpecNano + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond davinci_model->SetProfileTime(MODEL_LOAD_END); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 3e3a715d..a1620c3f 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -43,6 +43,13 @@ const char *kIsLastNode = "is_last_node"; const char *kIsFirstNode = "is_first_node"; const int64_t kCloseSkt = 100; const uint32_t kAddrLen = sizeof(void *); +const int kBaseInt = 10; +const int kStrtolFail = 0; +const int kArgsInputDesc = 0; +const int kArgsInputAddr = 1; +const int kArgsOutputDesc = 2; +const int kArgsOutputAddr = 3; +const int kArgsAttrHandle = 4; } // namespace namespace ge { @@ -371,7 +378,7 @@ Status KernelTaskInfo::Distribute() { rtError_t rt_ret = RT_ERROR_NONE; char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); - int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, 10) : 0; + int64_t env_flag = (res == EN_OK) ? 
strtol(skt_enable_env, nullptr, kBaseInt) : kStrtolFail; bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_); if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); @@ -749,15 +756,15 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel return FAILED; } } - *(reinterpret_cast(args + ctx_.argsOffset[0])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsInputDesc])) = static_cast(reinterpret_cast(custom_info_.input_descs)); // arg 0 - *(reinterpret_cast(args + ctx_.argsOffset[1])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsInputAddr])) = static_cast(reinterpret_cast(custom_info_.input_addrs)); // arg 1 - *(reinterpret_cast(args + ctx_.argsOffset[2])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsOutputDesc])) = static_cast(reinterpret_cast(custom_info_.output_descs)); // arg 2 - *(reinterpret_cast(args + ctx_.argsOffset[3])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsOutputAddr])) = static_cast(reinterpret_cast(custom_info_.output_addrs)); // arg 3 - *(reinterpret_cast(args + ctx_.argsOffset[4])) = + *(reinterpret_cast(args + ctx_.argsOffset[kArgsAttrHandle])) = static_cast(reinterpret_cast(custom_info_.attr_handle)); // arg 4 rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc index 39373901..4e22cd7c 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -19,6 +19,8 @@ namespace ge { namespace skt { +const size_t kFusedKernelMinimumSize = 2; +const size_t kFusedKernelSizeUnit = 2; SuperKernelFactory &SuperKernelFactory::GetInstance() { static SuperKernelFactory factory; return factory; @@ -79,17 
+81,17 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list return FAILED; } - if (super_kernel_size < 2) { + if (super_kernel_size < kFusedKernelMinimumSize) { GELOGW( "SKT: the number of kernels being fused must be greater than or " "equal to 2"); return FAILED; } GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size()); - const size_t nav_table_len = 2 * stub_func_list.size(); - std::unique_ptr nav_table(new (std::nothrow) uint64_t[nav_table_len]); + const size_t nav_table_len = kFusedKernelSizeUnit * stub_func_list.size(); + std::unique_ptr nav_table(new(std::nothrow) uint64_t[nav_table_len]); GE_CHECK_NOTNULL(nav_table); - uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t); + uint64_t nav_table_size = kFusedKernelSizeUnit * stub_func_list.size() * sizeof(int64_t); rtError_t rt_ret; void *hbm_nav_table_addr = nullptr; @@ -101,10 +103,10 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func); // store two uint64_t address // address divided by 4 because of 32bits encoding, call offset will *4 when calculating - nav_table[i * 2] = static_cast(reinterpret_cast(sub_device_func)) / 4; - GELOGD("SKT: CALL offet %lu", nav_table[i * 2]); - nav_table[i * 2 + 1] = static_cast(reinterpret_cast(args_addr_list[i])); - GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); + nav_table[i * kFusedKernelSizeUnit] = static_cast(reinterpret_cast(sub_device_func)) / 4; + GELOGD("SKT: CALL offet %lu", nav_table[i * kFusedKernelSizeUnit]); + nav_table[i * kFusedKernelSizeUnit + 1] = static_cast(reinterpret_cast(args_addr_list[i])); + GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * kFusedKernelSizeUnit + 1]); } rt_ret = rtMalloc(reinterpret_cast(&hbm_nav_table_addr), nav_table_size, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc 
failed. error: 0x%X", rt_ret); diff --git a/ge/graph/load/new_model_manager/ts_mem_mall.h b/ge/graph/load/new_model_manager/ts_mem_mall.h index 42ad3957..64a64930 100644 --- a/ge/graph/load/new_model_manager/ts_mem_mall.h +++ b/ge/graph/load/new_model_manager/ts_mem_mall.h @@ -25,7 +25,7 @@ #include "framework/common/debug/ge_log.h" namespace { -constexpr uint32_t kMaxTsMemBlock = 2 * 1024 * 1024; // Max block 2M +constexpr uint32_t kMaxTsMemBlock = 2097152; // Max block 2M 2 * 1024 * 1024 constexpr uint32_t kTsMemAligment = 64; // Malloc for 64 bits align constexpr uint32_t kTsMemAlignMask = kTsMemAligment - 1; } diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index 4ba39ca8..d6027a08 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -25,13 +25,13 @@ namespace ge { const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, - 8 * kMByteSize, - 32 * kMByteSize, - 128 * kMByteSize, + kBinSizeUnit8 * kMByteSize, + kBinSizeUnit32 * kMByteSize, + kBinSizeUnit128 * kMByteSize, kGByteSize, - 4 * kGByteSize, - 16 * kGByteSize, - 26 * kGByteSize}; + kBinSizeUnit4 * kGByteSize, + kBinSizeUnit16 * kGByteSize, + kBinSizeUnit26 * kGByteSize}; static bool BlockComparator(const Block *left, const Block *right) { if (left->size != right->size) { diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index dc4af753..e024d5cd 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -34,10 +34,17 @@ namespace ge { constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes +constexpr size_t kBinSizeUnit4 = 4; +constexpr size_t kBinSizeUnit8 = 8; +constexpr size_t kBinSizeUnit16 = 16; +constexpr size_t kBinSizeUnit26 = 26; +constexpr size_t kBinSizeUnit32 = 32; +constexpr size_t kBinSizeUnit128 = 128; + constexpr double kSplitThreshold 
= 0.75; // split when malloc size <= small block size * kSpliThreshold constexpr size_t kKByteSize = 1024; -constexpr size_t kMByteSize = 1024 * 1024; -constexpr size_t kGByteSize = 1024 * 1024 * 1024; +constexpr size_t kMByteSize = 1048576; // 1024 * 1024 +constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024 static const uint32_t kNumBins = 8; diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index be7d4eb2..84a07069 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -280,9 +280,9 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin return PARAM_INVALID; } uint64_t free_size = total_size_ - var_mem_size_; - if (free_size < (size + kSessionMemAlignSize * 2)) { + if (free_size < (size + kSessionMemAlignSize * kSessionMemAlignUnit)) { GELOGE(PARAM_INVALID, "Out of memory : current var size[%lu] exceeds total var size[%lu]", - size + kSessionMemAlignSize * 2 + var_mem_size_, total_size_); + size + kSessionMemAlignSize * kSessionMemAlignUnit + var_mem_size_, total_size_); return PARAM_INVALID; } diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h index b4f6aca3..fcbc92c5 100755 --- a/ge/graph/manager/graph_var_manager.h +++ b/ge/graph/manager/graph_var_manager.h @@ -42,6 +42,7 @@ const size_t kGraphMemoryBuffer = 4UL * 1024UL * 1024UL * 1024UL; const size_t kMaxMemorySize = 256UL * 1024UL * 1024UL * 1024UL; const char kEnvGeuseStaticMemory[] = "GE_USE_STATIC_MEMORY"; const uint64_t kSessionMemAlignSize = 512; +const size_t kSessionMemAlignUnit = 2; enum MemStatus { NORMAL = 0, diff --git a/ge/graph/optimize/mem_rw_conflict_optimize.cc b/ge/graph/optimize/mem_rw_conflict_optimize.cc index 5888471a..dfc6c9df 100644 --- a/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -26,6 +26,13 @@ namespace { using namespace ge; const int kIdentityAnchorIndex = 0; 
+const size_t kSerialStringVecSize = 4; + +const int kCaseReadOnly = 0; +const int kCaseScopeWriteable = 2; +const int kCaseWriteable = 3; +const int kCaseInvalidRWType = 5; + // rw type of input. enum class InputRWType { kReadOnly, // Normal op input only read @@ -55,7 +62,7 @@ thread_local map node_rwtype_map_; /// @return rw_type_name /// static std::string InputRWTypeToSerialString(InputRWType rw_type) { - const static char *names[4] = {"ReadOnly", "Writeable", "ScopeWriteable", "InvalidRWType"}; + const static char *names[kSerialStringVecSize] = {"ReadOnly", "Writeable", "ScopeWriteable", "InvalidRWType"}; return names[static_cast(rw_type)]; } @@ -65,7 +72,7 @@ static std::string InputRWTypeToSerialString(InputRWType rw_type) { /// @return rw_type_name /// static std::string OutputRWTypeToSerialString(OutputRWType rw_type) { - const static char *names[4] = {"ReadOnly", "SoftRead", "Writeable", "InvalidRWType"}; + const static char *names[kSerialStringVecSize] = {"ReadOnly", "SoftRead", "Writeable", "InvalidRWType"}; return names[static_cast(rw_type)]; } @@ -118,13 +125,13 @@ InputRWType GetInputRwTypeInConflict(const std::set &rw_type_set) { } switch (total_rw_type) { - case 0: + case kCaseReadOnly: return InputRWType::kReadOnly; // all input rw type is readonly - case 2: + case kCaseScopeWriteable: return InputRWType::kScopeWriteable; // readonly 2 scope_writeable - case 3: + case kCaseWriteable: return InputRWType::kWriteable; // all input rw type is writeable or readonly 2 writeable - case 5: + case kCaseInvalidRWType: return InputRWType::kInvalidRWType; // writeable 2 scope_writeable default: return InputRWType::kInvalidRWType; diff --git a/ge/graph/passes/data_pass.cc b/ge/graph/passes/data_pass.cc index 4ec8743e..5bbd2fb1 100644 --- a/ge/graph/passes/data_pass.cc +++ b/ge/graph/passes/data_pass.cc @@ -21,6 +21,7 @@ namespace ge { namespace { +const int kDataIndexOffset = 2; Status MappingSubgraphInput(const ComputeGraphPtr &graph, const std::function 
&input) { for (const auto &node : graph->GetDirectNode()) { if (node->GetType() != DATA) { @@ -111,7 +112,7 @@ Status ParseSubgraphPostFnWhile(const string &subgraph_name, const ComputeGraphP Status ParseSubgraphPostFnFor(const string &subgraph_name, const ComputeGraphPtr &graph) { return MappingSubgraphIndex(graph, - [](int data_index) { return (data_index == 0) ? 0 : data_index + 2; }, + [](int data_index) { return (data_index == 0) ? 0 : data_index + kDataIndexOffset; }, [](int retval_index) { return retval_index; }); } diff --git a/ge/graph/passes/for_pass.cc b/ge/graph/passes/for_pass.cc index f5280a36..31dee390 100644 --- a/ge/graph/passes/for_pass.cc +++ b/ge/graph/passes/for_pass.cc @@ -37,6 +37,7 @@ namespace { const uint32_t kSubgraphLoopVarInputIndex = 0; const uint32_t kSubgraphInputIndex = 1; const uint32_t kWhileOutputIndex = 5; + const size_t kIDiffValue = 2; const std::string kAbs = "Abs"; } @@ -694,7 +695,7 @@ Status ForPass::UpdateForBodyInputMapping(const WhileInfo &while_info) { } else if ((i == FOR_LIMIT_INPUT) || (i == FOR_DELTA_INPUT)) { continue; } else { - input_mapping[i] = i - 2; + input_mapping[i] = i - kIDiffValue; } } for_body->UpdateInputMapping(input_mapping); diff --git a/ge/graph/passes/mark_agnostic_pass.cc b/ge/graph/passes/mark_agnostic_pass.cc index 8c9a0451..30fa1742 100644 --- a/ge/graph/passes/mark_agnostic_pass.cc +++ b/ge/graph/passes/mark_agnostic_pass.cc @@ -19,6 +19,8 @@ #include "graph/utils/tensor_utils.h" namespace ge { +const size_t kTwoInputNodesSize = 2; + Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { for (const auto &node : graph->GetDirectNode()) { auto node_type = NodeUtils::GetNodeType(*node); @@ -52,7 +54,7 @@ Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { /// Enter-----------+ /// +-> Merge /// NextIteration---+ - if (input_nodes.size() == 2) { + if (input_nodes.size() == kTwoInputNodesSize) { if (input_nodes.at(0)->GetType() == ENTER && input_nodes.at(1)->GetType() == NEXTITERATION) { 
continue; } diff --git a/ge/graph/passes/merge_pass.cc b/ge/graph/passes/merge_pass.cc index 80394e7a..26d82820 100644 --- a/ge/graph/passes/merge_pass.cc +++ b/ge/graph/passes/merge_pass.cc @@ -29,6 +29,8 @@ namespace ge { const int kValueIndexOutputIndex = 1; +const size_t kCaseNoInput = 0; +const size_t kCaseOneInput = 1; Status MergePass::Run(NodePtr &node) { GELOGD("MergePass running"); @@ -50,7 +52,7 @@ Status MergePass::Run(NodePtr &node) { const auto &in_data_nodes = node->GetInDataNodes(); switch (in_data_nodes.size()) { - case 0: { + case kCaseNoInput: { /// Case A: input_count = 0, the output of merge node is inactive as well /// In which case the output branch can be removed /// until another merge node is met @@ -65,7 +67,7 @@ Status MergePass::Run(NodePtr &node) { } return ret; } - case 1: { // Case B: input_count = 1, the merge node can be optimized out + case kCaseOneInput: { // Case B: input_count = 1, the merge node can be optimized out std::vector merge_io_map = {PassUtils::GetUniqueInDataAnchorIndex(node), -1}; if (merge_io_map[0] != -1 && IsNeedChangeIndexToConstant(node)) { int index = merge_io_map[0]; diff --git a/ge/host_kernels/gather_v2_kernel.cc b/ge/host_kernels/gather_v2_kernel.cc index e52b4534..ee73626b 100644 --- a/ge/host_kernels/gather_v2_kernel.cc +++ b/ge/host_kernels/gather_v2_kernel.cc @@ -40,6 +40,10 @@ const size_t kGatherV2InpotNum = 3; const size_t kMaxIndicatesDims = 1; // only support scalar and 1 dims indicates_ const std::set supported_type = {DT_FLOAT16, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}; +const int64_t DIM_AXIS_0 = 0; +const int64_t DIM_AXIS_1 = 1; +const int64_t DIM_AXIS_2 = 2; +const int64_t DIM_AXIS_3 = 3; } // namespace template Status GatherV2Kernel::ProcessAxis0(ConstGeTensorPtr tensor_x, GeTensorPtr output) { @@ -191,16 +195,16 @@ Status GatherV2Kernel::GenData(const int64_t data_num, ConstGeTensorPtr tensor_x Status ret = SUCCESS; switch 
(axis) { - case 0: + case DIM_AXIS_0: ret = ProcessAxis0(tensor_x, output); break; - case 1: + case DIM_AXIS_1: ret = ProcessAxis1(tensor_x, output); break; - case 2: + case DIM_AXIS_2: ret = ProcessAxis2(tensor_x, output); break; - case 3: + case DIM_AXIS_3: ret = ProcessAxis3(tensor_x, output); break; default: diff --git a/ge/host_kernels/range_kernel.cc b/ge/host_kernels/range_kernel.cc index 32a72b47..97254fff 100644 --- a/ge/host_kernels/range_kernel.cc +++ b/ge/host_kernels/range_kernel.cc @@ -32,6 +32,9 @@ namespace ge { namespace { constexpr size_t kRangeInputNum = 3; constexpr uint32_t kRangeDimNum = 0; +constexpr size_t kStartIndex = 0; +constexpr size_t kLimitIndex = 1; +constexpr size_t kDeltaIndex = 2; const std::set kRangeSupportedType = {DT_INT32, DT_FLOAT}; } // namespace @@ -53,9 +56,9 @@ Status RangeKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetTensorDesc().GetDataType(); if (data_type == DT_FLOAT) { if (GetRange(*reinterpret_cast(start->GetData().data()), diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index f506caec..2c38367a 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -23,6 +23,8 @@ namespace ge { namespace hybrid { +const size_t kPaddingUnit = 2; + size_t kMaxHbmMemorySize = 1024UL * 1024UL * 1024UL * 1024UL; // 1024G std::map> NpuMemoryAllocator::allocators_; @@ -77,7 +79,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { } } // padding up to multiple of padding, and add extra padding - allocate_size = (size + 2 * padding - 1) / padding * padding; + allocate_size = (size + kPaddingUnit * padding - 1) / padding * padding; GELOGD("Padding size %ld by %d. 
final size = %zu.", size, padding, allocate_size); buffer = MemManager::Instance() .CachingInstance(RT_MEMORY_HBM) diff --git a/ge/hybrid/executor/node_done_manager.cc b/ge/hybrid/executor/node_done_manager.cc index c0b0b17b..f0d4324a 100644 --- a/ge/hybrid/executor/node_done_manager.cc +++ b/ge/hybrid/executor/node_done_manager.cc @@ -21,7 +21,7 @@ namespace ge { namespace hybrid { namespace { -constexpr int kDefaultWaitTimeoutInSec = 60 * 10; +constexpr int kDefaultWaitTimeoutInSec = 600; } bool NodeDoneManager::Cond::Await() { std::unique_lock lk(cond_mu_); diff --git a/ge/offline/main.cc b/ge/offline/main.cc index 76494c68..b7188a85 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -68,7 +68,7 @@ const char *const kModeSupport = "only support 0(model to framework model), " const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; // limit available mem size 2G -const long kMinAvailableMem = 2 * 1024 * 1024; +const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 DEFINE_string(model, "", "The model file."); DEFINE_string(output, "", "The output file path&name."); diff --git a/ge/session/omg.cc b/ge/session/omg.cc index b5e1e105..80a13ea7 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -68,6 +68,9 @@ const std::string kScopeIdAttr = "fusion_scope"; const char *const kOutputTypeSample = "correct sample is \"opname:index:dtype\""; const char *const kOutputTypeSupport = "only support FP32, FP16, UINT8"; const char *const kOutputTypeError = "The multiple out nodes set in output_type must be found in out_nodes."; +const size_t kNodeNameIndex = 0; +const size_t kIndexStrIndex = 1; +const size_t kDTValueIndex = 2; } // namespace // When the model is converted to a JSON file, the following operator attributes in the blacklist will be ignored @@ -381,14 +384,14 @@ Status ParseOutputType(const std::string &output_type, std::mapGetErrDesc(value) +const int MODID_OMG = 1; // OMG module ID +const int MODID_OME = 2; // OME 
module ID +const int MODID_CALIBRATION = 3; // Calibration module ID + namespace domi { class StatusFactory { public: From a755e711d1befaa5162ede069023ad705eda79d3 Mon Sep 17 00:00:00 2001 From: wxl Date: Wed, 9 Dec 2020 10:00:33 +0800 Subject: [PATCH 048/127] ir build optimize --- ge/ir_build/ge_ir_build.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index c3e557c8..b3be6acd 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -463,7 +463,7 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector Date: Wed, 9 Dec 2020 10:05:02 +0800 Subject: [PATCH 049/127] ir build optimize --- ge/ir_build/ge_ir_build.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index b3be6acd..34663493 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -463,7 +463,7 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector Date: Wed, 9 Dec 2020 10:28:56 +0800 Subject: [PATCH 050/127] change python3.7 to python3 and modify atc.bin fwk_atc.bin --- CMakeLists.txt | 2 +- ge/offline/atc | 4 ++-- metadef | 2 +- parser | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a7528f95..648bb954 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,7 +37,7 @@ set(ATLAS_MS_RUNTIME_PATH ${ATLAS_RUNTIME_DIR} ${ATLAS_ACL_DIR} ${ATLAS_ATC_DIR} option(ENABLE_OPEN_SRC "Enable graphengine compile in opensource." 
FALSE) if (ENABLE_OPEN_SRC) - set(HI_PYTHON python3.7) + set(HI_PYTHON python3) include(cmake/external_libs/protobuf_shared.cmake) include(cmake/external_libs/protobuf_static.cmake) diff --git a/ge/offline/atc b/ge/offline/atc index a2b96482..73dfbee0 100644 --- a/ge/offline/atc +++ b/ge/offline/atc @@ -14,7 +14,7 @@ export LD_LIBRARY_PATH="${LIB64_PATH}:${LD_LIBRARY_PATH}" export PYTHONPATH="${PYTHON_PATH}:${PYTHONPATH}" if [ -f "${PKG_PATH}/bin/atc.bin" ];then - atc.bin $@ + ${PKG_PATH}/bin/atc.bin/atc.bin $@ else - fwk_atc.bin $@ + ${PKG_PATH}/bin/atc.bin/fwk_atc.bin $@ fi diff --git a/metadef b/metadef index 5b9a7f84..7472245f 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 5b9a7f84a4347f8816d492aa51f2414ccf8a0744 +Subproject commit 7472245fcaed273b7cff99a1f6e6bab3313be684 diff --git a/parser b/parser index 70369668..11c88ffc 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 70369668abebed84942d9f355494a89e82cc1eac +Subproject commit 11c88ffc907399db084766bf9d5c171e5432eb8d From 56b950a09ddfb6a2cfb0efbe3da9f2e024a554ba Mon Sep 17 00:00:00 2001 From: TangQunzhang Date: Wed, 9 Dec 2020 10:50:30 +0800 Subject: [PATCH 051/127] Dynamic multi batch memory optimization --- .../build/memory/binary_block_mem_assigner.cc | 10 +- ge/graph/build/memory/block_mem_assigner.cc | 511 +++++++++++------- ge/graph/build/memory/block_mem_assigner.h | 36 +- ge/graph/build/memory/graph_mem_assigner.cc | 4 +- .../load/new_model_manager/davinci_model.cc | 2 +- .../load/new_model_manager/model_utils.cc | 4 +- 6 files changed, 363 insertions(+), 204 deletions(-) diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index ecd2488c..16420123 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -22,7 +22,7 @@ namespace { const uint32_t kRangeCeilInterval = 2; const uint32_t kLogBase = 2; const int64_t kLargeBlockSize = 8 * 
1024 * 1024; -const int64_t kLargeBlockRangeSize = 10; +const int64_t kLargeBlockRangeSize = 2; } // namespace namespace ge { @@ -73,15 +73,17 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { GELOGE(FAILED, "dividend is 0!"); return FAILED; } + // Memory size is 512 aligned, so it is not necessary to take less than 512 + int64_t min_memory_size = (all_memory_size.back() > MEM_ALIGN_SIZE) ? MEM_ALIGN_SIZE : all_memory_size.front(); auto range_number = static_cast( - ceil(log(all_memory_size.back() / static_cast(all_memory_size.front())) / log(kLogBase))); + ceil(log(all_memory_size.back() / static_cast(min_memory_size)) / log(kLogBase))); range_number = (range_number == 0) ? 1 : range_number; GELOGD("Range number: %zu", range_number); vector> ranges(range_number); GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0."); size_t range_number_limit = all_memory_size.size() / range_number; - int64_t range_ceil = all_memory_size[0]; + int64_t range_ceil = min_memory_size; for (size_t i = 1; i <= range_number; i++) { GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast(range_ceil), kRangeCeilInterval), GELOGE(FAILED, "Multiply result is out of range."); @@ -114,7 +116,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { range_ceils.push_back(range.back()); } } - GELOGD("Range ceils: %s", ToString(range_ceils).c_str()); + GELOGI("Range ceils: %s", ToString(range_ceils).c_str()); return SUCCESS; } diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index cdf768d8..bd2a9912 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -65,6 +65,98 @@ void AlignMemOffset(size_t &mem_align_size) { mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; } +static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { + auto 
left_node_op_desc = left.node->GetOpDesc(); + auto right_node_op_desc = right.node->GetOpDesc(); + if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr) + && (left_node_op_desc->GetId() < right_node_op_desc->GetId())) { + return true; + } + return false; +} + +void GetLifeList(const MemoryBlock &block, std::vector &life_list, bool child) { + for (auto &node : block.NodeTypeIndexList()) { + life_list.emplace_back(node); + } + + if (child) { + for (auto child_block : block.ChildBlockList()) { + if (child_block == nullptr) { + continue; + } + if (block.stream_id_ != child_block->stream_id_ || !block.same_stream_ || !child_block->same_stream_) { + life_list.clear(); + return; + } + GetLifeList(*child_block, life_list, child); + } + } +} + +bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { + if ((left.node == nullptr) || (right.node == nullptr)) { + return true; + } + auto left_node_op_desc = left.node->GetOpDesc(); + auto right_node_op_desc = right.node->GetOpDesc(); + if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { + if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) { + if (left.life_time_end >= static_cast(right_node_op_desc->GetId())) { + return true; + } + } else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) { + return true; + } else { + if (right.life_time_end >= static_cast(left_node_op_desc->GetId())) { + return true; + } + } + } + return false; +} + +/// +/// When child block's life time are not cross with parent block, they can be reused(only same stream). 
+/// |-----------------------------parent block---------------------| +/// |------child block1--------------||------child block2------| +/// |--child block1-1-| +/// +bool CanIntervalLifeReuse(MemoryBlock &parent_block, MemoryBlock &child_block) { + // judge by interval life time, only same stream can be judged by interval life time + if (parent_block.stream_id_ != child_block.stream_id_ || !parent_block.same_stream_ || !child_block.same_stream_ + || parent_block.NodeTypeIndexList().empty() || child_block.NodeTypeIndexList().empty()) { + return false; + } + + // quick judge by front and back node + if (CrossLifeTime(parent_block.NodeTypeIndexList().front(), child_block.NodeTypeIndexList().front())) { + return false; + } + if (CrossLifeTime(parent_block.NodeTypeIndexList().back(), child_block.NodeTypeIndexList().back())) { + return false; + } + + std::vector life_list; + GetLifeList(parent_block, life_list, false); + GetLifeList(child_block, life_list, true); + if (life_list.empty()) { + return false; + } + std::sort(life_list.begin(), life_list.end(), CompareLifeTime); + size_t pre_life_end = 0; + for (auto &node : life_list) { + auto node_op_desc = node.node->GetOpDesc(); + if (node_op_desc != nullptr && pre_life_end >= static_cast(node_op_desc->GetId())) { + // life time cross + return false; + } + pre_life_end = node.life_time_end; + } + GELOGI("Block size[%zu, %zu] life time are not cross.", parent_block.Size(), child_block.Size()); + return true; +} + void MemoryBlock::SetHeadOffset(size_t offset) { head_offset_ = offset; size_t child_offset = head_offset_; @@ -125,20 +217,12 @@ size_t MemoryBlock::AlignSize() const { return align_block_size; } -bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { - if (node_type_index_list_.empty()) { +bool MemoryBlock::IsSameBatchLabel() { + // only same batch label can reuse + if (batch_label_.empty() || node_type_index_list_.empty()) { return false; } - auto node_op_desc = 
node_type_index_list_[0].node->GetOpDesc(); - if (node_op_desc == nullptr) { - return false; - } - // not all op has ATTR_NAME_BATCH_LABEL, no need check return value, only check out parameter - (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, first_batch_label); - if (first_batch_label.empty()) { - return false; - } bool all_same_label = true; for (size_t index = 1; index < node_type_index_list_.size(); ++index) { if (node_type_index_list_[index].node == nullptr) { @@ -147,8 +231,9 @@ bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { std::string batch_label; auto index_op_desc = node_type_index_list_[index].node->GetOpDesc(); GE_IF_BOOL_EXEC(index_op_desc == nullptr, continue); + // not all op has ATTR_NAME_BATCH_LABEL, no need check return value, only check out parameter (void)ge::AttrUtils::GetStr(index_op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (first_batch_label != batch_label) { + if (batch_label_ != batch_label) { all_same_label = false; break; } @@ -197,7 +282,7 @@ void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLi } void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { - if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { + if (CanNotLifeReuse(this) || CanNotLifeReuse(block) || (batch_label_ != block->batch_label_)) { return; } if (block->continuous_block_) { @@ -207,16 +292,27 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ MemoryBlock *parent = nullptr; MemoryBlock *child = nullptr; // merge small block to large block - if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { - if ((child_offset_ + block->AlignSize()) <= AlignSize()) { - parent = this; - child = block; - } else if ((block->child_offset_ + AlignSize()) <= block->AlignSize()) { - parent = block; - child = this; + // noalign size 802816 + 802816 = 1605632 can reuse + // after 32 align size 802848 + 
802848 > 1605664 can't reuse + // after 512 align size 803328 + 803328 > 1606144 can't reuse + // so 803328 + 803328 = 1606144 + 512 can reuse + if ((child_offset_ + block->AlignSize()) <= (AlignSize() + MEM_ALIGN_SIZE)) { + parent = this; + child = block; + } else if ((block->child_offset_ + AlignSize()) <= (block->AlignSize() + MEM_ALIGN_SIZE)) { + parent = block; + child = this; + } + + if ((parent != nullptr) && (child != nullptr)) { + // Different streams must use stream dependency to judge the life cycle + // In case same stream if it has child block, can judge all the child block's life time in CanIntervalLifeReuse + bool can_block_life_reuse = (child->child_blocks_.empty() + && (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd())); + if (!can_block_life_reuse && !CanIntervalLifeReuse(*parent, *child)) { + return; } - } - if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) { + parent->child_blocks_.emplace_back(child); parent->child_offset_ += child->AlignSize(); child->deleted_block_ = true; @@ -261,6 +357,7 @@ size_t MemoryBlock::GetDependLifeBegin(int64_t stream_id, DependStreamLife &tota void AddDependLife(const ge::NodePtr &org_node, const ge::NodePtr &node, int64_t stream_id, std::map &depend_stream_life, DependStreamLife &total_node_depend_stream_life) { GE_CHECK_NOTNULL_EXEC(node, return); + GE_CHECK_NOTNULL_EXEC(org_node, return); auto node_desc = node->GetOpDesc(); GE_CHECK_NOTNULL_EXEC(node_desc, return); auto node_id = node_desc->GetId(); @@ -415,12 +512,60 @@ BlockMemAssigner::~BlockMemAssigner() { } } +void GetMaxBatchAllMemorySize(std::map> &batch_all_memory_size, + std::map batch_total_size, vector &all_memory_size, + std::string &max_batch_label) { + // use max batch all memory size for reuse range + int64_t max_batch_size = 0; + for (const auto &it : batch_total_size) { + GELOGI("Batch[%s] total memory size[%ld]", it.first.c_str(), it.second); + // no batch label + if 
(it.first.empty()) { + continue; + } + if (it.second > max_batch_size) { + max_batch_size = it.second; + max_batch_label = it.first; + } + } + GELOGI("Max batch[%s] total memory size[%ld]", max_batch_label.c_str(), max_batch_size); + + for (const auto &it : batch_all_memory_size) { + if (it.first.empty() || (it.first == max_batch_label)) { + all_memory_size.insert(all_memory_size.end(), it.second.begin(), it.second.end()); + } + } + // all_memory_size can't be empty + if (all_memory_size.empty()) { + all_memory_size.emplace_back(MEM_ALIGN_SIZE); + } + sort(all_memory_size.begin(), all_memory_size.end()); + GELOGD("All memory size: %s", ToString(all_memory_size).c_str()); + + for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { + if (*iter == 0) { + iter = all_memory_size.erase(iter); + } else { + ++iter; + } + } +} + void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { vector temp; + std::map> batch_all_memory_size; + std::map batch_total_size; for (const NodePtr &n : compute_graph_->GetAllNodes()) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); + if (CheckIsZeroMemNodeType(node_op_desc->GetType())) { + continue; + } + + std::string batch_label; + (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, batch_label); + if (node_op_desc->GetType() == ATOMICADDRCLEAN) { atomic_addr_clean_id_ = node_op_desc->GetId(); } @@ -434,9 +579,14 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { if (!reuse_input) { int64_t size = 0; GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); - if (anchor_to_symbol_.empty()) { - all_memory_size.emplace_back(size); + batch_all_memory_size[batch_label].emplace_back(size); + if (batch_total_size.find(batch_label) == batch_total_size.end()) { + batch_total_size[batch_label] = size; } else { + batch_total_size[batch_label] += size; + } + + if (!anchor_to_symbol_.empty()) { auto 
iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); if (iter1 == anchor_to_symbol_.end()) { continue; @@ -452,23 +602,11 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { } } temp.clear(); - GetNodeWorkSpaceSize(n, temp); - all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end()); - } - for (const auto &pair : symbol_size_) { - all_memory_size.emplace_back(pair.second); - } - sort(all_memory_size.begin(), all_memory_size.end()); - GELOGD("All memory size: %s", ToString(all_memory_size).c_str()); - - for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { - if (*iter == 0) { - iter = all_memory_size.erase(iter); - } else { - ++iter; - } + GetNodeWorkSpaceSize(n, temp, batch_total_size[batch_label]); + batch_all_memory_size[batch_label].insert(batch_all_memory_size[batch_label].end(), temp.begin(), temp.end()); } - + GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_); + GetMaxBatchAllMemorySize(batch_all_memory_size, batch_total_size, all_memory_size, max_batch_label_); InitReuseFlag(); PrintSymbolMap(); } @@ -529,16 +667,6 @@ bool CanReuseBySize(const map &reusable_block_counts, const Me bool can_reuse = false; if (reusable_block.Size() == block_size) { can_reuse = true; - } else { - string key = std::to_string(reusable_block.Size()); - key += "_" + std::to_string(reusable_block.stream_id_); - key += "_" + std::to_string(reusable_block.memory_type_); - auto it = reusable_block_counts.find(key); - GE_IF_BOOL_EXEC((it != reusable_block_counts.end() && (it->second > kReuseMaxCount)) && - (reusable_block.Size() > block_size), - can_reuse = true; - GELOGD("Less size mem reuse, reuse block size:%zu, current block size:%zu", - reusable_block.Size(), block_size);); } return can_reuse; } @@ -860,17 +988,26 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "Input 
parameter n is null."); auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); + std::string batch_label; + (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, batch_label); + if (batch_label.empty() || (batch_label == max_batch_label_)) { + size_t align_size = real_size; + AlignMemOffset(align_size); + theory_memory_size_ += align_size; + if (theory_memory_size_ > theory_min_memory_size_) { + theory_min_memory_size_ = theory_memory_size_; + } + } bool is_reuse_memory = false; - string ge_disable_reuse_mem_env = "0"; - (void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env); - if (ge_disable_reuse_mem_env != "1") { + if (ge_disable_reuse_mem_env_ != "1") { bool reuse_mem_flag = (mem_type == kOutput) ? IsPreReuse(n, out_index) : !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && reuse_mem_flag && is_op_reuse_mem; - auto stream_id = node_op_desc->GetStreamId(); - if (is_reuse_memory && !continuous && !reusable_blocks_[memory_type].empty()) { + bool do_reuse = is_reuse_memory && !continuous && !reusable_blocks_[memory_type].empty(); + if (do_reuse) { + auto stream_id = node_op_desc->GetStreamId(); for (auto it = reusable_blocks_[memory_type][stream_id].rbegin(); it != reusable_blocks_[memory_type][stream_id].rend(); ++it) { MemoryBlock *reusable_block = *it; @@ -879,15 +1016,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GELOGI("Unreusable block."); continue; } - std::string batch_label; - if (reusable_block->IsSameLabel(batch_label)) { - std::string op_label; - (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, op_label); - if (batch_label != op_label) { - GELOGI("label diff, op name %s", node_op_desc->GetName().c_str()); - continue; - } - } + 
GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); // A node can reuse blocks of the same stream and preorder streams if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { @@ -914,10 +1043,11 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, // Data and netoutput need zero copy block block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); - block->Init(real_size, mem_type, n, out_index, no_align_size); + block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId()); block->stream_id_ = node_op_desc->GetStreamId(); block->ref_count_++; block->continuous_block_ = continuous; + block->batch_label_ = batch_label; if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); if (iter != anchor_to_symbol_.end()) { @@ -945,6 +1075,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec return nullptr; } + if (CheckIsZeroMemNodeType(n->GetType())) { + zero_memory_list_.emplace_back(n, kOutput, index); + continue; + } + int64_t size = 0; if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { GELOGI("Get size failed"); @@ -957,9 +1092,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec // only apply total size in first block if (index != 0) { zero_memory_list_.emplace_back(n, kOutput, index); - } - - if (index == 0) { + } else { NodeIndexIO node_index_io(n, index, kOut); auto iter = anchor_to_symbol_.find(node_index_io.ToString()); if (iter != anchor_to_symbol_.end()) { @@ -972,6 +1105,10 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec } } + if (total_size == 0) { + return nullptr; + } + auto block_size = GetBlockSize(total_size, ranges); GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(), total_size, block_size); @@ -1119,15 +1256,28 @@ bool 
IsKnownSubgraphData(const NodePtr &node) { return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); } -void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory) { +void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory, + bool same_stream) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null."); GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory"); GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory"); --to_release->ref_count_; + if (!same_stream) { + to_release->same_stream_ = false; + } if (to_release->ref_count_ == 0) { - to_release->SetLifeTimeEnd(life_time_); - reusable_memory.emplace_back(to_release); - AddReusableBlockCount(*to_release, reusable_block_counts_); + if (to_release->reuse_mem_ && !to_release->RealSizeList().empty()) { + if (to_release->batch_label_.empty() || (to_release->batch_label_ == max_batch_label_)) { + size_t align_size = to_release->RealSizeList().back(); + AlignMemOffset(align_size); + theory_memory_size_ -= align_size; + } + } + if (to_release->same_stream_) { + to_release->SetLifeTimeEnd(life_time_); + reusable_memory.emplace_back(to_release); + AddReusableBlockCount(*to_release, reusable_block_counts_); + } } } @@ -1167,10 +1317,9 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_mapGetName().c_str()); if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && - (node_type_indexs.back().index == static_cast(in_anchor->GetPeerOutAnchor()->GetIdx())) && - (node->GetOpDesc()->GetStreamId() == block->stream_id_)) { - ReleaseMemory(block, reusable_memory); - if (block->ref_count_ == 0) { + (node_type_indexs.back().index == static_cast(in_anchor->GetPeerOutAnchor()->GetIdx()))) { + ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_)); + if (block->ref_count_ == 0 && block->same_stream_) { 
SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); } } @@ -1328,7 +1477,8 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { iter->second[stream_id].clear(); } vector temp; - GetNodeWorkSpaceSize(n, temp); + int64_t tatal_size = 0; + GetNodeWorkSpaceSize(n, temp, tatal_size); vector workspace_bytes; vector tvm_workspace_memory_type; bool has_tvm_workspace_mem_type_attr = @@ -1380,9 +1530,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { (void)mem_block; // Fix warning } - bool merge_dynamic_batch = false; - GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks()); - GE_IF_BOOL_EXEC((!(ge_disable_reuse_mem_env_ == "1") && !merge_dynamic_batch), ReuseBlocksByLifeTime(ranges.size())); + GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), ReuseBlocksByLifeTime(ranges.size())); AssignContinuousBlocks(); ResizeMemoryBlocks(); @@ -1402,92 +1550,19 @@ void BlockMemAssigner::CheckWorkspaceReuse(const vector &workspace_reuse_f } } -void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector &workspace_memory) { +void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector &workspace_memory, + int64_t &total_size) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node->GetOpDesc() == nullptr, return, "Op desc is null."); vector workspace_byte_nums = node->GetOpDesc()->GetWorkspaceBytes(); GELOGD("node[%s] size:%zu", node->GetOpDesc()->GetName().c_str(), workspace_byte_nums.size()); for (int64_t byte_size : workspace_byte_nums) { workspace_memory.emplace_back(byte_size); + total_size += byte_size; GELOGD("push back size:%ld", byte_size); } } -// descending order -static bool CompareBlockMaxSize(MemoryBlock *left, MemoryBlock *right) { - if (left == nullptr || right == nullptr) { - return false; - } - auto left_max_size = std::max_element(left->RealSizeList().begin(), left->RealSizeList().end()); - if (left_max_size != left->RealSizeList().end()) { - auto right_max_size = 
std::max_element(right->RealSizeList().begin(), right->RealSizeList().end()); - if (right_max_size == right->RealSizeList().end() || (*left_max_size > *right_max_size)) { - return true; - } - } - return false; -} - -void MergeBlocks(std::vector &dest, std::vector &src) { - for (size_t i = 0; i < dest.size(); ++i) { - if (i >= src.size()) { - return; - } - if (dest[i] != nullptr && src[i] != nullptr) { - if (!dest[i]->reuse_mem_ || !src[i]->reuse_mem_) { - GELOGD("Diff batch's workspace can't be reused, i: %zu, dest[i]: %s, stream: %ld, src[i]: %s, stream: %ld.", - i, dest[i]->String().c_str(), dest[i]->stream_id_, src[i]->String().c_str(), src[i]->stream_id_); - continue; - } - for (auto &symbol : src[i]->SymbolList()) { - dest[i]->AddSymbol(symbol); - } - for (size_t j = 0; j < src[i]->NodeTypeIndexList().size(); ++j) { - dest[i]->AddNodeTypeIndex(src[i]->NodeTypeIndexList()[j], - src[i]->RealSizeList()[j], - src[i]->NoAlignSizeList()[j]); - src[i]->deleted_block_ = true; - } - } - } -} - -bool BlockMemAssigner::MergeDynamicBatchBlocks() { - bool merged = false; - std::map> dynamic_batch_blocks; - for (auto block : memory_blocks_) { - if (block == nullptr) { - continue; - } - std::string batch_label; - if (block->IsSameLabel(batch_label)) { - dynamic_batch_blocks[batch_label].emplace_back(block); - } - } - - auto it = dynamic_batch_blocks.begin(); - auto it_max = it; - - // find max block counts - for (; it != dynamic_batch_blocks.end(); ++it) { - if (it->second.size() > it_max->second.size()) { - it_max = it; - } - std::sort(it->second.begin(), it->second.end(), CompareBlockMaxSize); - } - if (it_max != dynamic_batch_blocks.end()) { - GELOGD("MergeDynamicBatch %s block counts %zu", it_max->first.c_str(), it_max->second.size()); - } - for (it = dynamic_batch_blocks.begin(); it != dynamic_batch_blocks.end(); ++it) { - if (it != it_max) { - GELOGD("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str()); - MergeBlocks(it_max->second, it->second); 
- merged = true; - } - } - return merged; -} - // asending order static bool CompareBlockIndex(MemoryBlock *left, MemoryBlock *right) { if (left == nullptr || right == nullptr) { @@ -1597,38 +1672,93 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { } } +void AddBlockMemOffset(size_t &mem_offset, size_t &p2p_mem_offset, MemoryBlock &block) { + if (block.memory_type_ == RT_MEMORY_HBM) { + if (block.first_continuous_block_) { + mem_offset += MEM_ALIGN_SIZE; + } + block.Resize(); + block.SetHeadOffset(mem_offset); + mem_offset += block.Size(); + block.SetTailOffset(mem_offset - 1); + } else if (block.memory_type_ == RT_MEMORY_P2P_DDR) { + if (block.first_continuous_block_) { + p2p_mem_offset += MEM_ALIGN_SIZE; + } + block.Resize(); + block.SetHeadOffset(p2p_mem_offset); + p2p_mem_offset += block.Size(); + block.SetTailOffset(p2p_mem_offset - 1); + } +} + +bool DynamicBatchBlockReuse(MemoryBlock &block) { + return (block.IsSameBatchLabel() && block.reuse_mem_); +} + /// /// @ingroup domi_omg -/// @brief traverse memory size, resize, calculate offset +/// @brief get max batch memory size, others reuse this block memory /// @param [in&out] memory_blocks_ memory block, after calculating offset +/// |-dynamic batch block batch1| +/// |-dynamic batch block batch2----| +/// |-dynamic batch block batch3--| /// -void BlockMemAssigner::ResizeMemoryBlocks() { - for (auto &memory_block : memory_blocks_) { - if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) { +void BlockMemAssigner::ResizeDynamicBatchBlocks() { + std::map> dynamic_batch_blocks; + for (auto block : memory_blocks_) { + if (block == nullptr) { continue; } - if (memory_block->memory_type_ == RT_MEMORY_HBM) { - if (memory_block->first_continuous_block_) { - mem_offset_ += MEM_ALIGN_SIZE; - } + // when memory is not reuseable, it can't be reused by different branch + if (DynamicBatchBlockReuse(*block)) { + 
dynamic_batch_blocks[block->batch_label_].emplace_back(block); + } + } - memory_block->Resize(); - memory_block->SetHeadOffset(mem_offset_); - mem_offset_ += memory_block->Size(); - memory_block->SetTailOffset(mem_offset_ - 1); - } else if (memory_block->memory_type_ == RT_MEMORY_P2P_DDR) { - if (memory_block->first_continuous_block_) { - p2p_mem_offset_ += MEM_ALIGN_SIZE; + size_t max_mem_offset = mem_offset_; + size_t max_p2p_mem_offset = p2p_mem_offset_; + for (auto &batch_blocks : dynamic_batch_blocks) { + size_t mem_offset = mem_offset_; + size_t p2p_mem_offset = p2p_mem_offset_; + for (auto block : batch_blocks.second) { + if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) { + continue; } + AddBlockMemOffset(mem_offset, p2p_mem_offset, *block); + } + if (mem_offset > max_mem_offset) { + max_mem_offset = mem_offset; + } + if (p2p_mem_offset > max_p2p_mem_offset) { + max_p2p_mem_offset = p2p_mem_offset; + } + GELOGI("Batch[%s] offset[%zu] p2p_offset[%zu]", batch_blocks.first.c_str(), mem_offset, p2p_mem_offset); + } + mem_offset_ = max_mem_offset; + p2p_mem_offset_ = max_p2p_mem_offset; +} - memory_block->Resize(); - memory_block->SetHeadOffset(p2p_mem_offset_); - p2p_mem_offset_ += memory_block->Size(); - memory_block->SetTailOffset(p2p_mem_offset_ - 1); +/// +/// @ingroup domi_omg +/// @brief traverse memory size, resize, calculate offset +/// @param [in&out] memory_blocks_ memory block, after calculating offset +/// |-not dynamic batch block-||-dynamic batch block batch1| |-zero copy block-| +/// |-not dynamic batch block-||-dynamic batch block batch2----||-zero copy block-| +/// |-not dynamic batch block-||-dynamic batch block batch3--| |-zero copy block-| +/// +void BlockMemAssigner::ResizeMemoryBlocks() { + for (auto &memory_block : memory_blocks_) { + if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_ + || DynamicBatchBlockReuse(*memory_block)) { + continue; } + + AddBlockMemOffset(mem_offset_, 
p2p_mem_offset_, *memory_block); } - GELOGD("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.", - mem_offset_, p2p_mem_offset_); + ResizeDynamicBatchBlocks(); + GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu," + "theory_min_memory_size %zu", mem_offset_, p2p_mem_offset_, theory_min_memory_size_); } /// @@ -1641,7 +1771,7 @@ void BlockMemAssigner::ResizeMemoryBlocks() { /// @return Status result /// void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, - size_t real_size, size_t no_align_size, bool child_block) { + size_t real_size, size_t no_align_size, int32_t child_block_level) { ge::OpDescPtr op_desc = node_type.node->GetOpDesc(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(op_desc == nullptr, return, "op_desc is null."); string graph_name = node_type.node->GetOwnerComputeGraph()->GetName(); @@ -1689,14 +1819,15 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, } op_desc->SetWorkspace(workspace_list); } - GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" - " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", graph_name.c_str(), + GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] " + "life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), - block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_, - block->continuous_block_, block->deleted_block_, node_type.ref_input); + block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_, + block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, + block->batch_label_.c_str()); } 
-void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { +void SetBlockOpMemOffset(MemoryBlock *block, int32_t child_block_level) { if (block == nullptr) { return; } @@ -1709,9 +1840,14 @@ void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { real_size = block->RealSizeList()[index]; no_align_size = block->NoAlignSizeList()[index]; } - SetOffsetSize(node_type_index, block, real_size, no_align_size, child_block); + SetOffsetSize(node_type_index, block, real_size, no_align_size, child_block_level); index++; } + + child_block_level++; + for (MemoryBlock *child_block : block->ChildBlockList()) { + SetBlockOpMemOffset(child_block, child_block_level); + } } void BlockMemAssigner::SetOpMemOffset(bool is_zero_copy) { @@ -1724,16 +1860,13 @@ void BlockMemAssigner::SetOpMemOffset(bool is_zero_copy) { continue; } - SetBlockOpMemOffset(memory_block, false); - for (MemoryBlock *child_block : memory_block->ChildBlockList()) { - SetBlockOpMemOffset(child_block, true); - } + SetBlockOpMemOffset(memory_block, 0); } if (!is_zero_copy) { for (const NodeTypeIndex &node_type_index : zero_memory_list_) { MemoryBlock block(0, 0); - SetOffsetSize(node_type_index, &block, 0, 0, false); + SetOffsetSize(node_type_index, &block, 0, 0, 0); } } } diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index f3d26c1d..d514ca34 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -65,6 +65,7 @@ class MemoryBlock { stream_id_(stream_id), deleted_block_(false), reuse_mem_(reuse_mem), + same_stream_(true), input_index_(0), continuous_block_(false), first_continuous_block_(false), @@ -85,10 +86,14 @@ class MemoryBlock { symbol_list_.clear(); } - void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) { + void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size, + int64_t 
stream_id) { real_size_list_.emplace_back(real_size); no_align_size_list_.emplace_back(no_align_size); node_type_index_list_.emplace_back(node, type, out_index, false); + if (stream_id != stream_id_) { + same_stream_ = false; + } } size_t Size() const { return block_size_; } @@ -106,6 +111,12 @@ class MemoryBlock { node_type_index_list_.emplace_back(node_type_index); real_size_list_.emplace_back(real_size); no_align_size_list_.emplace_back(no_align_size); + if ((node_type_index.node != nullptr) && (node_type_index.node->GetOpDesc() != nullptr)) { + auto stream_id = node_type_index.node->GetOpDesc()->GetStreamId(); + if (stream_id != stream_id_) { + same_stream_ = false; + } + } } void AddSymbol(const std::string &symbol) { @@ -122,7 +133,7 @@ class MemoryBlock { std::string String(); - bool IsSameLabel(std::string &first_batch_label); + bool IsSameBatchLabel(); void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life); @@ -142,6 +153,7 @@ class MemoryBlock { int64_t stream_id_; bool deleted_block_; bool reuse_mem_; + bool same_stream_; uint32_t input_index_; bool continuous_block_; bool first_continuous_block_; @@ -149,6 +161,7 @@ class MemoryBlock { bool is_zero_copy_; std::map depend_stream_life_; int64_t memory_type_; + std::string batch_label_; private: size_t block_size_; std::vector real_size_list_; @@ -209,7 +222,7 @@ class BlockMemAssigner : public MemAssigner { void GetOutAndWorkSpaceMem(std::vector &all_memory_size); - void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector &workspace_memory); + void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector &workspace_memory, int64_t &total_size); /// /// @ingroup GE @@ -353,7 +366,7 @@ class BlockMemAssigner : public MemAssigner { /// @return void /// @author /// - void ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory); + void ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory, bool same_stream = true); /// /// @ingroup GE @@ 
-379,11 +392,11 @@ class BlockMemAssigner : public MemAssigner { /// /// @ingroup GE - /// @brief Merge memory blocks between different batchs + /// @brief Resize memory blocks for each batchs /// @return merge or not /// @author /// - bool MergeDynamicBatchBlocks(); + void ResizeDynamicBatchBlocks(); void AssignContinuousBlocks(); @@ -436,6 +449,17 @@ class BlockMemAssigner : public MemAssigner { int64_t atomic_addr_clean_id_ = 0; + size_t theory_min_memory_size_ = 0; + + size_t theory_memory_size_ = 0; + + std::string max_batch_label_; + + /// + /// @ [stream1][nodeid] + /// @[nodeid] [stream2][nodeid] + /// @ [stream2][nodeid] + /// DependStreamLife total_node_depend_stream_life_; }; } // namespace ge diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index ad0235d5..a6da4682 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -1646,9 +1646,9 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve } string atomic_mem_size_str = ss.str(); - GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", + GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]", node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), - atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); + atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str()); } return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 93cb8d89..a97f8fdb 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2178,7 +2178,7 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data void *mem_addr = 
data.second.GetBasicAddr(); void *data_buf_addr = reinterpret_cast(reinterpret_cast(data_buf.data)); uint64_t data_buf_length = data_buf.length; - GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", + GELOGI("CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, data_size, data_buf_length); GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind)); } diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index 34fb7ff3..22a657ad 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -61,7 +61,7 @@ vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); continue); - GELOGI("[IMAS]GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); + GELOGI("GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); v_input_size.push_back(tensor_size); } @@ -96,7 +96,7 @@ vector ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) { GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i); continue); - GELOGI("[IMAS]GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); + GELOGI("GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); v_output_size.push_back(tensor_size); } From 253f63b791ca335c293eeeae52f01b679ac564bd Mon Sep 17 00:00:00 2001 From: dongduo Date: Wed, 9 Dec 2020 10:52:14 +0800 Subject: [PATCH 052/127] Fix code check --- ge/common/helper/model_helper.cc | 10 +++++----- ge/graph/load/new_model_manager/davinci_model.cc | 5 +++-- ge/graph/load/new_model_manager/model_manager.cc | 6 ++++-- 
.../new_model_manager/task_info/kernel_task_info.cc | 2 +- ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 2 +- ge/single_op/task/aicpu_kernel_task_builder.cc | 2 +- 6 files changed, 15 insertions(+), 12 deletions(-) diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index efb93d8a..e55af956 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -120,11 +120,11 @@ Status ModelHelper::SaveModelTbeKernel(std::shared_ptr &om_fil TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); GELOGD("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize()); if (tbe_kernel_store.DataSize() > 0) { - GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, - ModelPartitionType::TBE_KERNELS, - ge_model->GetTBEKernelStore().Data(), - ge_model->GetTBEKernelStore().DataSize(), model_index), - "Add tbe kernel partition failed"); + GE_CHK_STATUS_RET( + SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, + ge_model->GetTBEKernelStore().Data(), + ge_model->GetTBEKernelStore().DataSize(), model_index), + "Add tbe kernel partition failed"); } // no need to check value, DATA->NetOutput (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 0feab3c5..eb95fde0 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2695,8 +2695,9 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b is_getnext_sink_dynamic_ = true; cur_dynamic_dims_.clear(); cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); - GE_CHK_RT_RET(rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), - netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST)); + auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * 
sizeof(int64_t), + netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); + GE_CHK_RT_RET(ret); } GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); if (GenOutputTensorInfo(op_desc, data_index, output_data, outputs) != SUCCESS) { diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 145afa6d..ba63757e 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1086,8 +1086,10 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, - ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is invalid, %s", - model.key.c_str(), strerror(errno)); + ACL_ERROR_GE_PARAM_INVALID, + "input key file path %s is invalid, %s", + model.key.c_str(), + strerror(errno)); GenModelId(&model_id); shared_ptr davinci_model = nullptr; diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 7b11c53e..e58a00b4 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -877,7 +877,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k if (kernel_type_ == ccKernelType::CUST_AI_CPU) { bool loaded = false; GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded), - "launch cust aicpu so failed"); + "launch cust aicpu so failed"); } // copy args to new host memory diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 38407160..3474b3cb 
100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -646,7 +646,7 @@ Status AicpuNodeTask::Init(const HybridModel &model) { if (kernel_type == ccKernelType::CUST_AI_CPU) { bool loaded = false; GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded), - "load cust aicpu so failed."); + "load cust aicpu so failed."); if (!loaded) { GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); } diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 0b459e7a..196b2fa6 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -64,7 +64,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; bool loaded = false; GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded), - "launch cust aicpu so failed"); + "launch cust aicpu so failed"); if (!loaded) { GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); } From 608e9f09d11dc1faa082808fca1dd1a058948cb8 Mon Sep 17 00:00:00 2001 From: dongduo Date: Wed, 9 Dec 2020 10:56:14 +0800 Subject: [PATCH 053/127] Fix code check --- ge/graph/load/new_model_manager/model_manager.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index ba63757e..7eb84f1d 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1086,10 +1086,7 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t 
weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, - ACL_ERROR_GE_PARAM_INVALID, - "input key file path %s is invalid, %s", - model.key.c_str(), - strerror(errno)); + ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); GenModelId(&model_id); shared_ptr davinci_model = nullptr; From 4bb75fd3081409a8701219b1503bf80bec36e5e1 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 9 Dec 2020 10:57:38 +0800 Subject: [PATCH 054/127] fix bud. --- .../ops_kernel_builder_manager.cc | 20 ++++++++----------- .../ops_kernel_builder_manager.h | 2 +- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc index 167be47b..e0001fcd 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ -33,8 +33,6 @@ const std::vector kHcclBuilderLibs = { "libhvd_opskernel_builder.so", "libhcom_gradtune_opskernel_builder.so" }; - -const std::string kAicoreUtilsLib = "libaicore_utils_runtime.so"; } // namespace OpsKernelBuilderManager::~OpsKernelBuilderManager() { // it's OK to call Finalize multiply times @@ -47,11 +45,13 @@ OpsKernelBuilderManager &OpsKernelBuilderManager::Instance() { } Status OpsKernelBuilderManager::Initialize(const map &options, bool is_train) { - std::string lib_paths; - GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths, is_train)); - plugin_manager_.reset(new (std::nothrow)PluginManager()); - GE_CHECK_NOTNULL(plugin_manager_); - GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs"); + if (is_train) { + std::string lib_paths; + GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths)); + plugin_manager_.reset(new (std::nothrow)PluginManager()); + GE_CHECK_NOTNULL(plugin_manager_); + GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs"); + } auto 
&kernel_builders = OpsKernelBuilderRegistry::GetInstance().GetAll(); GELOGI("Number of OpBuild = %zu", kernel_builders.size()); @@ -100,8 +100,7 @@ OpsKernelBuilderPtr OpsKernelBuilderManager::GetOpsKernelBuilder(const string &n return nullptr; } -Status OpsKernelBuilderManager::GetLibPaths(const std::map &options, std::string &lib_paths, - bool is_train) { +Status OpsKernelBuilderManager::GetLibPaths(const std::map &options, std::string &lib_paths) { GELOGD("Start to execute GetLibPaths"); std::string path_base = PluginManager::GetPath(); std::string so_path = "plugin/opskernel/"; @@ -110,9 +109,6 @@ Status OpsKernelBuilderManager::GetLibPaths(const std::mapsecond != "0") { diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h index 207ebc79..7a95ddfa 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.h +++ b/ge/opskernel_manager/ops_kernel_builder_manager.h @@ -48,7 +48,7 @@ class OpsKernelBuilderManager { private: OpsKernelBuilderManager() = default; - static Status GetLibPaths(const std::map &options, std::string &lib_paths, bool is_train); + static Status GetLibPaths(const std::map &options, std::string &lib_paths); std::unique_ptr plugin_manager_; std::map ops_kernel_builders_{}; From 5e7bad3b0ffb236dd2589f569d7b783a8d0f286a Mon Sep 17 00:00:00 2001 From: dongduo Date: Wed, 9 Dec 2020 10:58:18 +0800 Subject: [PATCH 055/127] Fix code check --- ge/single_op/task/aicpu_kernel_task_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 196b2fa6..df592a87 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -64,7 +64,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; bool loaded = false; 
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded), - "launch cust aicpu so failed"); + "launch cust aicpu so failed"); if (!loaded) { GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); } From 9894748d1dbfd2238562073df29705b65e65b208 Mon Sep 17 00:00:00 2001 From: dongduo Date: Wed, 9 Dec 2020 14:01:10 +0800 Subject: [PATCH 056/127] Fix code check --- ge/host_kernels/strided_slice_kernel.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc index 213f6d91..b1bfb10a 100644 --- a/ge/host_kernels/strided_slice_kernel.cc +++ b/ge/host_kernels/strided_slice_kernel.cc @@ -272,6 +272,10 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector &x_dims) { auto begin_data_type_size = GetSizeByDataType(begin_tensor->GetTensorDesc().GetDataType()); + if (begin_data_type_size == 0) { + GELOGW("Param begin_data_type_size should not be zero."); + return; + } size_t begin_vec_size = begin_tensor->GetData().size() / begin_data_type_size; auto final_dim_num = x_dims_num < begin_vec_size ? 
begin_vec_size : x_dims_num; for (size_t i = 0; i < final_dim_num; i++) { From 1f0268e87a6f50bc15e18f757a53c8883cc3f8c9 Mon Sep 17 00:00:00 2001 From: wxl Date: Wed, 9 Dec 2020 14:11:41 +0800 Subject: [PATCH 057/127] ir build optimize --- ge/ir_build/ge_ir_build.cc | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 34663493..6dfda036 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -50,6 +50,9 @@ const std::string IR_OPTION_LOG_LEVEL_DEFAULT = "default"; const std::string IR_OPTION_BUFFER_OPTIMIZE_DEFAULT = "l2_optimize"; const std::string IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT = "0"; const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; + +const std::string kInputShape = "input_shape"; +const std::string kInputFormat = "input_format"; } // namespace static graphStatus CheckGlobalOptions(std::map &global_options) { @@ -232,6 +235,7 @@ class Impl { ModelBufferData &ge_models); graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, bool is_dynamic_input); + graphStatus UpdateDataOp(const Graph &graph); void SetRtSocVersion(); void UpdateThreadContext(); void LoadOpsProto(); @@ -242,6 +246,36 @@ class Impl { OmgContext omg_context_; }; +graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { + GELOGD("Enter Update Data Attr Process!"); + if (options_.find(kInputShape) == options_.end()) { + return GRAPH_SUCCESS; + } + unordered_map> shape_map; + vector>> user_shape_map; + GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), + return GRAPH_PARAM_INVALID, "parse input shape failed!"); + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(input_node); + ge::OpDescPtr op = input_node->GetOpDesc(); 
+ GE_CHECK_NOTNULL(op); + if (op->GetType() == DATA) { + auto tensor = op->MutableInputDesc(0); + string data_op_name = op->GetName(); + auto iter = shape_map.find(data_op_name); + if (iter != shape_map.end()) { + tensor->SetShape(ge::GeShape(iter->second)); + GELOGD("update input [%s] shape info", data_op_name.c_str()); + } else { + GELOGI("no need update input [%s] attr because not found from input_shape.", data_op_name.c_str()); + } + } + } + return GRAPH_SUCCESS; +} + graphStatus Impl::CheckOptions(const std::map &options) { for (auto &ele : options) { auto it = ge::ir_option::ir_builder_suppported_options.find(ele.first); @@ -437,7 +471,6 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vectorsecond); - tensor->SetShape(data_shape); GELOGD("Data op get shape from Context and update [%s] shape info", data_op_name.c_str()); } else { data_shape = tensor->GetShape(); From ae3c7823efa9c8b61d8aefa66d44590ea3e52a7a Mon Sep 17 00:00:00 2001 From: wxl Date: Wed, 9 Dec 2020 14:22:29 +0800 Subject: [PATCH 058/127] ir build optimize --- ge/ir_build/ge_ir_build.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 6dfda036..bfc81c7e 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -235,7 +235,7 @@ class Impl { ModelBufferData &ge_models); graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, bool is_dynamic_input); - graphStatus UpdateDataOp(const Graph &graph); + graphStatus UpdateDataOpAttr(const Graph &graph); void SetRtSocVersion(); void UpdateThreadContext(); void LoadOpsProto(); From 87a99e89fc6f468be92dbf7dd6958120e73d1806 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Wed, 9 Dec 2020 16:34:25 +0800 Subject: [PATCH 059/127] Feature: Cancel get default format form ge ir build --- ge/ir_build/ge_ir_build.cc | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git 
a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index a206a164..96ae9b24 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -226,7 +226,7 @@ class Impl { ~Impl() { (void)generator_.Finalize(); }; graphStatus CheckOptions(const std::map &options); graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs); - graphStatus GetDefaultInputShapeAndFormat(const Graph &graph, string &default_shape, string &input_format); + graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape); graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, ModelBufferData &ge_models); @@ -280,7 +280,7 @@ graphStatus Impl::CheckOptions(const std::map &options return GRAPH_SUCCESS; } -graphStatus Impl::GetDefaultInputShapeAndFormat(const Graph &graph, string &default_shape, string &input_format) { +graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) { auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { @@ -308,14 +308,11 @@ graphStatus Impl::GetDefaultInputShapeAndFormat(const Graph &graph, string &defa default_shape += tmp_shape_str; } - ge::Format data_format = tensor.GetFormat(); - input_format.assign(ge::TypeUtils::FormatToSerialString(data_format)); - GELOGD("Data op name: %s, data shape: %s, data format: %s.", data_op_name.c_str(), tmp_shape_str.c_str(), - input_format.c_str()); + GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str()); } } default_shape = (default_shape.empty() ? 
default_shape : default_shape.substr(0, default_shape.size() - 1)); - GELOGI("Get default data op shape: %s, format: %s from ge ir graph.", default_shape.c_str(), input_format.c_str()); + GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str()); return GRAPH_SUCCESS; } @@ -338,14 +335,13 @@ graphStatus Impl::Init(const Graph &graph, const std::map Date: Wed, 9 Dec 2020 21:21:02 +0800 Subject: [PATCH 060/127] bug fix --- ge/CMakeLists.txt | 3 ++- ge/graph/manager/graph_manager.cc | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index b037f4a4..399ad051 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -154,7 +154,7 @@ set(TRAIN_SRC_LIST "graph/passes/compile_nodes_pass.cc" "graph/passes/constant_folding_pass.cc" "graph/passes/constant_fuse_same_pass.cc" - + "graph/passes/const_pass.cc" "graph/passes/control_trigger_pass.cc" "graph/passes/dimension_adjust_pass.cc" "graph/passes/dimension_compute_pass.cc" @@ -513,6 +513,7 @@ set(INFER_SRC_LIST "graph/passes/control_trigger_pass.cc" "graph/passes/cond_pass.cc" "graph/passes/cond_remove_pass.cc" + "graph/passes/const_pass.cc" "graph/passes/for_pass.cc" "graph/passes/enter_pass.cc" "graph/passes/assign_pass.cc" diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index d4c6ca8d..200b7a6a 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -56,7 +56,7 @@ #include "graph/passes/cond_remove_pass.h" #include "graph/passes/constant_folding_pass.h" #include "graph/passes/constant_fuse_same_pass.h" -#include "graph/passes/const_pass.cc" +#include "graph/passes/const_pass.h" #include "graph/passes/control_trigger_pass.h" #include "graph/passes/ctrl_edge_transfer_pass.h" #include "graph/passes/dimension_adjust_pass.h" From 18ab1af64676452483696399671ab67d56b61872 Mon Sep 17 00:00:00 2001 From: wxl Date: Wed, 9 Dec 2020 21:31:11 +0800 Subject: [PATCH 061/127] ir build 
optimize --- ge/ir_build/ge_ir_build.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index bfc81c7e..1128207a 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -262,11 +262,13 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { ge::OpDescPtr op = input_node->GetOpDesc(); GE_CHECK_NOTNULL(op); if (op->GetType() == DATA) { - auto tensor = op->MutableInputDesc(0); + auto tensor_input = op->MutableInputDesc(0); + auto tensor_output = op->MutableOutputDesc(0); string data_op_name = op->GetName(); auto iter = shape_map.find(data_op_name); if (iter != shape_map.end()) { - tensor->SetShape(ge::GeShape(iter->second)); + tensor_input->SetShape(ge::GeShape(iter->second)); + tensor_output->SetShape(ge::GeShape(iter->second)); GELOGD("update input [%s] shape info", data_op_name.c_str()); } else { GELOGI("no need update input [%s] attr because not found from input_shape.", data_op_name.c_str()); @@ -360,7 +362,10 @@ graphStatus Impl::Init(const Graph &graph, const std::map Date: Thu, 10 Dec 2020 09:29:26 +0800 Subject: [PATCH 062/127] ir build optimize --- ge/ir_build/ge_ir_build.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 1128207a..289b0b9e 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -470,6 +470,7 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vectorGetType() == DATA) { GELOGD("Data op inputDesc size: %zu", op->GetAllInputsDesc().size()); auto tensor = op->MutableInputDesc(0); + GE_CHECK_NOTNULL(tensor); string data_op_name = op->GetName(); GELOGD("Data op name: %s", data_op_name.c_str()); ge::GeShape data_shape; From 16efa936d4cae6bb0be1cff120f2cc20f9379a9c Mon Sep 17 00:00:00 2001 From: wxl Date: Thu, 10 Dec 2020 09:52:07 +0800 Subject: [PATCH 063/127] ir build optimize --- ge/ir_build/ge_ir_build.cc | 8 ++++++++ 1 file changed, 8 
insertions(+) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 289b0b9e..6ff3e5e1 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -53,6 +53,8 @@ const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; const std::string kInputShape = "input_shape"; const std::string kInputFormat = "input_format"; +const std::string kReUseMemEnable = "1"; +const std::string kReUseMemDisEnable = "0"; } // namespace static graphStatus CheckGlobalOptions(std::map &global_options) { @@ -313,6 +315,12 @@ graphStatus Impl::CheckOptions(const std::map &options return GRAPH_PARAM_INVALID; } } + // Check option EXEC_DISABLE_REUSED_MEMORY + it = options_.find(EXEC_DISABLE_REUSED_MEMORY); + if (it != options_.end() && it->second != kReUseMemEnable && it->second != kReUseMemDisEnable) { + GELOGE(GRAPH_PARAM_INVALID, "option(EXEC_DISABLE_REUSED_MEMORY) value[%s] is invalid ", it->second.c_str()); + return GRAPH_PARAM_INVALID; + } return GRAPH_SUCCESS; } From d6388feddecc7d5f9086ae991c7bf67929c5d4df Mon Sep 17 00:00:00 2001 From: wxl Date: Thu, 10 Dec 2020 11:36:05 +0800 Subject: [PATCH 064/127] ir build optimize --- ge/ir_build/ge_ir_build.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 6ff3e5e1..b353a72a 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -316,7 +316,7 @@ graphStatus Impl::CheckOptions(const std::map &options } } // Check option EXEC_DISABLE_REUSED_MEMORY - it = options_.find(EXEC_DISABLE_REUSED_MEMORY); + it = options_.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY); if (it != options_.end() && it->second != kReUseMemEnable && it->second != kReUseMemDisEnable) { GELOGE(GRAPH_PARAM_INVALID, "option(EXEC_DISABLE_REUSED_MEMORY) value[%s] is invalid ", it->second.c_str()); return GRAPH_PARAM_INVALID; From 6168ed47a4b0bad4261e00a62133dda783a88514 Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Thu, 10 Dec 
2020 14:05:46 +0800 Subject: [PATCH 065/127] update submodule --- metadef | 2 +- parser | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadef b/metadef index 7472245f..d19c9c5c 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 7472245fcaed273b7cff99a1f6e6bab3313be684 +Subproject commit d19c9c5c92f21a0335c18681dcceed44f3a54ddc diff --git a/parser b/parser index 11c88ffc..c78651fe 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 11c88ffc907399db084766bf9d5c171e5432eb8d +Subproject commit c78651fee671ac079c56d2c3ff0d0439ea82f2fa From c60279ac6284f1c7fc0ade65a100298290e19860 Mon Sep 17 00:00:00 2001 From: wxl Date: Thu, 10 Dec 2020 14:05:48 +0800 Subject: [PATCH 066/127] ir build optimize --- ge/ir_build/ge_ir_build.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index b353a72a..f9c4e259 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -266,6 +266,8 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { if (op->GetType() == DATA) { auto tensor_input = op->MutableInputDesc(0); auto tensor_output = op->MutableOutputDesc(0); + GE_CHECK_NOTNULL(tensor_input); + GE_CHECK_NOTNULL(tensor_output); string data_op_name = op->GetName(); auto iter = shape_map.find(data_op_name); if (iter != shape_map.end()) { @@ -317,8 +319,7 @@ graphStatus Impl::CheckOptions(const std::map &options } // Check option EXEC_DISABLE_REUSED_MEMORY it = options_.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY); - if (it != options_.end() && it->second != kReUseMemEnable && it->second != kReUseMemDisEnable) { - GELOGE(GRAPH_PARAM_INVALID, "option(EXEC_DISABLE_REUSED_MEMORY) value[%s] is invalid ", it->second.c_str()); + if (it != options_.end() && (CheckDisableReuseMemoryParamValid(it->second) != GRAPH_SUCCESS)) { return GRAPH_PARAM_INVALID; } return GRAPH_SUCCESS; From 84253df880825d58466f4c40908a3f09d2280fbf Mon Sep 17 00:00:00 2001 
From: dongduo Date: Thu, 10 Dec 2020 14:30:17 +0800 Subject: [PATCH 067/127] Fix code check --- .../load/new_model_manager/task_info/kernel_task_info.cc | 2 +- ge/graph/partition/engine_place.cc | 2 +- ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 4 ++-- ge/init/gelib.h | 6 +++--- ge/single_op/task/aicpu_kernel_task_builder.cc | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 163e3134..1f398309 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -884,7 +884,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k if (kernel_type_ == ccKernelType::CUST_AI_CPU) { bool loaded = false; GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded), - "launch cust aicpu so failed"); + "launch cust aicpu so failed"); } // copy args to new host memory diff --git a/ge/graph/partition/engine_place.cc b/ge/graph/partition/engine_place.cc index f4ebbdca..f71585dc 100755 --- a/ge/graph/partition/engine_place.cc +++ b/ge/graph/partition/engine_place.cc @@ -56,7 +56,7 @@ Status EnginePlacer::Run() { } bool is_check_support_success = true; // Assign engine for each node in the graph - ge::GELib::GetInstance()->DNNEngineManagerObj().InitPerformanceStaistic(); + ge::GELib::GetInstance()->().InitPerformanceStaistic(); for (const auto &node_ptr : compute_graph_->GetDirectNode()) { GE_CHECK_NOTNULL(node_ptr); auto op_desc = node_ptr->GetOpDesc(); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 3474b3cb..7330f616 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -645,8 +645,8 @@ Status AicpuNodeTask::Init(const HybridModel 
&model) { auto kernel_type = static_cast(context.kernel_type()); if (kernel_type == ccKernelType::CUST_AI_CPU) { bool loaded = false; - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded), - "load cust aicpu so failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded), + "load cust aicpu so failed."); if (!loaded) { GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); } diff --git a/ge/init/gelib.h b/ge/init/gelib.h index e52b8dd6..c04dc898 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -48,13 +48,13 @@ class GELib { Status Finalize(); // get DNNEngineManager object - DNNEngineManager &DNNEngineManagerObj() { return engineManager_; } + DNNEngineManager &DNNEngineManagerObj() const { return engineManager_; } // get OpsKernelManager object - OpsKernelManager &OpsKernelManagerObj() { return opsManager_; } + OpsKernelManager &OpsKernelManagerObj() const { return opsManager_; } // get SessionManager object - SessionManager &SessionManagerObj() { return sessionManager_; } + SessionManager &SessionManagerObj() const { return sessionManager_; } // get Initial flag bool InitFlag() const { return init_flag_; } diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index df592a87..cd218c94 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -63,8 +63,8 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { task.is_custom_ = true; task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; bool loaded = false; - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded), - "launch cust aicpu so failed"); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded), + "launch cust aicpu so failed"); if (!loaded) { 
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); } From c42fb4dc20cfa3752cf6a688b9a3faa063e2a31d Mon Sep 17 00:00:00 2001 From: dongduo Date: Thu, 10 Dec 2020 14:34:36 +0800 Subject: [PATCH 068/127] Fix code check --- ge/graph/partition/engine_place.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/partition/engine_place.cc b/ge/graph/partition/engine_place.cc index f71585dc..f4ebbdca 100755 --- a/ge/graph/partition/engine_place.cc +++ b/ge/graph/partition/engine_place.cc @@ -56,7 +56,7 @@ Status EnginePlacer::Run() { } bool is_check_support_success = true; // Assign engine for each node in the graph - ge::GELib::GetInstance()->().InitPerformanceStaistic(); + ge::GELib::GetInstance()->DNNEngineManagerObj().InitPerformanceStaistic(); for (const auto &node_ptr : compute_graph_->GetDirectNode()) { GE_CHECK_NOTNULL(node_ptr); auto op_desc = node_ptr->GetOpDesc(); From 0bc3a5e58e8bb0068a8a6204bf5f3b570a770ade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Thu, 10 Dec 2020 15:30:15 +0800 Subject: [PATCH 069/127] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20?= =?UTF-8?q?!569=20:=20bug=20fix'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ge/CMakeLists.txt | 3 +-- ge/graph/manager/graph_manager.cc | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 399ad051..b037f4a4 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -154,7 +154,7 @@ set(TRAIN_SRC_LIST "graph/passes/compile_nodes_pass.cc" "graph/passes/constant_folding_pass.cc" "graph/passes/constant_fuse_same_pass.cc" - "graph/passes/const_pass.cc" + "graph/passes/control_trigger_pass.cc" "graph/passes/dimension_adjust_pass.cc" "graph/passes/dimension_compute_pass.cc" @@ -513,7 +513,6 @@ set(INFER_SRC_LIST "graph/passes/control_trigger_pass.cc" "graph/passes/cond_pass.cc" 
"graph/passes/cond_remove_pass.cc" - "graph/passes/const_pass.cc" "graph/passes/for_pass.cc" "graph/passes/enter_pass.cc" "graph/passes/assign_pass.cc" diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 200b7a6a..d4c6ca8d 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -56,7 +56,7 @@ #include "graph/passes/cond_remove_pass.h" #include "graph/passes/constant_folding_pass.h" #include "graph/passes/constant_fuse_same_pass.h" -#include "graph/passes/const_pass.h" +#include "graph/passes/const_pass.cc" #include "graph/passes/control_trigger_pass.h" #include "graph/passes/ctrl_edge_transfer_pass.h" #include "graph/passes/dimension_adjust_pass.h" From 5ff32578df71418fb70dca2e799fd3e36bcd6dbe Mon Sep 17 00:00:00 2001 From: dongduo Date: Thu, 10 Dec 2020 16:28:23 +0800 Subject: [PATCH 070/127] Fix code check --- ge/init/gelib.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/init/gelib.h b/ge/init/gelib.h index c04dc898..a2aca99d 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -48,13 +48,13 @@ class GELib { Status Finalize(); // get DNNEngineManager object - DNNEngineManager &DNNEngineManagerObj() const { return engineManager_; } + const DNNEngineManager &DNNEngineManagerObj() { return engineManager_; } // get OpsKernelManager object - OpsKernelManager &OpsKernelManagerObj() const { return opsManager_; } + const OpsKernelManager &OpsKernelManagerObj() { return opsManager_; } // get SessionManager object - SessionManager &SessionManagerObj() const { return sessionManager_; } + const SessionManager &SessionManagerObj() { return sessionManager_; } // get Initial flag bool InitFlag() const { return init_flag_; } From 71777a9931cdc357a72919ce3e31d05f5fba1016 Mon Sep 17 00:00:00 2001 From: dongduo Date: Thu, 10 Dec 2020 16:51:06 +0800 Subject: [PATCH 071/127] Fix code check --- ge/init/gelib.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/ge/init/gelib.h b/ge/init/gelib.h index a2aca99d..e52b8dd6 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -48,13 +48,13 @@ class GELib { Status Finalize(); // get DNNEngineManager object - const DNNEngineManager &DNNEngineManagerObj() { return engineManager_; } + DNNEngineManager &DNNEngineManagerObj() { return engineManager_; } // get OpsKernelManager object - const OpsKernelManager &OpsKernelManagerObj() { return opsManager_; } + OpsKernelManager &OpsKernelManagerObj() { return opsManager_; } // get SessionManager object - const SessionManager &SessionManagerObj() { return sessionManager_; } + SessionManager &SessionManagerObj() { return sessionManager_; } // get Initial flag bool InitFlag() const { return init_flag_; } From 422ac970718d78ef7a1b7e9e02fe80a5d21dbaec Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Thu, 10 Dec 2020 16:52:25 +0800 Subject: [PATCH 072/127] error message add --- ge/common/ge/plugin_manager.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ge/common/ge/plugin_manager.cc b/ge/common/ge/plugin_manager.cc index 7bb1310c..75a36d99 100644 --- a/ge/common/ge/plugin_manager.cc +++ b/ge/common/ge/plugin_manager.cc @@ -123,7 +123,10 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec if (handle == nullptr) { const char *error = mmDlerror(); GE_IF_BOOL_EXEC(error == nullptr, error = ""); - GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen %s!", error); + ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, + {"mmDlopen", "shared library path is " + FmtToStr(file_path_dlopen) + ". Errormessage" + FmtToStr(error)}); + GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen the shared library path[%s]. 
Errormessage[%s]!", + file_path_dlopen.c_str(), error); continue; } @@ -132,6 +135,9 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec for (const auto &func_name : func_check_list) { auto real_fn = (void (*)())mmDlsym(handle, const_cast(func_name.c_str())); if (real_fn == nullptr) { + ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, + {"mmDlsym", FmtToStr(func_name) + " is skipped since function" + + FmtToStr(func_name) + " is not existed!"}); GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), func_name.c_str()); is_valid = false; From 70b2a12c818e7666c5435e177417a734af3dfe15 Mon Sep 17 00:00:00 2001 From: lichun Date: Thu, 10 Dec 2020 16:54:48 +0800 Subject: [PATCH 073/127] fix error fusion in transop breadth fusion pass --- ge/graph/passes/transop_breadth_fusion_pass.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc index 689510f0..654c3822 100644 --- a/ge/graph/passes/transop_breadth_fusion_pass.cc +++ b/ge/graph/passes/transop_breadth_fusion_pass.cc @@ -70,8 +70,10 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No trans_data_type = true; trans_format = true; trans_shape = true; - } else if (node->GetType() == RESHAPE) { + } else if (node->GetType() == RESHAPE || node->GetType() == EXPANDDIMS || node->GetType() == SQUEEZE) { trans_shape = true; + } else if (node->GetType() == REFORMAT) { + trans_format = true; } id << node->GetType() << '-' << anchor_index; From f23a4de0e0e444d486884b0efba55cfbb2fbb95d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Thu, 10 Dec 2020 16:55:32 +0800 Subject: [PATCH 074/127] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20?= =?UTF-8?q?!536=20:=20Decrease=20transformer's=20om=20size=20in=20dynamic?= =?UTF-8?q?=20dims=20scenario'?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ge/CMakeLists.txt | 1 - ge/ge_inference.mk | 1 - ge/ge_runner.mk | 1 - ge/graph/build/model_builder.cc | 1 - ge/graph/manager/graph_manager.cc | 5 - ge/graph/passes/attach_stream_label_pass.cc | 28 +- ge/graph/passes/attach_stream_label_pass.h | 4 +- ge/graph/passes/base_pass.cc | 2 +- .../common_subexpression_elimination_pass.cc | 3 +- ge/graph/passes/const_pass.cc | 55 --- ge/graph/passes/const_pass.h | 29 -- ge/graph/passes/dimension_adjust_pass.cc | 64 --- ge/graph/passes/dimension_adjust_pass.h | 4 - ge/graph/passes/enter_pass.cc | 48 +-- ge/graph/passes/enter_pass.h | 3 +- ge/graph/passes/folding_pass.cc | 5 +- ge/graph/passes/merge_to_stream_merge_pass.cc | 10 + ge/graph/passes/next_iteration_pass.cc | 262 ++++++++---- ge/graph/passes/next_iteration_pass.h | 16 +- ge/graph/preprocess/multi_batch_copy_graph.cc | 401 +++--------------- ge/graph/preprocess/multi_batch_copy_graph.h | 16 +- 21 files changed, 295 insertions(+), 664 deletions(-) delete mode 100644 ge/graph/passes/const_pass.cc delete mode 100644 ge/graph/passes/const_pass.h diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index b037f4a4..88a5c52f 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -154,7 +154,6 @@ set(TRAIN_SRC_LIST "graph/passes/compile_nodes_pass.cc" "graph/passes/constant_folding_pass.cc" "graph/passes/constant_fuse_same_pass.cc" - "graph/passes/control_trigger_pass.cc" "graph/passes/dimension_adjust_pass.cc" "graph/passes/dimension_compute_pass.cc" diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index fe76a612..0987f148 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -189,7 +189,6 @@ OMG_HOST_SRC_FILES := \ graph/passes/control_trigger_pass.cc \ graph/passes/cond_pass.cc \ graph/passes/cond_remove_pass.cc \ - graph/passes/const_pass.cc \ graph/passes/for_pass.cc \ graph/passes/enter_pass.cc \ graph/passes/assign_pass.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 
58ad1266..a2679ed1 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -123,7 +123,6 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/compile_nodes_pass.cc \ graph/passes/constant_folding_pass.cc \ graph/passes/constant_fuse_same_pass.cc \ - graph/passes/const_pass.cc \ graph/passes/control_trigger_pass.cc \ graph/passes/dimension_adjust_pass.cc \ graph/passes/dimension_compute_pass.cc \ diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 3be45895..37eb499a 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -224,7 +224,6 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_ GeTensorDesc &tensor_desc = weight->MutableTensorDesc(); size_t output_size = weight->GetData().size(); TensorUtils::SetDataOffset(tensor_desc, mem_offset); - GELOGD("Node: %s, weight size: %zu.", node->GetName().c_str(), output_size); mem_offset += output_size; } return SUCCESS; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index d4c6ca8d..9ce68d76 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -56,7 +56,6 @@ #include "graph/passes/cond_remove_pass.h" #include "graph/passes/constant_folding_pass.h" #include "graph/passes/constant_fuse_same_pass.h" -#include "graph/passes/const_pass.cc" #include "graph/passes/control_trigger_pass.h" #include "graph/passes/ctrl_edge_transfer_pass.h" #include "graph/passes/dimension_adjust_pass.h" @@ -2138,7 +2137,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { TransposeTransDataPass transpose_transdata_pass; TransOpSymmetryEliminationPass symmetry_elimination_pass; DimensionComputePass dimension_compute_pass; - ConstPass const_pass; names_to_passes.emplace_back("EnterPass", &enter_pass); names_to_passes.emplace_back("AddNPass", &addn_pass); names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination); @@ -2152,7 +2150,6 @@ Status 
GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); - names_to_passes.emplace_back("ConstPass", &const_pass); GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "GraphManager::OptimizeStage1_2"); @@ -2193,8 +2190,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", new (std::nothrow) VariableRefUselessControlOutDeletePass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ReshapeRecoveryPass", new (std::nothrow) ReshapeRecoveryPass)) - GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::CommonSubexpressionEliminationPass", - new (std::nothrow) CommonSubexpressionEliminationPass)); if (options_.train_graph_flag) { // Priority: The GlobalStepInsertPass should work before graph partitioner. 
// Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory diff --git a/ge/graph/passes/attach_stream_label_pass.cc b/ge/graph/passes/attach_stream_label_pass.cc index cd3509c7..c0e0f669 100644 --- a/ge/graph/passes/attach_stream_label_pass.cc +++ b/ge/graph/passes/attach_stream_label_pass.cc @@ -18,8 +18,6 @@ #include "ge/ge_api_types.h" #include "graph/common/omg_util.h" -using std::string; - namespace ge { Status AttachStreamLabelPass::Run(ComputeGraphPtr graph) { GELOGD("AttachStreamLabelPass Enter."); @@ -189,10 +187,21 @@ Status AttachStreamLabelPass::UpdateEnterNode() { } std::stack enter_nodes; + std::string batch_label; for (const auto &enter_node : pair.second) { enter_nodes.emplace(enter_node); + std::string tmp_label; + (void)AttrUtils::GetStr(enter_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); + if (!tmp_label.empty()) { + if (batch_label.empty()) { + batch_label = tmp_label; + } else if (batch_label != tmp_label) { + GELOGE(FAILED, "multi batch_label exist, label1=%s, label2=%s.", batch_label.c_str(), tmp_label.c_str()); + return FAILED; + } + } } - if (UpdateLoopBranch(enter_nodes, active_label_list[0]) != SUCCESS) { + if (UpdateLoopBranch(enter_nodes, active_label_list[0], batch_label) != SUCCESS) { GELOGE(FAILED, "Update stream_label for loop_branch failed."); return FAILED; } @@ -217,7 +226,10 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no } for (const auto &enter_node : enter_nodes) { - GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); + GE_CHECK_NOTNULL(enter_node->GetOpDesc()); + if (enter_node->GetOpDesc()->HasAttr(ATTR_NAME_STREAM_LABEL)) { + GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); + } } return SUCCESS; } @@ -229,7 +241,8 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no /// @param [in] batch_label /// @return Status /// -Status 
AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_nodes, const string &stream_label) { +Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label, + const std::string &batch_label) { std::stack nodes(enter_nodes); NodePtr cur_node = nullptr; while (!nodes.empty()) { @@ -238,6 +251,11 @@ Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_ for (const NodePtr &out_node : cur_node->GetOutAllNodes()) { OpDescPtr out_desc = out_node->GetOpDesc(); GE_CHECK_NOTNULL(out_desc); + std::string tmp_label; + (void)AttrUtils::GetStr(out_desc, ATTR_NAME_BATCH_LABEL, tmp_label); + if (!tmp_label.empty() && (tmp_label != batch_label)) { + continue; + } std::string out_type = out_desc->GetType(); bool need_skip = out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == REFENTER) || diff --git a/ge/graph/passes/attach_stream_label_pass.h b/ge/graph/passes/attach_stream_label_pass.h index ad71d58f..19f11480 100755 --- a/ge/graph/passes/attach_stream_label_pass.h +++ b/ge/graph/passes/attach_stream_label_pass.h @@ -58,9 +58,11 @@ class AttachStreamLabelPass : public GraphPass { /// @brief Update stream_label for loop_branch /// @param [in] enter_nodes /// @param [in] stream_label + /// @param [in] batch_label /// @return Status /// - static Status UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label); + static Status UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label, + const std::string &batch_label); /// /// @brief Update stream_label start with enter nodes diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 8d0bcf25..68efbeb9 100755 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -96,7 +96,7 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder node->GetName().c_str(), node->GetType().c_str()); continue; } - if 
(node_to_re_pass->IsAllInNodesSeen(nodes_seen) || node_to_re_pass->GetType() == ENTER) { + if (node_to_re_pass->IsAllInNodesSeen(nodes_seen)) { GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str()); nodes_re_pass.insert(node_to_re_pass); } else { diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc index 9e771b65..a4662d5d 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.cc +++ b/ge/graph/passes/common_subexpression_elimination_pass.cc @@ -58,8 +58,7 @@ std::string GetCseKey(const NodePtr &node) { /// To avoid delete wrong nodes(e.g. stateful nodes), /// only nodes have folding kernel will be considered for the CSE process bool IsNodeSupportCse(const NodePtr &node) { - if (HostCpuEngine::CheckSupported(NodeUtils::GetNodeType(*node)) || node->GetType() == CONSTANT || - node->GetType() == CONSTANTOP) { + if (HostCpuEngine::CheckSupported(NodeUtils::GetNodeType(*node))) { return true; } return folding_pass::GetKernelByType(node) != nullptr; diff --git a/ge/graph/passes/const_pass.cc b/ge/graph/passes/const_pass.cc deleted file mode 100644 index 42b3c23f..00000000 --- a/ge/graph/passes/const_pass.cc +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/passes/const_pass.h" - -#include "graph/debug/ge_attr_define.h" -#include "graph/utils/graph_utils.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" - -namespace ge { -Status ConstPass::Run(NodePtr &node) { - GE_CHECK_NOTNULL(node); - - if ((node->GetType() != CONSTANT) && (node->GetType() != CONSTANTOP)) { - return SUCCESS; - } - GELOGD("ConstPass running, node: %s.", node->GetName().c_str()); - - // const has no control input - if (node->GetInControlNodes().empty()) { - auto out_ctrl_anchor = node->GetOutControlAnchor(); - if (out_ctrl_anchor != nullptr) { - GELOGD("Node: %s unlink all out control edge.", node->GetName().c_str()); - out_ctrl_anchor->UnlinkAll(); - } - - if (node->GetOutAllNodes().empty()) { - // it is an isolated const, just remove it. - GELOGD("Delete isolated const: %s.", node->GetName().c_str()); - auto graph = node->GetOwnerComputeGraph(); - if (GraphUtils::RemoveNodeWithoutRelink(graph, node) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Remove const %s failed.", node->GetName().c_str()); - return FAILED; - } - AddNodeDeleted(node); - } - } - - return SUCCESS; -} -} // namespace ge \ No newline at end of file diff --git a/ge/graph/passes/const_pass.h b/ge/graph/passes/const_pass.h deleted file mode 100644 index a7e011ec..00000000 --- a/ge/graph/passes/const_pass.h +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GRAPH_PASSES_CONST_PASS_H_ -#define GE_GRAPH_PASSES_CONST_PASS_H_ - -#include "graph/passes/base_pass.h" - -namespace ge { -class ConstPass : public BaseNodePass { - public: - Status Run(NodePtr &node) override; -}; -} // namespace ge - -#endif // GE_GRAPH_PASSES_CONST_PASS_H_ \ No newline at end of file diff --git a/ge/graph/passes/dimension_adjust_pass.cc b/ge/graph/passes/dimension_adjust_pass.cc index bfb9cb4f..fc5fe69f 100755 --- a/ge/graph/passes/dimension_adjust_pass.cc +++ b/ge/graph/passes/dimension_adjust_pass.cc @@ -80,71 +80,7 @@ Status DimensionAdjustPass::Run(ge::NodePtr &node) { } } - ret = DealWithInNodes(node); - if (ret != SUCCESS) { - GELOGE(ret, "DealWithInNodes of %s failed.", node->GetName().c_str()); - return ret; - } - std::vector data_relink_io_map = {kDataInputIndex}; return IsolateAndDeleteNode(node, data_relink_io_map); } - -Status DimensionAdjustPass::DealWithInNodes(NodePtr &node) { - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(node->GetOpDesc()); - auto graph = node->GetOwnerComputeGraph(); - auto in_data_anchors = node->GetAllInDataAnchors(); - for (auto &in_data_anchor : in_data_anchors) { - if (in_data_anchor == nullptr) { - continue; - } - auto in_node_anchor = in_data_anchor->GetPeerOutAnchor(); - if (in_node_anchor == nullptr) { - continue; - } - auto in_node = in_node_anchor->GetOwnerNode(); - if (in_node->GetType() == SWITCHN) { - GELOGI("The in_node name is %s, and node type is %s.", in_node->GetName().c_str(), in_node->GetType().c_str()); - auto identity_name = node->GetName() + "_ctrl_identity_" + std::to_string(in_data_anchor->GetIdx()); - auto identity = - AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); - GE_CHECK_NOTNULL(identity); - GELOGI("Create new identity node[%s] success.", identity->GetName().c_str()); - 
GE_CHK_STATUS_RET(GraphUtils::AddEdge(in_node_anchor, identity->GetInDataAnchor(0))) - GE_CHECK_NOTNULL(identity->GetOutControlAnchor()); - if (identity->GetOutControlAnchor()->IsLinkedWith(node->GetInControlAnchor())) { - continue; - } - GE_CHK_STATUS_RET(GraphUtils::AddEdge(identity->GetOutControlAnchor(), node->GetInControlAnchor())) - } - } - - return SUCCESS; -} - -NodePtr DimensionAdjustPass::AddIdentityNodeToGraph(const string &name, const GeTensorDesc &tensor, - ComputeGraphPtr &graph) { - if (graph == nullptr) { - GELOGE(INTERNAL_ERROR, "Comput graph ptr is null in creating identity node."); - return nullptr; - } - - OpDescPtr desc = MakeShared("", ""); - if (desc == nullptr) { - GELOGE(MEMALLOC_FAILED, "Failed to create op desc."); - return nullptr; - } - - desc->SetName(name); - desc->SetType(IDENTITY); - auto ret = desc->AddInputDesc(tensor); - auto ret2 = desc->AddOutputDesc(tensor); - if ((ret != GRAPH_SUCCESS) || (ret2 != GRAPH_SUCCESS)) { - GELOGE(INTERNAL_ERROR, "Failed to add input/output desc in creating identity."); - return nullptr; - } - - return graph->AddNodeFront(desc); -} } // namespace ge diff --git a/ge/graph/passes/dimension_adjust_pass.h b/ge/graph/passes/dimension_adjust_pass.h index 7766f140..685d9694 100755 --- a/ge/graph/passes/dimension_adjust_pass.h +++ b/ge/graph/passes/dimension_adjust_pass.h @@ -34,10 +34,6 @@ namespace ge { class DimensionAdjustPass : public BaseNodePass { public: Status Run(ge::NodePtr &node) override; - - private: - Status DealWithInNodes(ge::NodePtr &node); - NodePtr AddIdentityNodeToGraph(const std::string &name, const GeTensorDesc &tensor, ComputeGraphPtr &graph); }; } // namespace ge diff --git a/ge/graph/passes/enter_pass.cc b/ge/graph/passes/enter_pass.cc index 20e60403..afeca78f 100644 --- a/ge/graph/passes/enter_pass.cc +++ b/ge/graph/passes/enter_pass.cc @@ -23,7 +23,6 @@ namespace { const size_t kOutNodesNum = 1; -const size_t kInCtrlNodesNum = 1; } namespace ge { @@ -56,7 +55,6 @@ Status 
EnterPass::Run(NodePtr &node) { if (out_ctrl_node == nullptr) { continue; } - GELOGD("Remove control edge from %s to %s.", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); if (GraphUtils::RemoveEdge(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()) != GRAPH_SUCCESS) { GELOGE(FAILED, "Remove Enter ctrl output fail, %s->%s", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); @@ -64,12 +62,8 @@ Status EnterPass::Run(NodePtr &node) { } } } else { - if (OptimizeEnterWithOnlyOutData(node, in_node) != SUCCESS) { - GELOGE(FAILED, "Optimize enter node[%s] with only out data node failed.", node->GetName().c_str()); - return FAILED; - } - if (UnlinkCtrlEdgeBeforeConst(node) != SUCCESS) { - GELOGE(FAILED, "Unlink control edge before const of node[%s]'s out nodes failed.", node->GetName().c_str()); + if (OptimizeEnter(node, in_node) != SUCCESS) { + GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str()); return FAILED; } } @@ -78,7 +72,7 @@ Status EnterPass::Run(NodePtr &node) { return SUCCESS; } -Status EnterPass::OptimizeEnterWithOnlyOutData(NodePtr &node, NodePtr &in_node) { +Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { if ((in_node->GetOutAllNodes().size() != kOutNodesNum) || !node->GetOutControlNodes().empty()) { return SUCCESS; } @@ -89,45 +83,17 @@ Status EnterPass::OptimizeEnterWithOnlyOutData(NodePtr &node, NodePtr &in_node) } GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); - GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))) + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); const auto &out_data_anchor = node->GetOutDataAnchor(0); GE_CHECK_NOTNULL(out_data_anchor); for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)) - GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)) + 
GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); } - GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)) + GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)); AddNodeDeleted(node); AddRePassNodesWithInOut(in_node); return SUCCESS; } - -Status EnterPass::UnlinkCtrlEdgeBeforeConst(NodePtr &node) { - auto out_ctrl_nodes = node->GetOutControlNodes(); - if (out_ctrl_nodes.empty()) { - return SUCCESS; - } - auto out_ctrl_anchor = node->GetOutControlAnchor(); - GE_CHECK_NOTNULL(out_ctrl_anchor); - - for (auto &out_ctrl_node : out_ctrl_nodes) { - GE_CHECK_NOTNULL(out_ctrl_node); - if ((out_ctrl_node->GetType() != CONSTANT) && (out_ctrl_node->GetType() != CONSTANTOP)) { - continue; - } - auto in_ctrl_nodes = out_ctrl_node->GetInControlNodes(); - if (in_ctrl_nodes.size() != kInCtrlNodesNum) { - continue; - } - GE_CHK_STATUS_RET(out_ctrl_anchor->Unlink(out_ctrl_node->GetInControlAnchor())) - auto out_nodes_of_const = out_ctrl_node->GetOutAllNodes(); - for (auto &out_node_of_const : out_nodes_of_const) { - if (!out_ctrl_anchor->IsLinkedWith(out_node_of_const->GetInControlAnchor())) { - GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(out_node_of_const->GetInControlAnchor())) - } - } - } - return SUCCESS; -} } // namespace ge diff --git a/ge/graph/passes/enter_pass.h b/ge/graph/passes/enter_pass.h index 67366297..677516ff 100644 --- a/ge/graph/passes/enter_pass.h +++ b/ge/graph/passes/enter_pass.h @@ -25,8 +25,7 @@ class EnterPass : public BaseNodePass { Status Run(NodePtr &node) override; private: - Status OptimizeEnterWithOnlyOutData(NodePtr &node, NodePtr &in_node); - Status UnlinkCtrlEdgeBeforeConst(NodePtr &node); + Status OptimizeEnter(NodePtr &node, NodePtr &in_node); }; } // namespace ge #endif // GE_GRAPH_PASSES_ENTER_PASS_H_ diff --git a/ge/graph/passes/folding_pass.cc b/ge/graph/passes/folding_pass.cc 
index 227a0f61..93dc2c40 100755 --- a/ge/graph/passes/folding_pass.cc +++ b/ge/graph/passes/folding_pass.cc @@ -173,7 +173,10 @@ Status FoldingPass::DealWithInNodes(NodePtr &node) { continue; } auto in_node = in_node_anchor->GetOwnerNode(); - if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH) || (in_node->GetType() == SWITCHN)) { + if (in_node == nullptr) { + continue; + } + if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH)) { GELOGI("The in_node name is %s, and node type is %s.", in_node->GetName().c_str(), in_node->GetType().c_str()); auto ret = in_node_anchor->Unlink(in_data_anchor); if (ret != SUCCESS) { diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc index c1a57a61..103fbb1b 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.cc +++ b/ge/graph/passes/merge_to_stream_merge_pass.cc @@ -89,6 +89,16 @@ Status MergeToStreamMergePass::ReplaceMergeNode(const ComputeGraphPtr &graph, co GE_CHK_STATUS_RET(SetNextIteration(stream_merge, next_iteration_name), "Set next iteration failed"); } + if (merge_op_desc->HasAttr(ATTR_NAME_BATCH_LABEL)) { + string batch_label; + (void)AttrUtils::GetStr(merge_op_desc, ATTR_NAME_BATCH_LABEL, batch_label); + if (!batch_label.empty()) { + auto stream_merge_desc = stream_merge->GetOpDesc(); + GE_CHECK_NOTNULL(stream_merge_desc); + (void)AttrUtils::SetStr(stream_merge_desc, ATTR_NAME_BATCH_LABEL, batch_label); + } + } + return AddActiveNodes(graph, stream_merge); } diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc index cf46f09d..d8c4779d 100644 --- a/ge/graph/passes/next_iteration_pass.cc +++ b/ge/graph/passes/next_iteration_pass.cc @@ -19,8 +19,6 @@ #include "common/ge/ge_util.h" #include "graph/common/omg_util.h" -using std::string; - namespace ge { Status NextIterationPass::Run(ComputeGraphPtr graph) { GELOGD("NextIterationPass Enter"); @@ -37,6 +35,10 @@ Status 
NextIterationPass::Run(ComputeGraphPtr graph) { return INTERNAL_ERROR; } } + if (GroupWithNoBatch(graph) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Group enter_nodes failed without batch_label attr."); + return INTERNAL_ERROR; + } if (FindWhileGroups() != SUCCESS) { GELOGE(INTERNAL_ERROR, "Find while groups failed."); @@ -71,22 +73,75 @@ Status NextIterationPass::GroupEnterNode(const NodePtr &enter_node) { return FAILED; } - string batch_label; - if (ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { - frame_name += batch_label; + std::string batch_label; + (void)ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label); + if (batch_label.empty()) { + auto frame_iter = frame_enter_map_.find(frame_name); + if (frame_iter == frame_enter_map_.end()) { + std::vector enter_nodes; + enter_nodes.emplace_back(enter_node); + frame_enter_map_[frame_name] = enter_nodes; + } else { + frame_iter->second.emplace_back(enter_node); + } + return SUCCESS; } - auto iter = loop_group_map_.find(frame_name); - if (iter == loop_group_map_.end()) { + auto group_iter = loop_group_map_.find(frame_name); + if (group_iter == loop_group_map_.end()) { LoopCondGroupPtr loop_group = MakeShared(); if (loop_group == nullptr) { GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); return FAILED; } loop_group->enter_nodes.emplace_back(enter_node); - loop_group_map_[frame_name] = loop_group; + loop_group_map_[frame_name][batch_label] = loop_group; } else { - iter->second->enter_nodes.emplace_back(enter_node); + auto batch_iter = group_iter->second.find(batch_label); + if (batch_iter == group_iter->second.end()) { + LoopCondGroupPtr loop_group = MakeShared(); + if (loop_group == nullptr) { + GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); + return FAILED; + } + loop_group->enter_nodes.emplace_back(enter_node); + group_iter->second[batch_label] = loop_group; + } else { + batch_iter->second->enter_nodes.emplace_back(enter_node); + } + } + + return SUCCESS; +} + 
+/// +/// @brief Group Enter nodes without batch_label attr +/// @param [in] compute_graph +/// @return Status +/// +Status NextIterationPass::GroupWithNoBatch(const ComputeGraphPtr &graph) { + if (frame_enter_map_.empty()) { + GELOGI("All enter nodes in graph %s has batch_label attr.", graph->GetName().c_str()); + return SUCCESS; + } + for (const auto &item : frame_enter_map_) { + const std::string &frame_name = item.first; + auto iter = loop_group_map_.find(frame_name); + if (iter == loop_group_map_.end()) { + LoopCondGroupPtr loop_group = MakeShared(); + if (loop_group == nullptr) { + GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); + return FAILED; + } + loop_group->enter_nodes = item.second; + loop_group_map_[frame_name][""] = loop_group; + } else { + for (auto &batch_item : iter->second) { + for (const auto &enter_node : item.second) { + batch_item.second->enter_nodes.emplace_back(enter_node); + } + } + } } return SUCCESS; @@ -99,39 +154,55 @@ Status NextIterationPass::GroupEnterNode(const NodePtr &enter_node) { Status NextIterationPass::FindWhileGroups() { for (const auto &loop_group_iter : loop_group_map_) { const std::string &frame_name = loop_group_iter.first; - for (const auto &enter_node : loop_group_iter.second->enter_nodes) { - for (const auto &out_node : enter_node->GetOutAllNodes()) { - const string &type = out_node->GetType(); - if ((type != MERGE) && (type != REFMERGE)) { - continue; - } - - NodePtr next_node = nullptr; - if (FindTargetNode(out_node, NEXTITERATION, true, next_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get NextIteration node failed, frame_name: %s", frame_name.c_str()); - return INTERNAL_ERROR; - } - loop_group_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); - - NodePtr switch_node = nullptr; - if (FindTargetNode(out_node, SWITCH, false, switch_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get Switch node failed, frame_name: %s.", frame_name.c_str()); - return INTERNAL_ERROR; - } - if 
(switch_node == nullptr) { - continue; - } - - NodePtr loop_cond = nullptr; - if (FindTargetNode(switch_node, LOOPCOND, true, loop_cond) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get LoopCond node failed, frame_name: %s.", frame_name.c_str()); - return INTERNAL_ERROR; - } - if (loop_group_iter.second->loop_cond == nullptr) { - loop_group_iter.second->loop_cond = loop_cond; - } else if (loop_group_iter.second->loop_cond != loop_cond) { - GELOGE(FAILED, "Multi LoopCond nodes exist, frame_name: %s.", frame_name.c_str()); - return FAILED; + for (const auto &batch_iter : loop_group_iter.second) { + const std::string &batch_label = batch_iter.first; + for (const auto &enter_node : batch_iter.second->enter_nodes) { + for (const auto &out_node : enter_node->GetOutAllNodes()) { + GELOGI("Find while_group for enter_node %s, frame_name:%s, batch_label:%s.", enter_node->GetName().c_str(), + frame_name.c_str(), batch_label.c_str()); + if ((out_node->GetType() != MERGE) && (out_node->GetType() != REFMERGE)) { + continue; + } + std::string tmp_label; + GE_CHECK_NOTNULL(out_node->GetOpDesc()); + (void)AttrUtils::GetStr(out_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); + bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label)); + if (need_skip) { + continue; + } + + NodePtr next_node = nullptr; + if (FindTargetNode(out_node, NEXTITERATION, true, batch_label, next_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, + "Get NextIteration node failed: inputs of Merge should be Enter/NextIteration, current_Merge=%s", + out_node->GetName().c_str()); + return INTERNAL_ERROR; + } + batch_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); + + NodePtr switch_node = nullptr; + if (FindTargetNode(out_node, SWITCH, false, batch_label, switch_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get Switch node failed: output of Merge should be Switch, current_Merge=%s", + out_node->GetName().c_str()); + return INTERNAL_ERROR; + } + if 
(switch_node == nullptr) { + continue; + } + + NodePtr loop_cond = nullptr; + if (FindTargetNode(switch_node, LOOPCOND, true, batch_label, loop_cond) != SUCCESS) { + GELOGE(INTERNAL_ERROR, + "Get LoopCond node failed: pred input of Switch should be LoopCond, current_Switch=%s", + switch_node->GetName().c_str()); + return INTERNAL_ERROR; + } + if (batch_iter.second->loop_cond == nullptr) { + batch_iter.second->loop_cond = loop_cond; + } else if (batch_iter.second->loop_cond != loop_cond) { + GELOGE(FAILED, "Multi LoopCond nodes exist."); + return FAILED; + } } } } @@ -152,17 +223,19 @@ bool NextIterationPass::VerifyWhileGroup() { GELOGE(INTERNAL_ERROR, "Verify while group failed, frame_name is empty."); return false; } - if (loop_group_iter.second->loop_cond == nullptr) { - GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str()); - return false; - } - - for (const auto &pair_iter : loop_group_iter.second->merge_next_pairs) { - if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) { - GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.", - frame_name.c_str()); + for (const auto &batch_iter : loop_group_iter.second) { + if (batch_iter.second->loop_cond == nullptr) { + GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str()); return false; } + + for (const auto &pair_iter : batch_iter.second->merge_next_pairs) { + if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) { + GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.", + frame_name.c_str()); + return false; + } + } } } @@ -176,53 +249,56 @@ bool NextIterationPass::VerifyWhileGroup() { /// Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { for (const auto &loop_cond_iter : loop_group_map_) { - const std::string &cond_name = loop_cond_iter.second->loop_cond->GetName(); - GELOGI("Handle 
while group, LoopCond node: %s.", cond_name.c_str()); - - // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge - NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE); - NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE); - if ((enter_active == nullptr) || (next_active == nullptr)) { - GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str()); - return INTERNAL_ERROR; - } - - for (const auto &enter_node : loop_cond_iter.second->enter_nodes) { - // Enter --> Active - if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge from %s to %s failed.", enter_node->GetName().c_str(), - enter_active->GetName().c_str()); + for (const auto &batch_iter : loop_cond_iter.second) { + const std::string &cond_name = batch_iter.second->loop_cond->GetName(); + GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str()); + + // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge + NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE); + NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE); + if ((enter_active == nullptr) || (next_active == nullptr)) { + GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str()); return INTERNAL_ERROR; } - } - for (const auto &pair : loop_cond_iter.second->merge_next_pairs) { - NodePtr merge_node = pair.first; - NodePtr next_node = pair.second; - // Active --> Merge - if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; + for (const auto &enter_node : batch_iter.second->enter_nodes) { + // Enter --> Active + if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), 
enter_active->GetInControlAnchor()) != + GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; + } } - // NextIteration --> Active - if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; + for (const auto &pair : batch_iter.second->merge_next_pairs) { + NodePtr merge_node = pair.first; + NodePtr next_node = pair.second; + // Active --> Merge + if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != + GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; + } + + // NextIteration --> Active + if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; + } + + // break link between NextIteration and Merge + if (BreakNextIteration(next_node, merge_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Break NextIteration failed"); + return INTERNAL_ERROR; + } } - // break link between NextIteration and Merge - if (BreakNextIteration(next_node, merge_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Break NextIteration failed"); + if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || + (SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) { + GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed."); return INTERNAL_ERROR; } } - - if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || - (SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) { - GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed."); - return INTERNAL_ERROR; - } } return SUCCESS; @@ -289,11 +365,12 @@ Status NextIterationPass::BreakNextIteration(const NodePtr &next_node, NodePtr & /// @param [in] node /// @param [in] target_type /// @param [in] is_input +/// @param [in] batch_label /// 
@param [out] target_node /// @return Status /// Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, - NodePtr &target_node) { + const std::string &batch_label, NodePtr &target_node) { if (node == nullptr) { GELOGE(PARAM_INVALID, "node is null."); return PARAM_INVALID; @@ -310,6 +387,12 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string } for (const auto &tmp_node : nodes) { + std::string tmp_label; + (void)AttrUtils::GetStr(tmp_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); + bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label)); + if (need_skip) { + continue; + } const std::string type = tmp_node->GetType(); if ((target_type == LOOPCOND) && (type == target_type)) { target_node = tmp_node; @@ -332,6 +415,7 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string /// @return SUCCESS /// Status NextIterationPass::ClearStatus() { + frame_enter_map_.clear(); loop_group_map_.clear(); return SUCCESS; } diff --git a/ge/graph/passes/next_iteration_pass.h b/ge/graph/passes/next_iteration_pass.h index 3266254d..f8223c20 100755 --- a/ge/graph/passes/next_iteration_pass.h +++ b/ge/graph/passes/next_iteration_pass.h @@ -46,6 +46,13 @@ class NextIterationPass : public GraphPass { /// Status GroupEnterNode(const NodePtr &enter_node); + /// + /// @brief Group Enter nodes without batch_label attr + /// @param [in] compute_graph + /// @return Status + /// + Status GroupWithNoBatch(const ComputeGraphPtr &graph); + /// /// @brief Find while groups /// @return Status @@ -90,10 +97,13 @@ class NextIterationPass : public GraphPass { /// @param [out] target_node /// @return Status /// - Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, NodePtr &target_node); + Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, + const std::string &batch_label, NodePtr 
&target_node); - // map - std::unordered_map loop_group_map_; + // map> + std::unordered_map> frame_enter_map_; + // map> + std::unordered_map> loop_group_map_; }; } // namespace ge #endif // GE_GRAPH_PASSES_NEXT_ITERATION_PASS_H_ diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index b1fb3bbd..9ab74d70 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -44,8 +44,6 @@ using std::set; using std::string; using std::vector; -using std::map; -using std::queue; namespace ge { namespace multibatch { @@ -59,15 +57,10 @@ const int kDataInIndex = 0; const int kMergeDataOutIndex = 0; const int kStaticOutput = -1; const int kDivisionConst = 2; -const int32_t kOneInDataNode = 1; -const int32_t kFindNoMatch = 0; inline bool IsDataLikeType(const std::string &node_type) { return (node_type == DATA) || (node_type == AIPP); } -inline bool IsEnterType(const string &node_type) { return (node_type == ENTER) || (node_type == REFENTER); } -const set unchange_types({CONSTANT, CONSTANTOP, ENTER, REFENTER}); - inline bool IsGetNextType(const NodePtr &node) { std::string original_type; GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, @@ -225,6 +218,12 @@ Status MultiBatchGraphCopyer::CopyGraph() { return ret; } + ret = InsertIdentityAfterSwitchN(); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to insert identity nodes after switchn node."); + return INTERNAL_ERROR; + } + GELOGI("Begin to remove useless nodes by prune pass after copy process"); PrunePass prune_pass; ret = prune_pass.Run(graph_); @@ -241,18 +240,6 @@ Status MultiBatchGraphCopyer::Init() { return ret; } - ret = RelinkConstCtrlEdge(); - if (ret != SUCCESS) { - GELOGE(FAILED, "Relink const's control edge failed."); - return FAILED; - } - - ret = ExtractUnchangedStructureOutofCycle(); - if (ret != SUCCESS) { - GELOGE(FAILED, "Extract unchanged structure out of cycle failed."); - 
return FAILED; - } - for (auto &node : graph_->GetAllNodes()) { origin_all_nodes_.emplace_back(node); if (IsDataLikeType(node->GetType())) { @@ -265,281 +252,6 @@ Status MultiBatchGraphCopyer::Init() { return SUCCESS; } -Status MultiBatchGraphCopyer::RelinkConstCtrlEdge() { - for (auto &node : graph_->GetAllNodes()) { - GE_CHECK_NOTNULL(node); - if ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) { - if (node->GetOutDataNodes().empty()) { - continue; - } - if (!node->GetInControlNodes().empty()) { - auto in_ctrl_nodes = node->GetInControlNodes(); - auto out_nodes = node->GetOutAllNodes(); - bool has_merge = false; - for (const auto &out_node : out_nodes) { - GE_CHECK_NOTNULL(out_node); - if (out_node->GetType() == MERGE || out_node->GetType() == REFMERGE) { - has_merge = true; - break; - } - } - if (has_merge) { - continue; - } - auto in_ctrl_anchor = node->GetInControlAnchor(); - GE_CHECK_NOTNULL(in_ctrl_anchor); - in_ctrl_anchor->UnlinkAll(); - for (auto &in_ctrl_node : in_ctrl_nodes) { - auto out_ctrl_anchor_of_in_ctrl_node = in_ctrl_node->GetOutControlAnchor(); - GE_CHECK_NOTNULL(out_ctrl_anchor_of_in_ctrl_node); - for (auto &out_node : out_nodes) { - if (IsEnterType(out_node->GetType())) { - continue; - } - if (!out_ctrl_anchor_of_in_ctrl_node->IsLinkedWith(out_node->GetInControlAnchor())) { - GE_CHK_STATUS_RET(out_ctrl_anchor_of_in_ctrl_node->LinkTo(out_node->GetInControlAnchor())) - } - } - } - } - auto out_ctrl_anchor = node->GetOutControlAnchor(); - if (out_ctrl_anchor != nullptr) { - out_ctrl_anchor->UnlinkAll(); - } - } - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::ExtractUnchangedStructureOutofCycle() { - map> frame_enter; - if (GetEnterNodesGroupByFrame(frame_enter) != SUCCESS) { - GELOGE(FAILED, "Get enter nodes grouped by frame_name failed."); - return FAILED; - } - - queue nodes_to_extract; - if (GetNodeNeedExtract(frame_enter, nodes_to_extract) != SUCCESS) { - GELOGE(FAILED, "Get nodes needed to extract failed."); - 
return FAILED; - } - - while (!nodes_to_extract.empty()) { - auto node = nodes_to_extract.front(); - nodes_to_extract.pop(); - OpDescPtr enter_desc = nullptr; - if (MoveInEntersInDataAnchorDown(node, enter_desc) != SUCCESS) { - GELOGE(FAILED, "Move in enter nodes' in data anchors down of %s failed.", node->GetName().c_str()); - return FAILED; - } - set out_nodes; - if (InsertEnterAfterNode(node, enter_desc, out_nodes) != SUCCESS) { - GELOGE(FAILED, "Insert enter node after %s failed.", node->GetName().c_str()); - return FAILED; - } - - if (MoveCtrlEdgeToOutNodes(node, out_nodes) != SUCCESS) { - GELOGE(FAILED, "Move %s's control edge to out nodes failed.", node->GetName().c_str()); - return FAILED; - } - - for (auto &out_node : out_nodes) { - GE_CHECK_NOTNULL(out_node); - if (AllInDataNodesUnchangeAndNoMergeOut(out_node)) { - nodes_to_extract.push(out_node); - } - } - } - - if (DeleteEnterWithoutDataOut() != SUCCESS) { - GELOGE(FAILED, "Delete enter node without out data nodes failed."); - return FAILED; - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::GetEnterNodesGroupByFrame(map> &frame_enter) { - for (auto &node : graph_->GetAllNodes()) { - GE_CHECK_NOTNULL(node); - if (IsEnterType(node->GetType())) { - if (!node->GetInControlNodes().empty() || !node->GetOutControlNodes().empty()) { - continue; - } - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - string frame_name; - if (!AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { - GELOGE(FAILED, "Get attr frame_name of enter[%] failed.", node->GetName().c_str()); - return FAILED; - } - frame_enter[frame_name].emplace_back(node); - } - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::GetNodeNeedExtract(const map> &frame_enter, - queue &nodes_to_extract) { - for (const auto &one_group : frame_enter) { - auto enters = one_group.second; - for (const auto &enter : enters) { - auto out_data_nodes = enter->GetOutDataNodes(); - for (const auto &out_data_node : out_data_nodes) 
{ - GE_CHECK_NOTNULL(out_data_node); - if (AllInDataNodesUnchangeAndNoMergeOut(out_data_node)) { - nodes_to_extract.push(out_data_node); - } - } - } - } - - return SUCCESS; -} - -bool MultiBatchGraphCopyer::AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node) { - auto out_data_nodes = node->GetOutDataNodes(); - for (const auto &out_data_node : out_data_nodes) { - if (out_data_node == nullptr) { - return false; - } - - if (out_data_node->GetType() == MERGE || out_data_node->GetType() == REFMERGE) { - return false; - } - } - - auto in_data_nodes = node->GetInDataNodes(); - if (in_data_nodes.size() == kOneInDataNode) { - return true; - } - - for (const auto &in_data_node : in_data_nodes) { - if (in_data_node == nullptr) { - return false; - } - if (unchange_types.count(in_data_node->GetType()) == kFindNoMatch) { - return false; - } - } - - return true; -} - -Status MultiBatchGraphCopyer::MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc) { - auto in_data_anchors = node->GetAllInDataAnchors(); - for (auto &in_data_anchor : in_data_anchors) { - auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor); - auto peer_in_data_node = peer_out_data_anchor->GetOwnerNode(); - if (IsEnterType(peer_in_data_node->GetType())) { - GE_CHK_STATUS_RET(peer_out_data_anchor->Unlink(in_data_anchor)) - GELOGD("Unlink data edge from %s to %s.", peer_in_data_node->GetName().c_str(), node->GetName().c_str()); - auto enter_in_data_anchors = peer_in_data_node->GetAllInDataAnchors(); - for (auto &enter_in_data_anchor : enter_in_data_anchors) { - auto peer_out_data_anchor_of_enter = enter_in_data_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor_of_enter); - if (peer_out_data_anchor_of_enter->IsLinkedWith(in_data_anchor)) { - continue; - } - GE_CHK_STATUS_RET(peer_out_data_anchor_of_enter->LinkTo(in_data_anchor)) - GELOGD("Relink data edge from %s to %s.", 
peer_out_data_anchor_of_enter->GetOwnerNode()->GetName().c_str(), - node->GetName().c_str()); - } - enter_desc = peer_in_data_node->GetOpDesc(); - GE_CHECK_NOTNULL(enter_desc); - } - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::InsertEnterAfterNode(NodePtr &node, const OpDescPtr ©_desc, set &out_nodes) { - if (copy_desc == nullptr) { - return SUCCESS; - } - map>> outanchors_inanchors_nodes; - auto out_data_anchors = node->GetAllOutDataAnchors(); - for (auto &out_data_anchor : out_data_anchors) { - auto peer_in_data_anchors = out_data_anchor->GetPeerInDataAnchors(); - for (auto peer_in_data_anchor : peer_in_data_anchors) { - GE_CHECK_NOTNULL(peer_in_data_anchor); - auto peer_in_data_node = peer_in_data_anchor->GetOwnerNode(); - out_nodes.emplace(peer_in_data_node); - outanchors_inanchors_nodes[out_data_anchor].emplace_back(std::make_pair(peer_in_data_anchor, peer_in_data_node)); - } - } - - int32_t i = 0; - auto node_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(node_desc); - // Insert one enter node after node's per out data anchor - for (auto &outanchor_inanchors_nodes : outanchors_inanchors_nodes) { - string name = node->GetName() + "_" + ENTER + "_" + std::to_string(i++); - GELOGD("Create Enter op %s after %s.", name.c_str(), node->GetName().c_str()); - auto enter_desc = AttrUtils::CopyOpDesc(copy_desc); - enter_desc->SetName(name); - GE_CHK_STATUS_RET( - enter_desc->UpdateInputDesc("x", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx()))) - GE_CHK_STATUS_RET( - enter_desc->UpdateOutputDesc("y", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx()))) - auto enter_node = graph_->AddNode(enter_desc); - GE_CHECK_NOTNULL(enter_node); - GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->LinkTo(enter_node->GetInDataAnchor(kDataInIndex))) - GE_CHECK_NOTNULL(enter_node->GetOutDataAnchor(kDataInIndex)); - for (auto &inanchor_node : outanchor_inanchors_nodes.second) { - 
GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->Unlink(inanchor_node.first)) - GE_CHK_STATUS_RET(enter_node->GetOutDataAnchor(kDataInIndex)->LinkTo(inanchor_node.first)) - GELOGD("Unlink from %s to %s, link from %s to %s then to %s.", node->GetName().c_str(), - inanchor_node.second->GetName().c_str(), node->GetName().c_str(), enter_node->GetName().c_str(), - inanchor_node.second->GetName().c_str()); - } - } - - return SUCCESS; -} - -// Move node's in control edges to out data nodes -Status MultiBatchGraphCopyer::MoveCtrlEdgeToOutNodes(NodePtr &node, set &out_nodes) { - auto in_ctrl_anchor = node->GetInControlAnchor(); - GE_CHECK_NOTNULL(in_ctrl_anchor); - auto peer_out_ctrl_anchors = in_ctrl_anchor->GetPeerOutControlAnchors(); - for (auto &peer_out_ctrl_anchor : peer_out_ctrl_anchors) { - GE_CHK_STATUS_RET(peer_out_ctrl_anchor->Unlink(in_ctrl_anchor)) - GELOGD("Unlink control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), - node->GetName().c_str()); - for (auto &out_node : out_nodes) { - auto in_ctrl_anchor_of_out_node = out_node->GetInControlAnchor(); - GE_CHECK_NOTNULL(in_ctrl_anchor_of_out_node); - if (!peer_out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor_of_out_node)) { - GE_CHK_STATUS_RET(peer_out_ctrl_anchor->LinkTo(in_ctrl_anchor_of_out_node)) - GELOGD("Link control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), - out_node->GetName().c_str()); - } - } - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::DeleteEnterWithoutDataOut() { - for (auto &node : graph_->GetAllNodes()) { - GE_CHECK_NOTNULL(node); - if (IsEnterType(node->GetType())) { - auto out_nodes = node->GetOutAllNodes(); - if (out_nodes.empty()) { - GELOGD("Delete enter node: %s which has no output.", node->GetName().c_str()); - GE_CHK_STATUS_RET(GraphUtils::IsolateNode(node, {})) - GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph_, node)) - } - } - } - - return SUCCESS; -} - void 
MultiBatchGraphCopyer::LabelStatusForData(const NodePtr &data) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); GELOGI("Label status for %s, shape_dims is %s.", data->GetName().c_str(), @@ -585,9 +297,6 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { LabelStatusForGetNextSink(data); } } - - map> frame_enters; - InitStatus(frame_enters); bool changed = true; // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch while (changed) { @@ -597,13 +306,12 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { if (iter != origin_nodes_status_.end()) { continue; } - for (auto &in_node : node->GetInDataNodes()) { - if (origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end()) { - if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end()) { - origin_nodes_status_[node.get()] == kNodeInBatchBranch; - ResetEnterStatus(frame_enters, node); - changed = true; - } + for (auto &in_node : node->GetInAllNodes()) { + bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && + origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; + if (is_in_batch) { + origin_nodes_status_[node.get()] = kNodeInBatchBranch; + changed = true; break; } } @@ -612,45 +320,6 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { return SUCCESS; } -void MultiBatchGraphCopyer::InitStatus(map> &frame_enters) { - for (const auto &node : origin_all_nodes_) { - if (!IsEnterType(node->GetType())) { - continue; - } - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - continue; - } - string frame_name; - if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { - frame_enters[frame_name].emplace_back(node); - } - } - - for (const auto &data : origin_data_nodes_) { - auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - if (!IsAllDimsPositive(data_shape.GetDims())) { - origin_nodes_status_[data.get()] = kNodeInBatchBranch; - } 
- } -} - -void MultiBatchGraphCopyer::ResetEnterStatus(map> &frame_enters, const NodePtr &node) { - if (!IsEnterType(node->GetType())) { - return; - } - - for (const auto &frame_enter : frame_enters) { - auto &enters = frame_enter.second; - if (std::find(enters.begin(), enters.end(), node) != enters.end()) { - for (const auto &enter : enters) { - origin_nodes_status_[enter.get()] = kNodeInBatchBranch; - } - break; - } - } -} - Status MultiBatchGraphCopyer::LabelStatus() { if (LabelInBatchBranchStatus() != SUCCESS) { GELOGE(PARAM_INVALID, "Failed to label no in batch branch"); @@ -1691,6 +1360,52 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { return SUCCESS; } +Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { + for (auto &node : graph_->GetAllNodes()) { + if (node->GetType() != SWITCHN) { + continue; + } + auto switchn_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(switchn_desc); + size_t i = 0; + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + for (auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { + auto out_node = in_data_anchor->GetOwnerNode(); + auto op_desc = out_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if ((out_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { + GELOGD("No need to insert identity between %s and %s.", node->GetName().c_str(), out_node->GetName().c_str()); + continue; + } + + auto identity_desc = MakeShared(node->GetName() + "_identity_" + std::to_string(i), IDENTITY); + GE_CHECK_NOTNULL(identity_desc); + + string batch_label; + if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { + if (!AttrUtils::SetStr(identity_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { + GELOGE(FAILED, "Set attr ATTR_NAME_BATCH_LABEL failed, node:%s.", identity_desc->GetName().c_str()); + return FAILED; + } + } + + auto data_desc = switchn_desc->GetOutputDesc(i); + i++; + GE_CHK_STATUS_RET(identity_desc->AddInputDesc("x", data_desc)); + 
GE_CHK_STATUS_RET(identity_desc->AddOutputDesc("y", data_desc)); + + auto identity_node = graph_->AddNode(identity_desc); + GE_CHECK_NOTNULL(identity_node); + GE_CHK_STATUS_RET(out_data_anchor->LinkTo(identity_node->GetInDataAnchor(0))); + GE_CHECK_NOTNULL(identity_node->GetOutControlAnchor()); + GE_CHK_STATUS_RET(identity_node->GetOutControlAnchor()->LinkTo(out_node->GetInControlAnchor())); + } + } + } + + return SUCCESS; +} + Status ProcessMultiBatch(ComputeGraphPtr &graph) { const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE"); if (multi_batch_with_case != nullptr) { diff --git a/ge/graph/preprocess/multi_batch_copy_graph.h b/ge/graph/preprocess/multi_batch_copy_graph.h index d51c4c02..a0de4413 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/ge/graph/preprocess/multi_batch_copy_graph.h @@ -18,7 +18,6 @@ #include #include #include -#include #include "external/ge/ge_api_error_codes.h" @@ -65,26 +64,12 @@ class MultiBatchGraphCopyer { private: Status Init(); Status CheckArguments(); - Status RelinkConstCtrlEdge(); - - Status ExtractUnchangedStructureOutofCycle(); - Status GetEnterNodesGroupByFrame(std::map> &frame_enter); - Status GetNodeNeedExtract(const std::map> &frame_enter, - std::queue &nodes_to_extract); - bool AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node); - Status MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc); - Status InsertEnterAfterNode(NodePtr &node, const OpDescPtr &enter_desc, std::set &out_nodes); - Status MoveCtrlEdgeToOutNodes(NodePtr &node, std::set &out_nodes); - Status DeleteEnterWithoutDataOut(); // label status for origin_all_nodes_ Status LabelStatus(); Status LabelInBatchBranchStatus(); void LabelStatusForData(const NodePtr &data); void LabelStatusForGetNextSink(const NodePtr &data); - void InitStatus(std::map> &frame_enters); - void ResetEnterStatus(std::map> &frame_enters, const NodePtr &node); - // add nodes functions Status CreateNewNodes(); @@ -96,6 +81,7 @@ class 
MultiBatchGraphCopyer { Status InsertSwitchNForData(const NodePtr &node, const size_t &out_anchor_index, const size_t &peer_in_anchor_index, std::vector> &dynamic_out_to_switchn); + Status InsertIdentityAfterSwitchN(); Status UpdateMaxShapeToData(const NodePtr &node, size_t out_anchor_index); Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index); From 9b3f6bd090d07ac63ca9ba2e190688cd3e282db5 Mon Sep 17 00:00:00 2001 From: dongduo Date: Thu, 10 Dec 2020 17:14:57 +0800 Subject: [PATCH 075/127] Fix code check --- ge/engine_manager/dnnengine_manager.cc | 4 ++-- ge/engine_manager/dnnengine_manager.h | 4 ++-- ge/init/gelib.h | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ge/engine_manager/dnnengine_manager.cc b/ge/engine_manager/dnnengine_manager.cc index b23993b6..777d2b38 100644 --- a/ge/engine_manager/dnnengine_manager.cc +++ b/ge/engine_manager/dnnengine_manager.cc @@ -158,7 +158,7 @@ std::shared_ptr DNNEngineManager::GetEngine(const std::string &na return nullptr; } -bool DNNEngineManager::IsEngineRegistered(const std::string &name) { +bool DNNEngineManager::IsEngineRegistered(const std::string &name) const { auto iter = engines_map_.find(name); if (iter != engines_map_.end()) { return true; @@ -177,7 +177,7 @@ const map &DNNEngineManager::GetCheckSupportCost() const { return checksupport_cost_; } -std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { +std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) const { std::lock_guard lock(mutex_); GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: node_ptr is nullptr"); diff --git a/ge/engine_manager/dnnengine_manager.h b/ge/engine_manager/dnnengine_manager.h index c3ae5b95..4cc0b8cf 100755 --- a/ge/engine_manager/dnnengine_manager.h +++ b/ge/engine_manager/dnnengine_manager.h @@ -61,9 +61,9 @@ class DNNEngineManager { public: friend class GELib; std::shared_ptr GetEngine(const std::string 
&name) const; - bool IsEngineRegistered(const std::string &name); + bool IsEngineRegistered(const std::string &name) const; // If can't find appropriate engine name, return "", report error - string GetDNNEngineName(const ge::NodePtr &node_ptr); + string GetDNNEngineName(const ge::NodePtr &node_ptr) const; const map &GetSchedulers() const; const map &GetCheckSupportCost() const; void InitPerformanceStaistic(); diff --git a/ge/init/gelib.h b/ge/init/gelib.h index e52b8dd6..a2aca99d 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -48,13 +48,13 @@ class GELib { Status Finalize(); // get DNNEngineManager object - DNNEngineManager &DNNEngineManagerObj() { return engineManager_; } + const DNNEngineManager &DNNEngineManagerObj() { return engineManager_; } // get OpsKernelManager object - OpsKernelManager &OpsKernelManagerObj() { return opsManager_; } + const OpsKernelManager &OpsKernelManagerObj() { return opsManager_; } // get SessionManager object - SessionManager &SessionManagerObj() { return sessionManager_; } + const SessionManager &SessionManagerObj() { return sessionManager_; } // get Initial flag bool InitFlag() const { return init_flag_; } From 6d73d06877e295a8526f38686ee84ad5748d974c Mon Sep 17 00:00:00 2001 From: dongduo Date: Thu, 10 Dec 2020 17:25:02 +0800 Subject: [PATCH 076/127] Fix code check --- ge/engine_manager/dnnengine_manager.cc | 4 ++-- ge/engine_manager/dnnengine_manager.h | 4 ++-- ge/init/gelib.h | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ge/engine_manager/dnnengine_manager.cc b/ge/engine_manager/dnnengine_manager.cc index 777d2b38..b23993b6 100644 --- a/ge/engine_manager/dnnengine_manager.cc +++ b/ge/engine_manager/dnnengine_manager.cc @@ -158,7 +158,7 @@ std::shared_ptr DNNEngineManager::GetEngine(const std::string &na return nullptr; } -bool DNNEngineManager::IsEngineRegistered(const std::string &name) const { +bool DNNEngineManager::IsEngineRegistered(const std::string &name) { auto iter = 
engines_map_.find(name); if (iter != engines_map_.end()) { return true; @@ -177,7 +177,7 @@ const map &DNNEngineManager::GetCheckSupportCost() const { return checksupport_cost_; } -std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) const { +std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { std::lock_guard lock(mutex_); GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: node_ptr is nullptr"); diff --git a/ge/engine_manager/dnnengine_manager.h b/ge/engine_manager/dnnengine_manager.h index 4cc0b8cf..c3ae5b95 100755 --- a/ge/engine_manager/dnnengine_manager.h +++ b/ge/engine_manager/dnnengine_manager.h @@ -61,9 +61,9 @@ class DNNEngineManager { public: friend class GELib; std::shared_ptr GetEngine(const std::string &name) const; - bool IsEngineRegistered(const std::string &name) const; + bool IsEngineRegistered(const std::string &name); // If can't find appropriate engine name, return "", report error - string GetDNNEngineName(const ge::NodePtr &node_ptr) const; + string GetDNNEngineName(const ge::NodePtr &node_ptr); const map &GetSchedulers() const; const map &GetCheckSupportCost() const; void InitPerformanceStaistic(); diff --git a/ge/init/gelib.h b/ge/init/gelib.h index a2aca99d..e52b8dd6 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -48,13 +48,13 @@ class GELib { Status Finalize(); // get DNNEngineManager object - const DNNEngineManager &DNNEngineManagerObj() { return engineManager_; } + DNNEngineManager &DNNEngineManagerObj() { return engineManager_; } // get OpsKernelManager object - const OpsKernelManager &OpsKernelManagerObj() { return opsManager_; } + OpsKernelManager &OpsKernelManagerObj() { return opsManager_; } // get SessionManager object - const SessionManager &SessionManagerObj() { return sessionManager_; } + SessionManager &SessionManagerObj() { return sessionManager_; } // get Initial flag bool InitFlag() const { return init_flag_; } From 
a4821a3ccce0d740af0f097aebd2885a6f39f4d2 Mon Sep 17 00:00:00 2001 From: taoxudonghaha Date: Thu, 10 Dec 2020 18:12:28 +0800 Subject: [PATCH 077/127] atc and fwk atc.bin --- ge/offline/CMakeLists.txt | 32 +++++++++++++++++++++++++------- ge/offline/atc | 13 +++++++------ ge/offline/module.mk | 4 ++-- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 2f9195bc..21221042 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -74,22 +74,22 @@ target_link_libraries(atc PRIVATE -ldl ) -############ atc.bin ############ -add_executable(atc.bin ${SRC_LIST} ${PROTO_HDRS}) +############ atc_atc.bin ############ +add_executable(atc_atc.bin ${SRC_LIST} ${PROTO_HDRS}) -target_compile_options(atc.bin PRIVATE +target_compile_options(atc_atc.bin PRIVATE -Werror -O2 -Wno-deprecated-declarations ) -target_compile_definitions(atc.bin PRIVATE +target_compile_definitions(atc_atc.bin PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 COMPILE_OMG_PACKAGE google=ascend_private ) -target_include_directories(atc.bin PRIVATE +target_include_directories(atc_atc.bin PRIVATE ${CMAKE_CURRENT_LIST_DIR} ${GE_CODE_DIR} ${GE_CODE_DIR}/ge @@ -115,7 +115,7 @@ target_include_directories(atc.bin PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) -target_link_libraries(atc.bin PRIVATE +target_link_libraries(atc_atc.bin PRIVATE $ ascend_protobuf ge_common @@ -134,6 +134,11 @@ target_link_libraries(atc.bin PRIVATE -ldl ) +set_target_properties(atc_atc.bin PROPERTIES + OUTPUT_NAME atc.bin + RUNTIME_OUTPUT_DIRECTORY atclib +) + ############ fwk_atc.bin ############ add_executable(fwk_atc.bin ${SRC_LIST} ${PROTO_HDRS}) @@ -194,10 +199,23 @@ target_link_libraries(fwk_atc.bin PRIVATE -ldl ) +set_target_properties(fwk_atc.bin PROPERTIES + OUTPUT_NAME atc.bin + RUNTIME_OUTPUT_DIRECTORY fwkacl +) + ############ install ############ set(INSTALL_BASE_DIR "") set(INSTALL_LIBRARY_DIR lib) -install(TARGETS atc atc.bin 
fwk_atc.bin OPTIONAL +install(TARGETS atc OPTIONAL LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} ) + +install(TARGETS atc_atc.bin OPTIONAL + RUNTIME DESTINATION ${INSTALL_LIBRARY_DIR}/atclib +) + +install(TARGETS fwk_atc.bin OPTIONAL + RUNTIME DESTINATION ${INSTALL_LIBRARY_DIR}/fwkacl +) diff --git a/ge/offline/atc b/ge/offline/atc index 73dfbee0..05c65c26 100644 --- a/ge/offline/atc +++ b/ge/offline/atc @@ -4,7 +4,12 @@ # Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved. #------------------------------------------------------------------- -LOCAL_PATH=$(cd "$(dirname "$0")"; pwd) +real_path=$(readlink "$0") +if [ $? -eq 0 ]; then + LOCAL_PATH=$(cd "$(dirname "$real_path")"; pwd) +else + LOCAL_PATH=$(cd "$(dirname "$0")"; pwd) +fi PKG_PATH=$(cd ${LOCAL_PATH}/..; pwd) LIB_P="/lib64" PYTHON_P="/python/site-packages" @@ -13,8 +18,4 @@ PYTHON_PATH="${PKG_PATH}${PYTHON_P}" export LD_LIBRARY_PATH="${LIB64_PATH}:${LD_LIBRARY_PATH}" export PYTHONPATH="${PYTHON_PATH}:${PYTHONPATH}" -if [ -f "${PKG_PATH}/bin/atc.bin" ];then - ${PKG_PATH}/bin/atc.bin/atc.bin $@ -else - ${PKG_PATH}/bin/atc.bin/fwk_atc.bin $@ -fi +${PKG_PATH}/bin/atc.bin "$@" diff --git a/ge/offline/module.mk b/ge/offline/module.mk index 8018266a..5c7a919c 100755 --- a/ge/offline/module.mk +++ b/ge/offline/module.mk @@ -56,7 +56,7 @@ include $(BUILD_HOST_EXECUTABLE) include $(CLEAR_VARS) -LOCAL_MODULE := atc.bin +LOCAL_MODULE := atclib/atc.bin LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private @@ -109,7 +109,7 @@ include $(BUILD_HOST_EXECUTABLE) include $(CLEAR_VARS) -LOCAL_MODULE := fwk_atc.bin +LOCAL_MODULE := fwkacl/atc.bin LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private From 26a6505d376e951dae6e6c0a4452bf9b800ff9b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Thu, 10 
Dec 2020 22:08:49 +0800 Subject: [PATCH 078/127] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20?= =?UTF-8?q?!557=20:=20ir=20build=20optimize'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ge/ir_build/ge_ir_build.cc | 80 +++++--------------------------------- 1 file changed, 10 insertions(+), 70 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 60f90313..96ae9b24 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -50,11 +50,6 @@ const std::string IR_OPTION_LOG_LEVEL_DEFAULT = "default"; const std::string IR_OPTION_BUFFER_OPTIMIZE_DEFAULT = "l2_optimize"; const std::string IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT = "0"; const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; - -const std::string kInputShape = "input_shape"; -const std::string kInputFormat = "input_format"; -const std::string kReUseMemEnable = "1"; -const std::string kReUseMemDisEnable = "0"; } // namespace static graphStatus CheckGlobalOptions(std::map &global_options) { @@ -237,7 +232,6 @@ class Impl { ModelBufferData &ge_models); graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, bool is_dynamic_input); - graphStatus UpdateDataOpAttr(const Graph &graph); void SetRtSocVersion(); void UpdateThreadContext(); void LoadOpsProto(); @@ -248,40 +242,6 @@ class Impl { OmgContext omg_context_; }; -graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { - GELOGD("Enter Update Data Attr Process!"); - if (options_.find(kInputShape) == options_.end()) { - return GRAPH_SUCCESS; - } - unordered_map> shape_map; - vector>> user_shape_map; - GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), - return GRAPH_PARAM_INVALID, "parse input shape failed!"); - auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); - GE_CHECK_NOTNULL(compute_graph); - for (ge::NodePtr &input_node : 
compute_graph->GetDirectNode()) { - GE_CHECK_NOTNULL(input_node); - ge::OpDescPtr op = input_node->GetOpDesc(); - GE_CHECK_NOTNULL(op); - if (op->GetType() == DATA) { - auto tensor_input = op->MutableInputDesc(0); - auto tensor_output = op->MutableOutputDesc(0); - GE_CHECK_NOTNULL(tensor_input); - GE_CHECK_NOTNULL(tensor_output); - string data_op_name = op->GetName(); - auto iter = shape_map.find(data_op_name); - if (iter != shape_map.end()) { - tensor_input->SetShape(ge::GeShape(iter->second)); - tensor_output->SetShape(ge::GeShape(iter->second)); - GELOGD("update input [%s] shape info", data_op_name.c_str()); - } else { - GELOGI("no need update input [%s] attr because not found from input_shape.", data_op_name.c_str()); - } - } - } - return GRAPH_SUCCESS; -} - graphStatus Impl::CheckOptions(const std::map &options) { for (auto &ele : options) { auto it = ge::ir_option::ir_builder_suppported_options.find(ele.first); @@ -317,11 +277,6 @@ graphStatus Impl::CheckOptions(const std::map &options return GRAPH_PARAM_INVALID; } } - // Check option EXEC_DISABLE_REUSED_MEMORY - it = options_.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY); - if (it != options_.end() && (CheckDisableReuseMemoryParamValid(it->second) != GRAPH_SUCCESS)) { - return GRAPH_PARAM_INVALID; - } return GRAPH_SUCCESS; } @@ -368,10 +323,7 @@ graphStatus Impl::Init(const Graph &graph, const std::map(string(IR_OPTION_MODE), to_string(0))); + options_.insert(std::pair(string(IR_OPTION_TARGET), "mini")); options_.insert(std::pair(string(ge::RUN_FLAG), to_string(0))); options_.insert(std::pair(string(ge::TRAIN_FLAG), to_string(0))); options_.insert(std::pair(string(ge::SAVE_ORIGINAL_MODEL), to_string(0))); @@ -468,52 +421,39 @@ void Impl::UpdateThreadContext() { graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs) { auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); + int64_t index = 0; for (ge::NodePtr &input_node : 
compute_graph->GetDirectNode()) { GE_CHECK_NOTNULL(input_node); ge::OpDescPtr op = input_node->GetOpDesc(); GE_CHECK_NOTNULL(op); if (op->GetType() == DATA) { + (void)AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); GELOGD("Data op inputDesc size: %zu", op->GetAllInputsDesc().size()); - auto tensor = op->MutableInputDesc(0); - GE_CHECK_NOTNULL(tensor); + ge::GeTensorDesc tensor = op->GetInputDesc(0); string data_op_name = op->GetName(); GELOGD("Data op name: %s", data_op_name.c_str()); ge::GeShape data_shape; auto iter = omg_context_.input_dims.find(data_op_name); if (iter != omg_context_.input_dims.end()) { data_shape = ge::GeShape(iter->second); - GELOGD("Data op get shape from Context and update [%s] shape info", data_op_name.c_str()); + GELOGD("Data op get shape from Context."); } else { - data_shape = tensor->GetShape(); + data_shape = tensor.GetShape(); GELOGD("Data op get shape from InputDesc in ge ir graph."); } // If user point input format, do work for all data ops; else do according to tensor_desc auto data_format = omg_context_.format != domi::DOMI_TENSOR_ND ? 
- ge::TypeUtils::DomiFormatToFormat(omg_context_.format) : tensor->GetFormat(); - ge::DataType data_type = tensor->GetDataType(); + ge::TypeUtils::DomiFormatToFormat(omg_context_.format) : tensor.GetFormat(); + ge::DataType data_type = tensor.GetDataType(); string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type); GELOGD("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str()); ge::GeTensor inputTensor; ge::GeTensorDesc desc(data_shape, ge::Format(data_format), data_type); inputTensor.SetTensorDesc(desc); - int64_t index = 0; - if (AttrUtils::GetInt(op, ATTR_NAME_INDEX, index)) { - AttrUtils::SetInt(desc, ATTR_NAME_INDEX, index); - } else { - GELOGE(GRAPH_PARAM_INVALID, "Get attr name idx failed!"); - return GRAPH_PARAM_INVALID; - } - inputs.emplace_back(inputTensor); + inputs.push_back(inputTensor); } } - std::sort(inputs.begin(), inputs.end(), [](ge::GeTensor a, ge::GeTensor b) { - int64_t data_idx_a = 0; - int64_t data_idx_b = 0; - AttrUtils::GetInt(a.MutableTensorDesc(), ATTR_NAME_INDEX, data_idx_a); - AttrUtils::GetInt(b.MutableTensorDesc(), ATTR_NAME_INDEX, data_idx_b); - return data_idx_a <= data_idx_b; - }); GELOGD("CreateInputsForIRBuild, inputs size: %zu", inputs.size()); return GRAPH_SUCCESS; } From 9a867fb622c317ee28062ae2ecb06aaaf9a725d0 Mon Sep 17 00:00:00 2001 From: wxl Date: Fri, 11 Dec 2020 13:55:17 +0800 Subject: [PATCH 079/127] Bugfix: single op and ir build question bugfix --- ge/ir_build/ge_ir_build.cc | 48 ++++++++++++++++++++++++++++++++-- ge/offline/single_op_parser.cc | 19 ++++++++++++++ ge/offline/single_op_parser.h | 6 +++++ 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 96ae9b24..c010a690 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -36,7 +36,6 @@ #include "model/ge_model.h" #include "graph/shape_refiner.h" #include "graph/opsproto_manager.h" -#include "graph/utils/type_utils.h" using 
std::string; using namespace std; @@ -50,6 +49,8 @@ const std::string IR_OPTION_LOG_LEVEL_DEFAULT = "default"; const std::string IR_OPTION_BUFFER_OPTIMIZE_DEFAULT = "l2_optimize"; const std::string IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT = "0"; const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; +const std::string kInputShape = "input_shape"; +const std::string kInputFormat = "input_format"; } // namespace static graphStatus CheckGlobalOptions(std::map &global_options) { @@ -227,6 +228,7 @@ class Impl { graphStatus CheckOptions(const std::map &options); graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs); graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape); + graphStatus UpdateDataOpAttr(const Graph &graph); graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, ModelBufferData &ge_models); @@ -242,6 +244,40 @@ class Impl { OmgContext omg_context_; }; +graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { + GELOGD("Enter Update Data Attr Process!"); + if (options_.find(kInputShape) == options_.end()) { + return GRAPH_SUCCESS; + } + unordered_map> shape_map; + vector>> user_shape_map; + GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), + return GRAPH_PARAM_INVALID, "parse input shape failed!"); + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(input_node); + ge::OpDescPtr op = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == DATA) { + auto tensor_input = op->MutableInputDesc(0); + auto tensor_output = op->MutableOutputDesc(0); + GE_CHECK_NOTNULL(tensor_input); + GE_CHECK_NOTNULL(tensor_output); + string data_op_name = op->GetName(); + auto iter = shape_map.find(data_op_name); + if (iter != shape_map.end()) { + 
tensor_input->SetShape(ge::GeShape(iter->second)); + tensor_output->SetShape(ge::GeShape(iter->second)); + GELOGD("update input [%s] shape info", data_op_name.c_str()); + } else { + GELOGI("no need update input [%s] attr because not found from input_shape.", data_op_name.c_str()); + } + } + } + return GRAPH_SUCCESS; +} + graphStatus Impl::CheckOptions(const std::map &options) { for (auto &ele : options) { auto it = ge::ir_option::ir_builder_suppported_options.find(ele.first); @@ -277,6 +313,11 @@ graphStatus Impl::CheckOptions(const std::map &options return GRAPH_PARAM_INVALID; } } + // Check option EXEC_DISABLE_REUSED_MEMORY + it = options_.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY); + if (it != options_.end() && (CheckDisableReuseMemoryParamValid(it->second) != GRAPH_SUCCESS)) { + return GRAPH_PARAM_INVALID; + } return GRAPH_SUCCESS; } @@ -323,7 +364,10 @@ graphStatus Impl::Init(const Graph &graph, const std::map &dict, string &key, T default_val) { } void from_json(const Json &j, SingleOpTensorDesc &desc) { + bool is_tensor_valid = true; desc.dims = j.at(kKeyShape).get>(); auto it = j.find(kKeyShapeRange); if (it != j.end()) { @@ -189,9 +190,12 @@ void from_json(const Json &j, SingleOpTensorDesc &desc) { string type_str = j.at(kKeyType).get(); desc.format = GetValue(kFormatDict, format_str, FORMAT_RESERVED); desc.type = GetValue(kDataTypeDict, type_str, DT_UNDEFINED); + is_tensor_valid = is_tensor_valid && ge::TypeUtils::IsFormatValid(format_str); + is_tensor_valid = is_tensor_valid && ge::TypeUtils::IsDataTypeValid(type_str); it = j.find(kKeyOriginFormat); if (it != j.end()) { string origin_format_str = j.at(kKeyOriginFormat).get(); + is_tensor_valid = is_tensor_valid && ge::TypeUtils::IsFormatValid(origin_format_str); desc.ori_format = GetValue(kFormatDict, origin_format_str, FORMAT_RESERVED); } auto tensor_name = j.find(kKeyName); @@ -202,6 +206,9 @@ void from_json(const Json &j, SingleOpTensorDesc &desc) { if (dynamic_input_name != j.end()) { 
desc.dynamic_input_name = dynamic_input_name->get(); } + if (!is_tensor_valid) { + desc.SetValidFlag(is_tensor_valid); + } } void from_json(const Json &j, SingleOpAttr &attr) { @@ -305,6 +312,12 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) { int index = 0; for (auto &tensor_desc : op_desc.input_desc) { + if (!tensor_desc.GetValidFlag()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, + {"intput", "datatype or format", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Input's dataType or format is invalid when the index is %d", index); + return false; + } if ((tensor_desc.type == DT_UNDEFINED && tensor_desc.format != FORMAT_RESERVED) || (tensor_desc.type != DT_UNDEFINED && tensor_desc.format == FORMAT_RESERVED)){ ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, @@ -317,6 +330,12 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) { index = 0; for (auto &tensor_desc : op_desc.output_desc) { + if (!tensor_desc.GetValidFlag()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, + {"output", "datatype", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Output's dataType is invalid when the index is %d", index); + return false; + } if (tensor_desc.type == DT_UNDEFINED) { ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, {"output", "datatype", std::to_string(index)}); diff --git a/ge/offline/single_op_parser.h b/ge/offline/single_op_parser.h index 19879a32..71aa58bb 100644 --- a/ge/offline/single_op_parser.h +++ b/ge/offline/single_op_parser.h @@ -28,6 +28,10 @@ namespace ge { struct SingleOpTensorDesc { +public: + bool GetValidFlag() const { return is_valid_; } + void SetValidFlag(bool is_valid) { is_valid_ = is_valid; } +public: std::string name; std::vector dims; std::vector ori_dims; @@ -36,6 +40,8 @@ struct SingleOpTensorDesc { ge::Format ori_format = ge::FORMAT_RESERVED; ge::DataType type 
= ge::DT_UNDEFINED; std::string dynamic_input_name; +private: + bool is_valid_ = true; }; struct SingleOpAttr { From 2c9c9474bfbd2baa044898394a90806225337d9b Mon Sep 17 00:00:00 2001 From: wxl Date: Fri, 11 Dec 2020 13:59:54 +0800 Subject: [PATCH 080/127] Bugfix: single op and ir build question bugfix --- ge/offline/single_op_parser.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/offline/single_op_parser.cc b/ge/offline/single_op_parser.cc index 09001515..b1e0da6d 100644 --- a/ge/offline/single_op_parser.cc +++ b/ge/offline/single_op_parser.cc @@ -27,6 +27,7 @@ #include "common/ge_inner_error_codes.h" #include "framework/common/util.h" #include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" #include "graph/utils/op_desc_utils.h" #include "graph/operator_factory_impl.h" From 5982b2b1d3e0d7aa3743f5778b93ceeac2801f04 Mon Sep 17 00:00:00 2001 From: baker Date: Fri, 11 Dec 2020 14:18:54 +0800 Subject: [PATCH 081/127] test --- ge/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 88a5c52f..366fbce7 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -927,3 +927,4 @@ install(FILES DESTINATION ${INSTALL_LIBRARY_DIR} ) endif() + From 763d5196442e96ba03c7edd08d31a41f2c55ff2f Mon Sep 17 00:00:00 2001 From: baker Date: Fri, 11 Dec 2020 15:14:10 +0800 Subject: [PATCH 082/127] update submodule --- metadef | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadef b/metadef index d19c9c5c..bd2cfdfa 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit d19c9c5c92f21a0335c18681dcceed44f3a54ddc +Subproject commit bd2cfdfa85a3d9dcbd7dc825f5759c7f8b3ffa9a From 92fa54c0edc66016f3d20bb67993b087b7647d56 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Fri, 11 Dec 2020 15:35:41 +0800 Subject: [PATCH 083/127] modified: ge/CMakeLists.txt modified: ge/ge_inference.mk modified: ge/ge_runner.mk --- ge/CMakeLists.txt | 2 ++ ge/ge_inference.mk | 1 + ge/ge_runner.mk | 1 + 3 files 
changed, 4 insertions(+) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index b037f4a4..7c8ea305 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -202,6 +202,7 @@ set(TRAIN_SRC_LIST "host_kernels/sub_kernel.cc" "host_kernels/transdata_kernel.cc" "host_kernels/unpack_kernel.cc" + "host_kernels/reformat_kernel.cc" "graph/passes/folding_pass.cc" "graph/passes/get_original_format_pass.cc" "graph/passes/guarantee_const_pass.cc" @@ -488,6 +489,7 @@ set(INFER_SRC_LIST "host_kernels/slice_d_kernel.cc" "host_kernels/dynamic_stitch_kernel.cc" "host_kernels/identity_kernel.cc" + "host_kernels/reformat_kernel.cc" "graph/passes/stop_gradient_pass.cc" "graph/passes/prevent_gradient_pass.cc" "graph/passes/identity_pass.cc" diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index fe76a612..3503576f 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -164,6 +164,7 @@ OMG_HOST_SRC_FILES := \ host_kernels/slice_d_kernel.cc \ host_kernels/dynamic_stitch_kernel.cc \ host_kernels/identity_kernel.cc \ + host_kernels/reformat_kernel.cc \ graph/passes/stop_gradient_pass.cc \ graph/passes/prevent_gradient_pass.cc \ graph/passes/identity_pass.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 58ad1266..2b9ceb5c 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -171,6 +171,7 @@ LIBGE_LOCAL_SRC_FILES := \ host_kernels/sub_kernel.cc \ host_kernels/transdata_kernel.cc \ host_kernels/unpack_kernel.cc \ + host_kernels/reformat_kernel.cc \ graph/passes/folding_pass.cc \ graph/passes/get_original_format_pass.cc \ graph/passes/guarantee_const_pass.cc \ From d4b2d82cc970a519d8af91391122e626bf03191d Mon Sep 17 00:00:00 2001 From: baker Date: Fri, 11 Dec 2020 15:47:52 +0800 Subject: [PATCH 084/127] test --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 648bb954..f0968cf9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,4 +190,4 @@ else() set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..) 
endif() -add_subdirectory(ge) +add_subdirectory(ge) \ No newline at end of file From daf1524a6476aaf1ab3d82e67605f978b97f0629 Mon Sep 17 00:00:00 2001 From: baker Date: Fri, 11 Dec 2020 15:49:23 +0800 Subject: [PATCH 085/127] test --- CMakeLists.txt | 2 +- ge/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f0968cf9..648bb954 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,4 +190,4 @@ else() set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..) endif() -add_subdirectory(ge) \ No newline at end of file +add_subdirectory(ge) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 366fbce7..88a5c52f 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -927,4 +927,3 @@ install(FILES DESTINATION ${INSTALL_LIBRARY_DIR} ) endif() - From ef63b2207ae8f3ffd3df1277175f26af84349d64 Mon Sep 17 00:00:00 2001 From: wuweikang Date: Fri, 11 Dec 2020 16:00:13 +0800 Subject: [PATCH 086/127] add ATTR_NAME_ROOT_GRAPH_ID --- ge/graph/manager/graph_manager.cc | 5 ++++- metadef | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 9ce68d76..90437d77 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -2466,7 +2466,6 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager GetContext().SetSessionId(session_id); GetThreadLocalContext() = ge_context; graph_manager->UpdateLocalOmgContext(root_graph_id); - ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph(); const std::string &engine_name = sub_graph_info_ptr->GetEngineName(); GELOGD("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", @@ -2474,6 +2473,10 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager pthread_self()); GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore"); GE_CHECK_NOTNULL(compute_graph_tmp); + if 
(!AttrUtils::SetInt(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_ID, root_graph_id)) { + GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id); + return FAILED; + } compute_graph_tmp->SetSessionID(session_id); Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp, compute_graph, diff --git a/metadef b/metadef index d19c9c5c..bd2cfdfa 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit d19c9c5c92f21a0335c18681dcceed44f3a54ddc +Subproject commit bd2cfdfa85a3d9dcbd7dc825f5759c7f8b3ffa9a From 11161e313e5cd2931d5eacf6b5c9b33cdd81fe09 Mon Sep 17 00:00:00 2001 From: weiyang Date: Thu, 10 Dec 2020 20:30:18 +0800 Subject: [PATCH 087/127] fix cpp lint and errocode --- ge/common/debug/memory_dumper.cc | 3 ++- ge/common/helper/model_helper.cc | 3 ++- ge/common/helper/om_file_helper.cc | 3 ++- ge/executor/ge_executor.cc | 3 ++- ge/graph/build/graph_builder.cc | 3 ++- .../build/memory/binary_block_mem_assigner.cc | 2 +- ge/graph/build/memory/block_mem_assigner.cc | 5 +++-- ge/graph/build/memory/graph_mem_assigner.cc | 6 +++-- ge/graph/build/stream_allocator.cc | 3 ++- ge/graph/load/graph_loader.cc | 3 ++- .../load/new_model_manager/davinci_model.cc | 22 ++++++++++--------- .../load/new_model_manager/model_manager.cc | 3 ++- .../task_info/hccl_task_info.cc | 3 ++- .../task_info/kernel_task_info.cc | 4 ++-- ge/graph/manager/graph_manager.cc | 9 ++++++-- ge/graph/manager/util/hcom_util.cc | 3 ++- ge/graph/passes/subgraph_pass.cc | 8 +++---- ge/graph/preprocess/graph_preprocess.cc | 3 ++- ge/host_kernels/ssd_prior_box_kernel.cc | 8 +++++-- .../executor/hybrid_model_async_executor.cc | 6 +++-- .../executor/worker/shape_inference_engine.cc | 3 ++- ge/hybrid/model/hybrid_model.cc | 18 ++++++++++----- ge/hybrid/model/hybrid_model.h | 3 ++- ge/ir_build/ge_ir_build.cc | 4 ++-- ge/opskernel_manager/ops_kernel_manager.cc | 8 +++---- ge/session/omg.cc | 3 ++- 
ge/single_op/single_op.cc | 9 +++++--- ge/single_op/single_op_model.cc | 3 ++- ge/single_op/task/tbe_task_builder.cc | 3 ++- inc/framework/common/taskdown_common.h | 2 +- 30 files changed, 100 insertions(+), 59 deletions(-) diff --git a/ge/common/debug/memory_dumper.cc b/ge/common/debug/memory_dumper.cc index 872fe1da..527f0bb2 100644 --- a/ge/common/debug/memory_dumper.cc +++ b/ge/common/debug/memory_dumper.cc @@ -139,7 +139,8 @@ int MemoryDumper::OpenFile(const char *filename) { GE_IF_BOOL_EXEC( -1 != path_split_pos, string prefix_path = std::string(filename).substr(0, path_split_pos); string last_path = std::string(filename).substr(path_split_pos, strlen(filename) - 1); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= MMPA_MAX_PATH, return kInvalidFd, "Prefix path is too long!"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= MMPA_MAX_PATH, + return kInvalidFd, "Prefix path is too long!"); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmRealPath(prefix_path.c_str(), tmp_path, MMPA_MAX_PATH) != EN_OK, return kInvalidFd, "Dir %s does not exit.", prefix_path.c_str()); real_path = std::string(tmp_path) + last_path;) diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index e55af956..fdfac68e 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -189,7 +189,8 @@ Status ModelHelper::SaveModelHeader(std::shared_ptr &om_file_s err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(), platform_version.size() + 1); if (err != EOK) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelHelper SaveModel failed while allocating memory for platform_version."); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, + "ModelHelper SaveModel failed while allocating memory for platform_version."); return ACL_ERROR_GE_MEMORY_ALLOCATION; } string version = reinterpret_cast(model_header.platform_version); diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc index 
36217ca1..d1c52b13 100644 --- a/ge/common/helper/om_file_helper.cc +++ b/ge/common/helper/om_file_helper.cc @@ -180,7 +180,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint context_.partition_datas_.push_back(partition); if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", + GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, + "The partition size %zu is greater than the model data size %u.", partition.size + mem_offset, model_data_size); return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; } diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 3e916916..26b0a323 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -639,7 +639,8 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { return ACL_ERROR_GE_INTERNAL_ERROR; } - std::shared_ptr hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id); + std::shared_ptr hybrid_davinci_model = + ModelManager::GetInstance()->GetHybridModel(model_id); if (hybrid_davinci_model != nullptr) { uint64_t session_id = hybrid_davinci_model->GetSessionId(); VarManagerPool::Instance().RemoveVarManager(session_id); diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 1c20b796..87d2a206 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -349,7 +349,8 @@ static Status GenerateTaskForConstant(const std::shared_ptr &graph GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { - GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str()); + 
GELOGE(FAILED, "Insert memcpy between %s and %s failed.", + in_node->GetName().c_str(), node->GetName().c_str()); return FAILED; } } diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index 16420123..570828eb 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -21,7 +21,7 @@ namespace { const uint32_t kRangeCeilInterval = 2; const uint32_t kLogBase = 2; -const int64_t kLargeBlockSize = 8 * 1024 * 1024; +const int64_t kLargeBlockSize = 8 * 1024 * 1024; // 8M const int64_t kLargeBlockRangeSize = 2; } // namespace diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index bd2a9912..9dc0cf73 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -1416,7 +1416,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); if (!no_need_assign_memory) { out_node_set_continuous_input = - IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory, reset_zero_copy_flag); + IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, + no_need_assign_memory, reset_zero_copy_flag); GE_IF_BOOL_EXEC(!no_need_assign_memory, no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);); } @@ -1499,7 +1500,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { bool workspace_skip_flag = false; if (has_tvm_workspace_mem_type_attr && tvm_workspace_memory_type[i] == RT_MEMORY_L1) { GELOGI( - "fusion: node[%s]workspace index[%zu] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", + "fusion:node[%s]workspace index[%zu] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", node_op_desc->GetName().c_str(), i, 
tvm_workspace_memory_type[i]); workspace_skip_flag = true; } diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index a6da4682..16d5d38f 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -419,7 +419,8 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two. This node is not supported now."; + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return PARAM_INVALID;); @@ -429,7 +430,8 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, GE_IF_BOOL_EXEC(is_peer_reference, std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two. This node is not supported now."; + " requires continuous output. There may be conflict between the two." 
+ + "This node is not supported now."; GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return PARAM_INVALID;); diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 4378f71b..a1cda506 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -49,7 +49,8 @@ inline bool HasContinuousStreamLabel(const ge::OpDescPtr &op_desc, std::string & } bool IsHcclOp(const string &op_type) { - const set hccl_op_types({ge::HCOMBROADCAST, ge::HCOMALLGATHER, ge::HCOMALLREDUCE, ge::HCOMREDUCESCATTER, ge::HCOMREDUCE}); + const set hccl_op_types({ge::HCOMBROADCAST, ge::HCOMALLGATHER, + ge::HCOMALLREDUCE, ge::HCOMREDUCESCATTER, ge::HCOMREDUCE}); return hccl_op_types.find(op_type) != hccl_op_types.end(); } } // namespace diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index aa825a5d..44556422 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -283,7 +283,8 @@ Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asyn std::vector &output_desc) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); + Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, + input_data, input_desc, output_data, output_desc); if (ret != SUCCESS) { GELOGE(ret, "Execute model failed, model_id:%u.", model_id); return ret; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 72a562bf..181347f4 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -83,7 +83,7 @@ const uint32_t kAddrLen = sizeof(void *); const int kDecimal = 10; const int kBytes = 8; const uint32_t kDataMemAlignSizeCompare = 64; -const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; +const uint32_t 
kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; // 2M const uint32_t kDumpFlagOfL1Fusion = 0; const char *const kDefaultBatchLable = "Batch_default"; const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; @@ -330,8 +330,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size); return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; } - GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - mem_base_, data_size); + GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", + runtime_param_.graph_id, mem_base_, data_size); if (!is_inner_weight_base_) { weights_mem_base_ = mem_base_; @@ -1543,7 +1543,8 @@ Status DavinciModel::LoadWithQueue() { } if (output_queue_ids_.size() != new_output_data_info_.size()) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Output queue ids not match model: output_queue=%zu output_data=%zu", + GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, + "Output queue ids not match model: output_queue=%zu output_data=%zu", output_queue_ids_.size(), new_output_data_info_.size()); return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; } @@ -3391,14 +3392,14 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 /// Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { - GELOGE(PARAM_INVALID, "[ZCPY] Update input data to model failed."); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); + return ACL_ERROR_GE_PARAM_INVALID; } if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { - 
GELOGE(PARAM_INVALID, "[ZCPY] Update output data to model failed."); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); + return ACL_ERROR_GE_PARAM_INVALID; } for (ZeroCopyTask &task : zero_copy_tasks_) { @@ -3861,7 +3862,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa if (!is_async_mode_) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy Output data to user failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, + "Copy Output data to user failed."); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_END)); } @@ -4061,7 +4063,7 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { data_dumper_.SetDeviceId(device_id); // set loop count addr - auto get_var_addr = [](const OpDescPtr &op, const RuntimeParam &runtime_param) -> void * { + auto get_var_addr = [](const OpDescPtr &op, const RuntimeParam &runtime_param) -> void *{ if (op != nullptr) { auto v_output_size = ModelUtils::GetOutputSize(op); auto v_output_addr = ModelUtils::GetOutputDataAddrs(runtime_param, op); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index fdc4915f..0b55d150 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1254,7 +1254,8 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy } std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, 
ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, + "Invalid model id %u, check weather model has been loaded or not.", model_id); if (davinci_model->NeedDestroyAicpuKernel()) { GELOGI("Start to destroy specified aicpu kernel."); diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index 442a1383..4fb64aab 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -281,7 +281,8 @@ Status HcclTaskInfo::SetAddrs(const std::shared_ptr &op_desc, kernel_hccl_infos[i].inputDataAddr = input_data_addr; if (hccl_type == HCOMALLGATHER || hccl_type == HCOMRECEIVE || hccl_type == HVDCALLBACKALLGATHER) { kernel_hccl_infos[i].outputDataAddr = output_data_addr; - } else if (hccl_type == HCOMALLREDUCE || hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE || hccl_type == HCOMREDUCE) { + } else if (hccl_type == HCOMALLREDUCE || + hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE || hccl_type == HCOMREDUCE) { GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclOperationType(op_desc, op_type), "davinci_model: GetHcomOperationType fail!"); kernel_hccl_infos[i].outputDataAddr = output_data_addr; diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 1f398309..74faeb24 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -1172,8 +1172,8 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u } ccStatus_t cc_ret; std::string update_kernel_args = "ccUpdateKernelArgs"; - auto cceUpdateKernelArgs = (ccStatus_t(*)(ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, - void *))mmDlsym(handle, const_cast(update_kernel_args.c_str())); + auto cceUpdateKernelArgs = (ccStatus_t(*)(ccOpContext &, uint64_t, 
uint64_t, + uint64_t, void *, uint64_t, void *))mmDlsym(handle, const_cast(update_kernel_args.c_str())); if (cceUpdateKernelArgs == nullptr) { GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); if (mmDlclose(handle) != 0) { diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 90437d77..364a360f 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -549,8 +549,13 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr if (!op_compile_strategy.empty()) { (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } - std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, - compute_graph->GetGraphID(), subgraph, compute_graph, session_id, GetThreadLocalContext()); + std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, + this, + compute_graph->GetGraphID(), + subgraph, + compute_graph, + session_id, + GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); return FAILED; diff --git a/ge/graph/manager/util/hcom_util.cc b/ge/graph/manager/util/hcom_util.cc index 487b24af..50fa9936 100644 --- a/ge/graph/manager/util/hcom_util.cc +++ b/ge/graph/manager/util/hcom_util.cc @@ -263,7 +263,8 @@ Status HcomOmeUtil::GetHcclRootId(const ge::ConstOpDescPtr &op_desc, int64_t &ro Status HcomOmeUtil::GetAllRootId(const ge::ConstOpDescPtr &op_desc, std::vector &kernel_hccl_infos) { GE_CHECK_NOTNULL(op_desc); - if (op_desc->GetType() == HCOMBROADCAST || op_desc->GetType() == HVDCALLBACKBROADCAST || op_desc->GetType() == HCOMREDUCE) { + if (op_desc->GetType() == HCOMBROADCAST || + op_desc->GetType() == HVDCALLBACKBROADCAST || op_desc->GetType() == HCOMREDUCE) { GELOGI("GetAllRootId Node[%s] opType[%s] get hccl rootId.", op_desc->GetName().c_str(), op_desc->GetType().c_str()); int64_t root_id = 0; Status dmrt = GetHcclRootId(op_desc, root_id); diff --git 
a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index 88e661a7..d1111d52 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -149,10 +149,10 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node // 5. While->NetOutput in known subgraph std::string op_type; bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || - IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || - ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || - (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && - (kWhileOpTypes.count(in_node->GetType()) != 0)); + IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || + ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || + (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && + (kWhileOpTypes.count(in_node->GetType()) != 0)); if (insert_flag) { GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 93b261aa..22ae62de 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1621,7 +1621,8 @@ Status GraphPrepare::CheckUserInput(const std::vector &user_input) { for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) { if (desc.GetShape().GetDim(i) < 0) { - std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(desc.GetShape().GetDim(i)) + "]" ; + std::string situation = "data dim[" + std::to_string(i) + "][" + + std::to_string(desc.GetShape().GetDim(i)) + "]" ; std::string reason = "it need >= 0"; 
ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(GE_GRAPH_INIT_FAILED, "data dim %zu is not supported, need >= 0, real:%ld.", i, diff --git a/ge/host_kernels/ssd_prior_box_kernel.cc b/ge/host_kernels/ssd_prior_box_kernel.cc index 57af4026..3661fa9d 100644 --- a/ge/host_kernels/ssd_prior_box_kernel.cc +++ b/ge/host_kernels/ssd_prior_box_kernel.cc @@ -180,8 +180,12 @@ Status SsdPriorboxKernel::SetVariance(const vector &variance, const int d return SUCCESS; } -Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, - int layer_width, int layer_height, int &num_priors, +Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, + uint32_t min_sizes_size, + uint32_t max_sizes_size, + int layer_width, + int layer_height, + int &num_priors, int &dim_size) const { if (ge::CheckUint32MulOverflow(min_sizes_size, aspect_ratios_size) != SUCCESS) { return PARAM_INVALID; diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 91996ab3..e03ebc31 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -379,11 +379,13 @@ Status HybridModelAsyncExecutor::Execute(const std::vector &inputs, } if (output_real_size > 0) { if (outputs[i].length < static_cast(output_real_size)) { - GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the real size of output[%ld]", + GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by " + "user should be greater than or equal to the real size of output[%ld]", i, outputs[i].length, output_real_size); return FAILED; } - GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(outputs[i].data, 
outputs[i].length, + args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE)); } outputs[i].length = output_real_size; } diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index d4019eda..1d813526 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -62,7 +62,8 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { { std::lock_guard lk(mu_); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); - GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "Invoke InferShapeAndType failed."); + GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), + "Invoke InferShapeAndType failed."); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); } // Check again to make sure shape is valid after shape inference diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index c319b06b..132b0f8c 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -176,7 +176,8 @@ Status HybridModel::GetInputOutputDescInfo(vector &input_de return SUCCESS; } -void HybridModel::SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, +void HybridModel::SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, + std::vector> &shape_ranges, InputOutputDescInfo &input) { for (auto model_input_dim : model_input_dims) { input.shape_info.dims.push_back(model_input_dim); @@ -245,7 +246,8 @@ Status HybridModel::GetInputDescInfo(vector &input_desc, st return SUCCESS; } -void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output_desc_info, uint32_t &format_result) { +void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, + InputOutputDescInfo 
&output_desc_info, uint32_t &format_result) { GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return ); Format format = output_desc->GetFormat(); GeShape shape = output_desc->GetShape(); @@ -283,7 +285,8 @@ void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDes Status HybridModel::GetOutputDescInfo(vector &output_desc, std::vector &formats) { std::vector output_desc_list; - GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); // output_desc_list contains vaild input desc + // output_desc_list contains vaild input desc + GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); vector out_node_names; (void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names); @@ -293,7 +296,8 @@ Status HybridModel::GetOutputDescInfo(vector &output_desc, GE_CHECK_NOTNULL(op_desc); auto out_size = static_cast(op_desc->GetInputsSize()); - GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size()); + GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), + FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size()); for (uint32_t index = 0; index < out_size; ++index) { string output_name; @@ -301,9 +305,11 @@ Status HybridModel::GetOutputDescInfo(vector &output_desc, std::vector src_index = op_desc->GetSrcIndex(); if (out_size == out_node_names.size()) { bool contains_colon = out_node_names[index].find(":") != std::string::npos; - output_name = contains_colon ? out_node_names[index] : out_node_names[index] + ":" + std::to_string(src_index[index]); + output_name = contains_colon ? 
out_node_names[index] : out_node_names[index] + + ":" + std::to_string(src_index[index]); } else { - output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); + output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + + "_" + std::to_string(src_index[index]); } InputOutputDescInfo output_desc_info; diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 1bc08053..5fd5f8f5 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -104,7 +104,8 @@ class HybridModel { void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } - void SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, + void SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, + std::vector> &shape_ranges, InputOutputDescInfo &input); private: diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index c010a690..319ae8d3 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -590,7 +590,7 @@ graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &m GELOGE(GRAPH_PARAM_INVALID, "input model is illegal"); return GRAPH_PARAM_INVALID; } - return FileSaver::SaveToFile((output_file + ".om"), reinterpret_cast(model.data.get()), + return FileSaver::SaveToFile((output_file + ".om"), reinterpret_cast(model.data.get()), static_cast(model.length)); } @@ -605,7 +605,7 @@ graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &mod return GRAPH_PARAM_INVALID; } std::string str_output_file = output_file; - return FileSaver::SaveToFile((str_output_file + ".om"), reinterpret_cast(model.data.get()), + return FileSaver::SaveToFile((str_output_file + ".om"), reinterpret_cast(model.data.get()), static_cast(model.length)); } diff --git a/ge/opskernel_manager/ops_kernel_manager.cc 
b/ge/opskernel_manager/ops_kernel_manager.cc index 8134a463..e9c72a37 100644 --- a/ge/opskernel_manager/ops_kernel_manager.cc +++ b/ge/opskernel_manager/ops_kernel_manager.cc @@ -175,8 +175,8 @@ Status OpsKernelManager::ParsePluginOptions(const map &options, } else if (flag == 1) { enable_flag = true; } else { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), - iter->second.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", + plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } catch (std::invalid_argument &) { @@ -188,8 +188,8 @@ Status OpsKernelManager::ParsePluginOptions(const map &options, iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (...) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), - iter->second.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", + plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } else { diff --git a/ge/session/omg.cc b/ge/session/omg.cc index 80a13ea7..7ff52e82 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -644,7 +644,8 @@ Status ParseOutNodes(const string &out_nodes) { if (!domi::GetContext().user_out_nodes_top_vec.empty()) { ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, {"--out_nodes", out_nodes, "is not all index or top_name"}); - GELOGE(PARAM_INVALID, "This out_nodes str must be all index or top_name, while the actual input is %s", out_nodes.c_str()); + GELOGE(PARAM_INVALID, + "This out_nodes str must be all index or top_name, while the actual input is %s", out_nodes.c_str()); return PARAM_INVALID; } // stoi: The method may throw an exception: invalid_argument/out_of_range diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 
a8f7165a..2ab40d82 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -109,7 +109,8 @@ Status SingleOp::ValidateArgs(const std::vector &inputs, const std:: auto num_outputs = outputs.size(); if (num_outputs != output_sizes_.size()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output num mismatch. model expect %zu, but given %zu", output_sizes_.size(), outputs.size()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output num mismatch. model expect %zu, but given %zu", + output_sizes_.size(), outputs.size()); return ACL_ERROR_GE_PARAM_INVALID; } @@ -248,12 +249,14 @@ Status DynamicSingleOp::ValidateParams(const vector &input_desc, } if (input_desc.size() != num_inputs_) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input number mismatches. expect %zu, but given %zu", num_inputs_, input_desc.size()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input number mismatches. expect %zu, but given %zu", + num_inputs_, input_desc.size()); return ACL_ERROR_GE_PARAM_INVALID; } if (output_desc.size() != num_outputs_) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output number mismatches. expect %zu, but given %zu", num_outputs_, output_desc.size()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output number mismatches. 
expect %zu, but given %zu", + num_outputs_, output_desc.size()); return ACL_ERROR_GE_PARAM_INVALID; } diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index b453e0bc..525f479b 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -263,7 +263,8 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { task->SetModelArgs(model_name_, model_id_); single_op.tasks_.emplace_back(task); } else { - GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); + GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, + "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; } } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index e06a08c6..594352aa 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -173,7 +173,8 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam auto tbe_kernel = GetTbeKernel(op_desc_); if (tbe_kernel == nullptr) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s", op_desc_->GetName().c_str()); + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. 
op = %s", + op_desc_->GetName().c_str()); return ACL_ERROR_GE_INTERNAL_ERROR; } diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h index 12c6af89..090e7e26 100644 --- a/inc/framework/common/taskdown_common.h +++ b/inc/framework/common/taskdown_common.h @@ -21,7 +21,7 @@ namespace ge { -#define CC_FUSION_OP_MAX 32 +const int CC_FUSION_OP_MAX = 32; typedef enum tagCcStatus { CC_STATUS_SUCCESS = 0, /**< succ */ From 15c03ba5c6d880f78273731199890ca7c00a2923 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Fri, 11 Dec 2020 18:28:40 +0800 Subject: [PATCH 088/127] add aclgrphGenerateForOp --- ge/generator/ge_generator.cc | 77 +++++++++++++++----------- ge/ir_build/ge_ir_build.cc | 48 ++++++++++++++++ inc/external/ge/ge_ir_build.h | 15 +++++ inc/framework/generator/ge_generator.h | 15 ++++- 4 files changed, 120 insertions(+), 35 deletions(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index dc64aac1..a1a45028 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -584,40 +584,11 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in // 2. Create ComputeGraph. string name = ge::CurrentTimeInStr() + "_" + model_file_name; - ge::ComputeGraphPtr compute_graph = MakeShared(name); - GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR); - - // 3. Add Node to ComputeGraph. - NodePtr op_node = compute_graph->AddNode(op_desc); - GE_CHECK_NOTNULL_EXEC(op_node, return INTERNAL_ERROR); - - // 4. Create InputData node. 
- int32_t arg_index = 0; - if (inputs.empty()) { - for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { - GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); - if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { - continue; - } - GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false)); - arg_index++; - } - } else { - for (const auto &in_desc : inputs) { - GeTensorDesc input_desc = in_desc.GetTensorDesc(); - GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); - arg_index++; - } + Graph graph; + if (BuildSingleOpGraph(op_desc, inputs, outputs, name, graph) != ge::SUCCESS) { + GELOGE(GRAPH_FAILED, "make graph fail."); + return GRAPH_FAILED; } - - // 5. Create Output node. - if (!outputs.empty()) { - GE_CHK_STATUS_RET_NOLOG(AddOutputs(compute_graph, op_node, outputs)); - } - - // dump ComputeGraph. - compute_graph->Dump(); - Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); GELOGI("ATC parser success in single op build."); GeRootModelPtr ge_root_model = nullptr; @@ -673,6 +644,46 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, + const vector &outputs, std::string graph_name, Graph &graph) { + ge::ComputeGraphPtr compute_graph = MakeShared(graph_name); + GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR); + + // 1. Add Node to ComputeGraph. + NodePtr op_node = compute_graph->AddNode(op_desc); + GE_CHECK_NOTNULL_EXEC(op_node, return INTERNAL_ERROR); + + // 2. Create InputData node. 
+ int32_t arg_index = 0; + if (inputs.empty()) { + for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { + GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); + if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { + continue; + } + GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false)); + arg_index++; + } + } else { + for (const auto &in_desc : inputs) { + GeTensorDesc input_desc = in_desc.GetTensorDesc(); + GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); + arg_index++; + } + } + + // 3. Create Output node. + if (!outputs.empty()) { + GE_CHK_STATUS_RET_NOLOG(AddOutputs(compute_graph, op_node, outputs)); + } + + // dump ComputeGraph node. + compute_graph->Dump(); + graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); + + return SUCCESS; +} + Status GeGenerator::Impl::SaveParams(GeModelPtr &ge_model, const string &type, const map &attrs, const vector &inputs, const vector &outputs) { GE_CHECK_NOTNULL_EXEC(ge_model, return PARAM_INVALID); diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index c010a690..01c8c558 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -710,4 +710,52 @@ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const siz return GRAPH_SUCCESS; } +graphStatus aclgrphGenerateForOp(const AscendString &op_type, const vector &inputs, + const vector &outputs, Graph &graph) { + auto op_type_str = std::string(op_type.GetString()); + auto op_name = op_type_str + "_" + std::to_string(ge::GetCurrentTimestamp()); + auto op_desc = ge::MakeShared(op_name, op_type_str); + GE_CHECK_NOTNULL(op_desc); + + // convert input tensordesc to getensor + std::vector input_tensors; + for (const auto &input : inputs) { + ge::GeTensorDesc tensor_desc(ge::GeShape(input.GetShape().GetDims()), input.GetFormat(), input.GetDataType()); + + tensor_desc.SetOriginFormat(input.GetFormat()); + 
ge::TensorUtils::SetRealDimCnt(tensor_desc, static_cast(input.GetShape().GetDims().size())); + ge::TensorUtils::SetInputTensor(tensor_desc, true); + ge::TensorUtils::SetOutputTensor(tensor_desc, false); + + if (op_desc->AddInputDesc(tensor_desc) != ge::GRAPH_SUCCESS) { + GELOGE(ge::FAILED, "AddInputDesc fail."); + return ge::FAILED; + } + input_tensors.emplace_back(tensor_desc); + } + + // convert output tensordesc to getensor + std::vector output_tensors; + for (const auto &output : outputs) { + ge::GeTensorDesc tensor_desc(ge::GeShape(output.GetShape().GetDims()), output.GetFormat(), output.GetDataType()); + + tensor_desc.SetOriginFormat(output.GetFormat()); + ge::TensorUtils::SetRealDimCnt(tensor_desc, static_cast(output.GetShape().GetDims().size())); + ge::TensorUtils::SetInputTensor(tensor_desc, false); + ge::TensorUtils::SetOutputTensor(tensor_desc, true); + + (void)op_desc->AddOutputDesc(tensor_desc); + output_tensors.emplace_back(tensor_desc); + } + + // call api to get graph + ge::GeGenerator generator; + std::string graph_name = ge::CurrentTimeInStr() + "_graph"; + if (generator.BuildSingleOpGraph(op_desc, input_tensors, output_tensors, graph_name, graph) != ge::SUCCESS) { + GELOGE(GRAPH_FAILED, "make graph fail."); + return GRAPH_FAILED; + } + return GRAPH_SUCCESS; +} + } // namespace ge diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h index 778ec21d..19821e1a 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -121,5 +121,20 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph); * @retval OtherValues Failure */ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); + +/** + * @ingroup AscendCL + * @brief create single op graph + * + * @param op_type[IN] the op_type + * @param inputs[IN] the inputdesc + * @param outputs[IN] the outputdesc + * @param graph[OUT] the graph + * @retval GRAPH_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector &inputs, + const std::vector &outputs, Graph &graph); + }; // namespace ge #endif // INC_EXTERNAL_GE_IR_BUILD_H_ diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index c446b983..e0904965 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -74,11 +74,22 @@ class GeGenerator { /// @param [in] op_desc: the OP description. /// @param [in] inputs: input tensors. /// @param [in] outputs: output tensors. - /// @param [in] engine_type: specific engine. - /// @param [out] model_buff: model buff of single op. + /// @param [in] engine_type: engine type. + /// @param [out] model_buff: model buff of op. /// @return SUCCESS or FAILED Status BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, OpEngineType engine_type, ModelBufferData &model_buff); + /// + /// @ingroup ge + /// @brief: Build single Op into model buff. + /// @param [in] op_desc: the OP description. + /// @param [in] inputs: input tensors. + /// @param [in] outputs: output tensors. + /// @param [in] graph_name: graph name. + /// @param [out] graph: graph of single op. 
+ /// @return SUCCESS or FAILED + Status BuildSingleOpGraph(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, + std::string graph_name, Graph &graph); private: Status GenerateModel(const Graph &graph, const string &file_name_prefix, const vector &inputs, From a703bea744b51cf8549bd2ce5c167859708d8ca0 Mon Sep 17 00:00:00 2001 From: lianghao Date: Fri, 11 Dec 2020 19:58:37 +0800 Subject: [PATCH 089/127] TransOpWithoutReshapeFusionPass --- ge/graph/passes/transop_without_reshape_fusion_pass.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ge/graph/passes/transop_without_reshape_fusion_pass.cc b/ge/graph/passes/transop_without_reshape_fusion_pass.cc index f4584cad..6bea9edc 100644 --- a/ge/graph/passes/transop_without_reshape_fusion_pass.cc +++ b/ge/graph/passes/transop_without_reshape_fusion_pass.cc @@ -736,8 +736,12 @@ void TransOpWithoutReshapeFusionPass::RemoveNousedNodes(const ComputeGraphPtr &g GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(kRemainNode, true), GELOGE(INTERNAL_ERROR, "set ext attr failed"); return); GELOGI("remove node:%s", node->GetName().c_str()); - if (graph->RemoveNode(node) != GRAPH_SUCCESS) { - GELOGW("remove node failed!node:%s", node->GetName().c_str()); + if (GraphUtils::IsolateNode(node, {0}) != GRAPH_SUCCESS) { + GELOGW("Isolate node: %s failed.", node->GetName().c_str()); + continue; + } + if (GraphUtils::RemoveNodeWithoutRelink(graph, node) != GRAPH_SUCCESS) { + GELOGW("Remove node: %s failed.", node->GetName().c_str()); continue; } } From b373acb0e228f0b164b6cc6b1ea42b58b6f93d6b Mon Sep 17 00:00:00 2001 From: chuxing Date: Fri, 11 Dec 2020 20:49:22 +0800 Subject: [PATCH 090/127] ACL single op refactory --- ge/single_op/single_op.cc | 137 +++------------- ge/single_op/single_op.h | 16 +- ge/single_op/single_op_model.cc | 30 ++-- ge/single_op/single_op_model.h | 4 +- ge/single_op/stream_resource.cc | 34 +++- ge/single_op/stream_resource.h | 3 +- .../task/aicpu_kernel_task_builder.cc | 21 ++- 
ge/single_op/task/aicpu_kernel_task_builder.h | 4 +- ge/single_op/task/aicpu_task_builder.cc | 48 +----- ge/single_op/task/aicpu_task_builder.h | 4 +- ge/single_op/task/build_task_utils.cc | 7 +- ge/single_op/task/build_task_utils.h | 8 +- ge/single_op/task/op_task.cc | 154 ++++++++++++++---- ge/single_op/task/op_task.h | 92 ++++------- 14 files changed, 278 insertions(+), 284 deletions(-) diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 2ab40d82..6e51b6ff 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -25,6 +25,7 @@ #include "graph/load/new_model_manager/model_utils.h" #include "runtime/mem.h" #include "single_op/single_op_manager.h" +#include "single_op/task/build_task_utils.h" #include "graph/load/new_model_manager/model_manager.h" namespace ge { @@ -77,7 +78,8 @@ Status ProfilingTaskInfo(OpTask *op_task) { } } // namespace -SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) { +SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream) + : stream_resource_(stream_resource), stream_mutex_(stream_mutex), stream_(stream) { } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOp::~SingleOp() { @@ -159,37 +161,6 @@ Status SingleOp::UpdateArgs(const std::vector &inputs, const std::ve *arg_addr = args_[i]; } } - // update aicpu_TF or aicpu_CC args - for (auto &task : tasks_) { - size_t io_addr_num = args_.size(); - if (task->GetOpTaskType() == OP_TASK_AICPU) { - GELOGD("Update aicpu_TF task args"); - task->SetIoAddrsForDump(args_); - auto *dst_io_addr = const_cast(reinterpret_cast(task->GetIOAddr())); - GE_CHECK_NOTNULL(dst_io_addr); - auto rt_ret = rtMemcpyAsync(dst_io_addr, - sizeof(uint64_t) * args_.size(), - &args_[0], - sizeof(uint64_t) * args_.size(), - RT_MEMCPY_HOST_TO_DEVICE_EX, - stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtMemcpyAsync addresses failed, ret = %d", rt_ret); - return rt_ret; - 
} - } else if (task->GetOpTaskType() == OP_TASK_AICPUCC) { - GELOGD("Update aicpu_CC task args"); - const uintptr_t *task_io_addr = reinterpret_cast(task->GetIOAddr()); - GE_CHECK_NOTNULL(task_io_addr); - auto io_addr = reinterpret_cast(const_cast(task_io_addr)); - for (size_t i = 0; i < io_addr_num; ++i) { - io_addr[i] = static_cast(args_[i]); - } - } else { - GELOGW("Only TF_kernel aicpu and aicpu_CC are supported, but got %u", task->GetOpTaskType()); - continue; - } - } return SUCCESS; } @@ -200,7 +171,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c return ret; } + GE_CHECK_NOTNULL(stream_resource_); std::lock_guard lk(*stream_mutex_); + auto current_mem_base = stream_resource_->GetMemoryBase(); + if (running_param_->mem_base != current_mem_base) { + running_param_->mem_base = const_cast(current_mem_base); + GELOGD("Memory base changed, new memory base = %p", current_mem_base); + for (auto &task : tasks_) { + auto new_address = BuildTaskUtils::GetAddresses(task->GetOpdesc(), *running_param_); + GE_CHK_STATUS_RET(task->UpdateArgTable(*running_param_), + "[%s] Failed to update arg table", + task->GetOpdesc()->GetName().c_str()); + } + } ret = UpdateArgs(inputs, outputs); if (ret != SUCCESS) { return ret; @@ -225,9 +208,6 @@ DynamicSingleOp::DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex : resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) { } -DynamicSingleOp::~DynamicSingleOp() { -} - Status DynamicSingleOp::ValidateParams(const vector &input_desc, const std::vector &inputs, std::vector &output_desc, @@ -249,65 +229,24 @@ Status DynamicSingleOp::ValidateParams(const vector &input_desc, } if (input_desc.size() != num_inputs_) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input number mismatches. expect %zu, but given %zu", - num_inputs_, input_desc.size()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "Input number mismatches. 
expect %zu, but given %zu", + num_inputs_, + input_desc.size()); return ACL_ERROR_GE_PARAM_INVALID; } if (output_desc.size() != num_outputs_) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output number mismatches. expect %zu, but given %zu", - num_outputs_, output_desc.size()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "Output number mismatches. expect %zu, but given %zu", + num_outputs_, + output_desc.size()); return ACL_ERROR_GE_PARAM_INVALID; } return SUCCESS; } -Status DynamicSingleOp::AllocateWorkspaces(const std::vector &workspace_sizes, - std::vector &workspaces) { - static const std::string kPurpose("malloc workspace memory for dynamic op."); - if (workspace_sizes.empty()) { - GELOGD("No need to allocate workspace."); - return SUCCESS; - } - int64_t total_size = 0; - std::vector ws_offsets; - for (auto ws_size : workspace_sizes) { - // alignment and padding should be done in OpParaCalculate - GE_CHK_STATUS_RET_NOLOG(CheckInt64AddOverflow(total_size, ws_size)); - ws_offsets.emplace_back(total_size); - total_size += ws_size; - } - - GELOGD("Total workspace size is %ld", total_size); - StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); - GE_CHECK_NOTNULL(stream_resource); - auto ws_base = stream_resource->MallocMemory(kPurpose, static_cast(total_size)); - if (ws_base == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to allocate memory of size: %ld", total_size); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } - GELOGD("Done allocating workspace memory successfully."); - - for (auto ws_offset : ws_offsets) { - workspaces.emplace_back(ws_base + ws_offset); - } - - return SUCCESS; -} - -Status DynamicSingleOp::ExecuteTbeTask(const vector &input_desc, - const vector &inputs, - vector &output_desc, - vector &outputs) { - GE_CHK_STATUS_RET_NOLOG(op_task_->UpdateRunInfo(input_desc, output_desc)); - - std::vector workspace_buffers; - GE_CHK_STATUS_RET_NOLOG(AllocateWorkspaces(op_task_->GetWorkspaceSizes(), 
workspace_buffers)); - - return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); -} - Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, vector &output_desc, @@ -316,32 +255,8 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); std::lock_guard lk(*stream_mutex_); - std::vector inputs; - std::vector outputs; - for (auto &buffer : input_buffers) { - inputs.emplace_back(buffer.data); - } - for (auto &buffer : output_buffers) { - outputs.emplace_back(buffer.data); - } - - if (op_task_->GetOpTaskType() == OP_TASK_TBE) { - auto ret = ExecuteTbeTask(input_desc, inputs, output_desc, outputs); - if (ret == SUCCESS) { - GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get())); - } - return ret; - } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { - auto aicpu_ret = op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); - if (aicpu_ret == SUCCESS) { - GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get())); - } - return aicpu_ret; - } else { - GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, - "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", - op_task_->GetOpTaskType()); - return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; - } + GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get())); + return SUCCESS; } } // namespace ge diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h index 14ef8ce1..d677f94a 100755 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -30,9 +30,11 @@ #include "cce/aicpu_engine_struct.h" namespace ge { +class StreamResource; +struct SingleOpModelParam; class SingleOp { public: - SingleOp(std::mutex *stream_mutex, rtStream_t stream); + 
SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream); ~SingleOp(); Status ExecuteAsync(const std::vector &inputs, const std::vector &outputs); @@ -44,6 +46,7 @@ class SingleOp { Status GetArgs(const std::vector &inputs, const std::vector &outputs); friend class SingleOpModel; + StreamResource *stream_resource_; std::mutex *stream_mutex_; rtStream_t stream_ = nullptr; std::vector input_addr_list_; @@ -54,12 +57,13 @@ class SingleOp { std::vector tasks_; std::vector> arg_table_; + std::unique_ptr running_param_; }; class DynamicSingleOp { public: DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex_, rtStream_t stream); - ~DynamicSingleOp(); + ~DynamicSingleOp() = default; Status ExecuteAsync(const vector &input_desc, const std::vector &inputs, std::vector &output_desc, @@ -72,14 +76,6 @@ class DynamicSingleOp { std::vector &output_desc, std::vector &outputs) const; - Status AllocateWorkspaces(const std::vector &workspace_sizes, - std::vector &workspaces); - - Status ExecuteTbeTask(const vector &input_desc, - const vector &inputs, - vector &output_desc, - vector &outputs); - std::unique_ptr op_task_; uintptr_t resource_id_ = 0; std::mutex *stream_mutex_; diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 525f479b..6b4f6b04 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -92,7 +92,8 @@ Status SingleOpModel::InitModelMem(StreamResource &res) { if (model_params_.memory_size > model_params_.zero_copy_mem_size) { const string purpose("malloc feature map memory on model execute."); GELOGI("total memory: %lu, zero_copy_mem: %lu", model_params_.memory_size, model_params_.zero_copy_mem_size); - model_params_.mem_base = res.MallocMemory(purpose, model_params_.memory_size - model_params_.zero_copy_mem_size); + model_params_.mem_base = + res.MallocMemory(purpose, model_params_.memory_size - model_params_.zero_copy_mem_size, false); if (model_params_.mem_base == 
nullptr) { return ACL_ERROR_GE_MEMORY_ALLOCATION; } @@ -226,9 +227,10 @@ Status SingleOpModel::SetInputsAndOutputs(SingleOp &single_op) { return SUCCESS; } -Status SingleOpModel::BuildTaskList(SingleOp &single_op) { +Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &single_op) { auto ge_model = model_helper_.GetGeModel(); GE_CHECK_NOTNULL(ge_model); + single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); auto tasks = ge_model->GetModelTaskDefPtr()->task(); for (int i = 0; i < tasks.size(); ++i) { const TaskDef &task_def = tasks[i]; @@ -247,9 +249,11 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { return ret; } - single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); ParseArgTable(tbe_task, single_op); tbe_task->SetModelArgs(model_name_, model_id_); + if (tbe_task->tiling_buffer_ != nullptr) { + tbe_task->stream_resource_ = stream_resource; + } single_op.tasks_.emplace_back(tbe_task); } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); @@ -261,6 +265,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { return ret; } task->SetModelArgs(model_name_, model_id_); + ParseArgTable(task, single_op); single_op.tasks_.emplace_back(task); } else { GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, @@ -278,6 +283,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { return ret; } aicpu_task->SetModelArgs(model_name_, model_id_); + ParseArgTable(aicpu_task, single_op); single_op.tasks_.emplace_back(aicpu_task); } else { // skip @@ -287,21 +293,23 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { return SUCCESS; } -void SingleOpModel::ParseArgTable(TbeOpTask *task, SingleOp &op) { +void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) { if (task == nullptr) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "tbe op task is nullptr"); return; } + // args: addr1, 
addr2, addr3 ... - auto *args = const_cast(reinterpret_cast(task->GetArgs())); - size_t arg_size = task->GetArgSize(); - for (size_t i = 0; i < arg_size / sizeof(void *); ++i) { - uintptr_t *ptr_to_addr = args + i; + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + task->GetIoAddr(arg_base, arg_num); + for (size_t i = 0; i < arg_num; ++i) { + uintptr_t *ptr_to_addr = arg_base + i; uintptr_t addr = *ptr_to_addr; auto iter = model_params_.addr_mapping_.find(addr); if (iter != model_params_.addr_mapping_.end()) { int arg_index = iter->second; - GELOGI("%s args[%zu] mapped to user designated args[%d]", task->GetStubName().c_str(), i, arg_index); + GELOGI("%s args[%zu] mapped to user designated args[%d]", task->GetOpdesc()->GetName().c_str(), i, arg_index); op.arg_table_[iter->second].emplace_back(ptr_to_addr); } } @@ -386,8 +394,10 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs()); GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); + single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_)); + GE_CHECK_NOTNULL(single_op.running_param_); GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op)); - return BuildTaskList(single_op); + return BuildTaskList(&resource, single_op); } Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 5f1c842a..c3164543 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -65,7 +65,7 @@ class SingleOpModel { Status ParseInputNode(const OpDescPtr &op_desc); void ParseOutputNode(const OpDescPtr &op_desc); - Status BuildTaskList(SingleOp &single_op); + Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); Status 
BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, @@ -74,7 +74,7 @@ class SingleOpModel { Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); - void ParseArgTable(TbeOpTask *task, SingleOp &op); + void ParseArgTable(OpTask *task, SingleOp &op); std::string model_name_; uint32_t model_id_ = 0; diff --git a/ge/single_op/stream_resource.cc b/ge/single_op/stream_resource.cc index f545b6c8..722a1024 100755 --- a/ge/single_op/stream_resource.cc +++ b/ge/single_op/stream_resource.cc @@ -69,11 +69,25 @@ uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, size_t size, size_t &max_allocated, std::vector &allocated) { + if (size == 0) { + GELOGD("Mem size == 0"); + return nullptr; + } + if (size <= max_allocated && !allocated.empty()) { GELOGD("reuse last memory"); return allocated.back(); } + if (!allocated.empty()) { + uint8_t *current_buffer = allocated.back(); + allocated.pop_back(); + if (rtStreamSynchronize(stream_) != RT_ERROR_NONE) { + GELOGW("Failed to invoke rtStreamSynchronize"); + } + (void) rtFree(current_buffer); + } + uint8_t *buffer = nullptr; auto ret = rtMalloc(reinterpret_cast(&buffer), size, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { @@ -96,10 +110,14 @@ uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, return buffer; } -uint8_t *StreamResource::MallocMemory(const std::string &purpose, size_t size) { +uint8_t *StreamResource::MallocMemory(const std::string &purpose, size_t size, bool holding_lock) { GELOGD("To Malloc memory, size = %zu", size); - uint8_t *buffer = DoMallocMemory(purpose, size, max_memory_size_, memory_list_); - return buffer; + if (holding_lock) { + return DoMallocMemory(purpose, size, max_memory_size_, memory_list_); + } else { + std::lock_guard lk(stream_mu_); + return 
DoMallocMemory(purpose, size, max_memory_size_, memory_list_); + } } uint8_t *StreamResource::MallocWeight(const std::string &purpose, size_t size) { @@ -158,7 +176,7 @@ Status StreamResource::BuildOperator(const string &model_name, const ModelData & return ret; } - auto new_op = std::unique_ptr(new(std::nothrow) SingleOp(&stream_mu_, stream_)); + auto new_op = std::unique_ptr(new(std::nothrow) SingleOp(this, &stream_mu_, stream_)); if (new_op == nullptr) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "new SingleOp failed"); return ACL_ERROR_GE_MEMORY_ALLOCATION; @@ -171,4 +189,12 @@ Status StreamResource::BuildOperator(const string &model_name, const ModelData & op_map_[model_data.model_data] = std::move(new_op); return SUCCESS; } + +const uint8_t *StreamResource::GetMemoryBase() const { + if (memory_list_.empty()) { + return nullptr; + } + + return memory_list_.back(); +} } // namespace ge diff --git a/ge/single_op/stream_resource.h b/ge/single_op/stream_resource.h index 39f08ebe..d5bc941a 100755 --- a/ge/single_op/stream_resource.h +++ b/ge/single_op/stream_resource.h @@ -45,8 +45,9 @@ class StreamResource { Status BuildOperator(const std::string &model_name, const ModelData &model_data, SingleOp **single_op); Status BuildDynamicOperator(const std::string &model_name, const ModelData &model_data, DynamicSingleOp **single_op); - uint8_t *MallocMemory(const std::string &purpose, size_t size); + uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true); uint8_t *MallocWeight(const std::string &purpose, size_t size); + const uint8_t *GetMemoryBase() const; private: uint8_t *DoMallocMemory(const std::string &purpose, diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index cd218c94..c676ccf8 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -17,17 +17,22 @@ #include "single_op/task/aicpu_kernel_task_builder.h" #include 
"framework/common/taskdown_common.h" #include "graph/load/new_model_manager/model_manager.h" +#include "build_task_utils.h" namespace ge { AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def) : op_desc_(op_desc), kernel_def_(kernel_def) {} -Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task) { +Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task, const SingleOpModelParam ¶m) { size_t aicpu_arg_size = kernel_def_.args_size(); - if (aicpu_arg_size <= 0) { + if (aicpu_arg_size <= sizeof(aicpu::AicpuParamHead)) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "aicpu_arg_size is invalid, value = %zu", aicpu_arg_size); return ACL_ERROR_GE_PARAM_INVALID; } + + task.io_addr_num_ = op_desc_->GetInputsSize() + op_desc_->GetOutputsSize(); + GE_CHECK_GE(aicpu_arg_size - sizeof(aicpu::AicpuParamHead), task.io_addr_num_ * sizeof(void *)); + std::unique_ptr aicpu_args; aicpu_args.reset(new(std::nothrow) uint8_t[aicpu_arg_size]()); if (aicpu_args == nullptr) { @@ -41,13 +46,19 @@ Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task) { return ACL_ERROR_GE_INTERNAL_ERROR; } - task.SetIoAddr(aicpu_args.get() + sizeof(aicpu::AicpuParamHead)); + task.SetIoAddr(reinterpret_cast(aicpu_args.get() + sizeof(aicpu::AicpuParamHead))); task.SetKernelArgs(std::move(aicpu_args), aicpu_arg_size); + + auto addresses = BuildTaskUtils::GetKernelArgs(op_desc_, param); + GE_CHECK_GE(addresses.size(), task.io_addr_num_); + for (size_t i = 0; i < task.io_addr_num_; ++i) { + task.io_addr_[i] = reinterpret_cast(addresses[i]); + } return SUCCESS; } -Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { - auto ret = SetKernelArgs(task); +Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, const SingleOpModelParam ¶m) { + auto ret = SetKernelArgs(task, param); if (ret != SUCCESS) { return ret; } diff --git a/ge/single_op/task/aicpu_kernel_task_builder.h b/ge/single_op/task/aicpu_kernel_task_builder.h index 
e77e3c10..85d5034d 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.h +++ b/ge/single_op/task/aicpu_kernel_task_builder.h @@ -30,10 +30,10 @@ class AiCpuCCTaskBuilder { explicit AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def); ~AiCpuCCTaskBuilder() = default; - Status BuildTask(AiCpuCCTask &task, uint64_t kernel_id); + Status BuildTask(AiCpuCCTask &task, uint64_t kernel_id, const SingleOpModelParam ¶m); private: - Status SetKernelArgs(AiCpuCCTask &task); + Status SetKernelArgs(AiCpuCCTask &task, const SingleOpModelParam ¶m); const OpDescPtr op_desc_; const domi::KernelDef &kernel_def_; }; diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 8f28ffda..0cc5c554 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -26,26 +26,6 @@ namespace ge { AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) : op_desc_(op_desc), kernel_def_(kernel_def) {} - Status AiCpuTaskBuilder::SetInputOutputAddr(void **io_addr, const std::vector &addresses) { - size_t arg_size = kernel_def_.args_size(); - auto rt_ret = rtMalloc(io_addr, arg_size, RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtMalloc failed, size = %zu, ret = %d", arg_size, rt_ret); - return rt_ret; - } - - const void *src_addr = reinterpret_cast(addresses.data()); - uint64_t src_len = sizeof(void *) * addresses.size(); - rt_ret = rtMemcpy(*io_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - (void)rtFree(*io_addr); - GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", rt_ret); - return rt_ret; - } - - return SUCCESS; - } - Status AiCpuTaskBuilder::SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &fwk_op_kernel) { auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_def_.args().data(), kernel_def_.args().size()); @@ -80,39 +60,27 @@ namespace 
ge { return SUCCESS; } - Status AiCpuTaskBuilder::InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, - const SingleOpModelParam ¶m, bool dynamic_flag) { + Status AiCpuTaskBuilder::InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag) { if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size()); return ACL_ERROR_GE_PARAM_INVALID; } - auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); - auto ws_addr_vec = addresses.at(BuildTaskUtils::kAddressIndexWorkspace); - - if (dynamic_flag) { - GE_CHK_RT_RET(rtMalloc(kernel_workspace, kernel_def_.task_info_size(), RT_MEMORY_HBM)); - } else { - if (ws_addr_vec.empty()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "workspace Data Address is empty."); - return ACL_ERROR_GE_PARAM_INVALID; - } - *kernel_workspace = ws_addr_vec[0]; - } - GE_CHK_RT_RET(rtMemcpy(*kernel_workspace, kernel_def_.task_info_size(), + GE_CHK_RT_RET(rtMalloc(&task.workspace_addr_, kernel_def_.task_info_size(), RT_MEMORY_HBM)); + GE_CHK_RT_RET(rtMemcpy(task.workspace_addr_, kernel_def_.task_info_size(), kernel_def_.task_info().data(), kernel_def_.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); - auto ret = SetInputOutputAddr(io_addr, BuildTaskUtils::JoinAddresses(addresses)); - if (ret != SUCCESS) { - return ret; - } + auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param, false); + task.io_addr_host_ = BuildTaskUtils::JoinAddresses(addresses); + task.io_addr_size_ = task.io_addr_host_.size() * sizeof(void *); + GE_CHK_RT_RET(rtMalloc(&task.io_addr_, task.io_addr_size_, RT_MEMORY_HBM)); return SUCCESS; } Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, uint64_t kernel_id) { - GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(&task.io_addr_, &task.workspace_addr_, param, dynamic_flag)); + 
GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(task, param, dynamic_flag)); STR_FWK_OP_KERNEL fwk_op_kernel = {0}; auto ret = SetFmkOpKernel(task.io_addr_, task.workspace_addr_, fwk_op_kernel); diff --git a/ge/single_op/task/aicpu_task_builder.h b/ge/single_op/task/aicpu_task_builder.h index 4669e118..fe9c9bc2 100755 --- a/ge/single_op/task/aicpu_task_builder.h +++ b/ge/single_op/task/aicpu_task_builder.h @@ -33,10 +33,8 @@ namespace ge { private: static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel); - Status SetInputOutputAddr(void **io_addr, const std::vector &addresses); Status SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &kernel); - Status InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, - const SingleOpModelParam ¶m, bool dynamic_flag); + Status InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag); const OpDescPtr op_desc_; const domi::KernelExDef &kernel_def_; diff --git a/ge/single_op/task/build_task_utils.cc b/ge/single_op/task/build_task_utils.cc index 29f1657b..071e514b 100644 --- a/ge/single_op/task/build_task_utils.cc +++ b/ge/single_op/task/build_task_utils.cc @@ -32,7 +32,8 @@ const uint64_t kVarSize = 0; } std::vector> BuildTaskUtils::GetAddresses(const OpDescPtr &op_desc, - const SingleOpModelParam ¶m) { + const SingleOpModelParam ¶m, + bool keep_workspace) { std::vector> ret; RuntimeParam runtime_para; runtime_para.mem_size = param.memory_size; @@ -49,7 +50,9 @@ std::vector> BuildTaskUtils::GetAddresses(const OpDescPtr &o ret.emplace_back(ModelUtils::GetInputDataAddrs(runtime_para, op_desc)); ret.emplace_back(ModelUtils::GetOutputDataAddrs(runtime_para, op_desc)); - ret.emplace_back(ModelUtils::GetWorkspaceDataAddrs(runtime_para, op_desc)); + if (keep_workspace) { + ret.emplace_back(ModelUtils::GetWorkspaceDataAddrs(runtime_para, op_desc)); + } return ret; } diff --git a/ge/single_op/task/build_task_utils.h b/ge/single_op/task/build_task_utils.h index cddc7a2b..7a2369e4 100644 
--- a/ge/single_op/task/build_task_utils.h +++ b/ge/single_op/task/build_task_utils.h @@ -27,15 +27,17 @@ namespace ge { class BuildTaskUtils { public: + static constexpr int kAddressIndexOutput = 1; static constexpr int kAddressIndexWorkspace = 2; - static std::vector> GetAddresses(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); + static std::vector> GetAddresses(const OpDescPtr &op_desc, + const SingleOpModelParam ¶m, + bool keep_workspace = true); static std::vector JoinAddresses(const std::vector> &addresses); static std::vector GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); static std::string GetTaskInfo(const OpDescPtr &op_desc); template - static std::string VectorToString(const std::vector &values) - { + static std::string VectorToString(const std::vector &values) { std::stringstream ss; ss << '['; auto size = values.size(); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index f8b019e9..a714c6a8 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -24,9 +24,11 @@ #include "common/dump/dump_manager.h" #include "common/dump/dump_op.h" #include "common/formats/formats.h" +#include "common/math/math_util.h" #include "framework/common/debug/log.h" #include "register/op_tiling.h" #include "runtime/rt.h" +#include "build_task_utils.h" namespace ge { namespace { @@ -48,18 +50,22 @@ Status OpTask::OpenDump(rtStream_t stream) { std::vector output_adds; auto input_size = op_desc_->GetInputsSize(); auto output_size = op_desc_->GetOutputsSize(); - auto all_size = io_addrs_for_dump_.size(); - if (input_size + output_size != all_size) { - GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", all_size, + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + GetIoAddr(arg_base, arg_num); + if (arg_num < input_size + output_size) { + GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", + arg_num, input_size + output_size); return 
FAILED; } + for (size_t i = 0; i < input_size; i++) { - uint64_t input_addr = io_addrs_for_dump_[i]; + uint64_t input_addr = arg_base[i]; input_addrs.emplace_back(input_addr); } for (size_t j = 0; j < output_size; j++) { - uint64_t output_addr = io_addrs_for_dump_[input_size + j]; + uint64_t output_addr = arg_base[input_size + j]; output_adds.emplace_back(output_addr); } dump_op_.SetDumpInfo(DumpManager::GetInstance().GetDumpProperties(), op_desc_, input_addrs, output_adds, stream); @@ -89,10 +95,6 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr &&args, size_t arg_size void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; } -const vector &OpTask::GetWorkspaceSizes() const { return workspace_sizes_; } - -void OpTask::SetWorkspaceSizes(const vector &workspace_sizes) { workspace_sizes_ = workspace_sizes; } - void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { model_name_ = model_name; model_id_ = model_id; @@ -107,6 +109,36 @@ Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, u op_name = op_desc_->GetName(); return SUCCESS; } +Status OpTask::UpdateRunInfo(const vector &input_desc, const vector &output_desc) { + return UNSUPPORTED; +} +Status OpTask::UpdateArgTable(const SingleOpModelParam ¶m) { + auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); + auto all_addresses = BuildTaskUtils::JoinAddresses(addresses); + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + GetIoAddr(arg_base, arg_num); + if (arg_num != all_addresses.size()) { + GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect = %zu, but got = %zu", + op_desc_->GetName().c_str(), + arg_num, + all_addresses.size()); + return INTERNAL_ERROR; + } + + for (void *addr : all_addresses) { + *arg_base++ = reinterpret_cast(addr); + } + return SUCCESS; +} + +Status OpTask::LaunchKernel(const vector &input_desc, + const vector &input_buffers, + vector &output_desc, + vector &output_buffers, + rtStream_t stream) { + return UNSUPPORTED; +} 
TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { @@ -141,12 +173,6 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { return RT_FAILED; } GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); - - size_t input_size = op_desc_->GetInputsSize(); - size_t output_size = op_desc_->GetOutputsSize(); - uint64_t *io_addr = reinterpret_cast(args_.get()); - std::vector io_addrs(io_addr, io_addr + input_size + output_size); - SetIoAddrsForDump(io_addrs); auto status = OpenDump(stream); if (status != SUCCESS) { GELOGE(status, "Open dump failed in the tbe single op %s", this->stub_name_.c_str()); @@ -167,11 +193,12 @@ Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const ve GELOGE(FAILED, "Failed to invoke OpParaCalculate. ret = %u", ret); return FAILED; } - SetWorkspaceSizes(run_info.workspaces); block_dim_ = run_info.block_dim; tiling_data_ = run_info.tiling_data.str(); GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_, tiling_data_.size()); + + GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); return SUCCESS; } @@ -227,13 +254,54 @@ void TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, s max_tiling_size_ = max_tiling_size; } -Status TbeOpTask::LaunchKernel(const vector &inputs, const vector &outputs, - const vector &workspaces, rtStream_t stream) { +Status TbeOpTask::AllocateWorkspaces(const vector &workspace_sizes) { + static const std::string kPurpose("malloc workspace memory for dynamic op."); + if (workspace_sizes.empty()) { + GELOGD("No need to allocate workspace."); + return SUCCESS; + } + int64_t total_size = 0; + std::vector ws_offsets; + for (auto ws_size : workspace_sizes) { + // alignment and padding should be done in OpParaCalculate + GE_CHK_STATUS_RET_NOLOG(CheckInt64AddOverflow(total_size, ws_size)); + ws_offsets.emplace_back(total_size); + total_size += ws_size; + } + + GELOGD("Total workspace size is %ld", total_size); + 
GE_CHECK_NOTNULL(stream_resource_); + auto ws_base = stream_resource_->MallocMemory(kPurpose, static_cast(total_size)); + if (ws_base == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to allocate memory of size: %ld", total_size); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + GELOGD("Done allocating workspace memory successfully."); + + for (auto ws_offset : ws_offsets) { + workspaces_.emplace_back(ws_base + ws_offset); + } + + return SUCCESS; +} + +Status TbeOpTask::LaunchKernel(const vector &input_desc, + const vector &input_buffers, + vector &output_desc, + vector &output_buffers, + rtStream_t stream) { + GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc)); GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); std::vector args; - args.insert(args.end(), inputs.begin(), inputs.end()); - args.insert(args.end(), outputs.begin(), outputs.end()); - args.insert(args.end(), workspaces.begin(), workspaces.end()); + for (auto &buffer : input_buffers) { + args.emplace_back(buffer.data); + } + for (auto &buffer : output_buffers) { + args.emplace_back(buffer.data); + } + for (auto &buffer : workspaces_) { + args.emplace_back(buffer); + } if (tiling_buffer_ != nullptr) { GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); @@ -254,6 +322,14 @@ Status TbeOpTask::LaunchKernel(const vector &inputs, const vector(args_.get()); + arg_count = arg_size_ / sizeof(void *); + if (tiling_buffer_ != nullptr) { + --arg_count; + } +} + AiCpuBaseTask::~AiCpuBaseTask() { if (ext_info_addr_dev_ != nullptr) { (void)rtFree(ext_info_addr_dev_); @@ -399,12 +475,14 @@ AiCpuTask::~AiCpuTask() { } } -const void *AiCpuTask::GetIOAddr() const { return io_addr_; } - Status AiCpuTask::LaunchKernel(rtStream_t stream) { GELOGD("Start to launch kernel. 
task = %s", this->op_type_.c_str()); - auto ret = rtMemcpyAsync(workspace_addr_, task_info_.size(), task_info_.data(), task_info_.size(), - RT_MEMCPY_HOST_TO_DEVICE_EX, stream); + auto ret = rtMemcpyAsync(io_addr_, + io_addr_size_, + io_addr_host_.data(), + io_addr_host_.size() * sizeof(void *), + RT_MEMCPY_HOST_TO_DEVICE_EX, + stream); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); return RT_FAILED; @@ -680,6 +758,17 @@ Status AiCpuTask::LaunchKernel(const std::vector &input_desc, return SUCCESS; } +Status AiCpuTask::UpdateArgTable(const SingleOpModelParam ¶m) { + auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param, false); + io_addr_host_ = BuildTaskUtils::JoinAddresses(addresses); + return SUCCESS; +} + +void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { + arg_base = reinterpret_cast(io_addr_host_.data()); + arg_count = io_addr_host_.size(); +} + void AiCpuCCTask::SetKernelArgs(std::unique_ptr args, size_t arg_size) { args_ = std::move(args); arg_size_ = arg_size; @@ -691,9 +780,7 @@ void AiCpuCCTask::SetSoName(const std::string &so_name) { so_name_ = so_name; } void AiCpuCCTask::SetkernelName(const std::string &kernel_Name) { kernel_name_ = kernel_Name; } -void AiCpuCCTask::SetIoAddr(void *io_addr) { io_addr_ = io_addr; } - -const void *AiCpuCCTask::GetIOAddr() const { return io_addr_; } +void AiCpuCCTask::SetIoAddr(uintptr_t *io_addr) { io_addr_ = io_addr; } const void *AiCpuCCTask::GetArgs() const { return args_.get(); } @@ -716,12 +803,6 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { return ret; } GELOGD("Invoke rtCpuKernelLaunch succeeded"); - - size_t input_size = op_desc_->GetInputsSize(); - size_t output_size = op_desc_->GetOutputsSize(); - uint64_t *io_addr = reinterpret_cast(io_addr_); - std::vector io_addrs (io_addr, io_addr + input_size + output_size); - SetIoAddrsForDump(io_addrs); auto status = OpenDump(stream); if (status 
!= SUCCESS) { GELOGE(status, "Open dump failed in the aicpucc single op %s", this->kernel_name_.c_str()); @@ -761,4 +842,9 @@ Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, return SUCCESS; } + +void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { + arg_base = io_addr_; + arg_count = io_addr_num_; +} } // namespace ge diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index df80088d..04e0def2 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -32,49 +32,27 @@ #include "init/gelib.h" namespace ge { -enum OpTaskType { - OP_TASK_TBE = 0, - OP_TASK_AICPU, - OP_TASK_AICPUCC, - OP_TASK_INVALID, -}; - +class StreamResource; +struct SingleOpModelParam; class OpTask { public: OpTask() = default; virtual ~OpTask() = default; virtual Status LaunchKernel(rtStream_t stream) = 0; virtual Status UpdateRunInfo(const vector &input_desc, - const vector &output_desc) { - return UNSUPPORTED; - } - virtual Status LaunchKernel(const std::vector &inputs, - const std::vector &outputs, - const std::vector &workspaces, - rtStream_t stream) { - return UNSUPPORTED; - } - virtual OpTaskType GetOpTaskType() = 0; - virtual const void *GetIOAddr() const = 0; - const vector &GetWorkspaceSizes() const; - void SetWorkspaceSizes(const vector &workspace_sizes); + const vector &output_desc); + virtual Status UpdateArgTable(const SingleOpModelParam ¶m); void SetModelArgs(std::string model_name, uint32_t model_id); Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); const OpDescPtr &GetOpdesc() const {return op_desc_;} Status OpenDump(rtStream_t stream); - void SetIoAddrsForDump(const vector &io_addrs_for_dump) { - io_addrs_for_dump_ = io_addrs_for_dump; - } + virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; virtual Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, std::vector &output_desc, std::vector 
&output_buffers, - rtStream_t stream) { - return UNSUPPORTED; - } + rtStream_t stream); - private: - std::vector workspace_sizes_; protected: DumpProperties dump_properties_; DumpOp dump_op_; @@ -82,19 +60,18 @@ class OpTask { std::string model_name_; uint32_t model_id_ = 0; uint32_t block_dim_ = 1; - std::vector io_addrs_for_dump_; }; class TbeOpTask : public OpTask { public: ~TbeOpTask() override; Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { - return OP_TASK_TBE; - } - const void *GetIOAddr() const override { - return nullptr; - } + Status LaunchKernel(const std::vector &input_desc, + const std::vector &input_buffers, + std::vector &output_desc, + std::vector &output_buffers, + rtStream_t stream) override; + void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; void SetSmDesc(void *sm_desc); void SetStubFunc(const std::string &name, const void *stub_func); void SetKernelArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); @@ -102,20 +79,17 @@ class TbeOpTask : public OpTask { Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) override; - Status LaunchKernel(const vector &inputs, - const vector &outputs, - const vector &workspaces, - rtStream_t stream) override; - const void *GetArgs() const; size_t GetArgSize() const; const std::string &GetStubName() const; void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); private: + friend class SingleOpModel; static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); Status UpdateNodeByShape(const vector &input_desc, const vector &output_desc); + Status AllocateWorkspaces(const std::vector &workspace_sizes); const void *stub_func_ = nullptr; std::unique_ptr args_; @@ -123,9 +97,11 @@ class TbeOpTask : public OpTask { void *sm_desc_ = nullptr; std::string stub_name_; + StreamResource *stream_resource_ = nullptr; void *tiling_buffer_ = 
nullptr; uint32_t max_tiling_size_ = 0; std::string tiling_data_; + std::vector workspaces_; NodePtr node_; }; @@ -133,7 +109,7 @@ class AiCpuBaseTask : public OpTask { public: AiCpuBaseTask() = default; ~AiCpuBaseTask() override; - const UnknowShapeOpType GetUnknownType() const { return unknown_type_; } + UnknowShapeOpType GetUnknownType() const { return unknown_type_; } protected: Status SetExtInfoAndType(const std::string &kernel_ext_info, uint64_t kernel_id); @@ -158,10 +134,8 @@ class AiCpuTask : public AiCpuBaseTask { ~AiCpuTask() override; Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { - return OP_TASK_AICPU; - } - const void *GetIOAddr() const override; + Status UpdateArgTable(const SingleOpModelParam ¶m) override; + void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, @@ -188,27 +162,31 @@ class AiCpuTask : public AiCpuBaseTask { friend class AiCpuTaskBuilder; void *workspace_addr_ = nullptr; std::string task_info_; - // device addr + // device addr void *args_ = nullptr; size_t arg_size_ = 0; std::string op_type_; // device addr void *io_addr_ = nullptr; + size_t io_addr_size_ = 0; + + // host addr + std::vector io_addr_host_; bool dynamic_flag_ = false; // for copy task - void *copy_task_args_buf_; - void *copy_workspace_buf_; + void *copy_task_args_buf_ = nullptr; + void *copy_workspace_buf_ = nullptr; std::vector output_summary_; std::vector output_summary_host_; - void *copy_ioaddr_dev_; + void *copy_ioaddr_dev_ = nullptr; - void *copy_input_release_flag_dev_; - void *copy_input_data_size_dev_; - void *copy_input_src_dev_; - void *copy_input_dst_dev_; + void *copy_input_release_flag_dev_ = nullptr; + void *copy_input_data_size_dev_ = nullptr; + void *copy_input_src_dev_ = nullptr; + void *copy_input_dst_dev_ = nullptr; vector out_shape_hbm_; uint64_t kernel_id_ = 0; @@ -222,13 +200,12 @@ class AiCpuCCTask : public 
AiCpuBaseTask { AiCpuCCTask &operator=(const AiCpuCCTask &) = delete; Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { return OP_TASK_AICPUCC; } - const void *GetIOAddr() const override; + void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; const void *GetArgs() const; void SetKernelArgs(std::unique_ptr args, size_t arg_size); void SetSoName(const std::string &so_name); void SetkernelName(const std::string &kernel_Name); - void SetIoAddr(void *io_addr); + void SetIoAddr(uintptr_t *io_addr); size_t GetArgSize() const; Status LaunchKernel(const std::vector &input_desc, @@ -244,7 +221,8 @@ private: std::unique_ptr args_; size_t arg_size_ = 0; void *sm_desc_ = nullptr; - void *io_addr_ = nullptr; + uintptr_t *io_addr_ = nullptr; + size_t io_addr_num_ = 0; bool is_custom_ = false; uint32_t dump_flag_ = RT_KERNEL_DEFAULT; }; From 3101d4d4b23e8084aa87c46146025e55984773a7 Mon Sep 17 00:00:00 2001 From: chuxing Date: Fri, 11 Dec 2020 20:56:03 +0800 Subject: [PATCH 091/127] update --- ge/single_op/single_op_model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 6b4f6b04..a4a4b623 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -381,7 +381,7 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa } auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def); - auto ret = builder.BuildTask(*aicpucc_task, kernel_id); + auto ret = builder.BuildTask(*aicpucc_task, kernel_id, model_params_); if (ret != SUCCESS) { GELOGE(ret, "build aicpu_CC op task failed"); return ret; From 475867a7ed441c8c2d30a8ae4abb147cab4dfd21 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 11 Dec 2020 21:12:46 +0800 Subject: [PATCH 092/127] Add pytorch input const. 
--- ge/generator/ge_generator.cc | 22 +++++++++++++++++++++- ge/graph/preprocess/graph_preprocess.cc | 12 ++++++++++++ metadef | 2 +- parser | 2 +- 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index dc64aac1..e59026a1 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -156,7 +156,12 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen } string op_type; - if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { + bool is_const = false; + (void)AttrUtils::GetBool(tensor, CONST_ATTR_NAME_INPUT, in_const); + if (is_const) { + GELOGD("Get input[%d] is const", index); + op_type = CONSTANTOP; + } else if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { op_type = DATA; } @@ -165,6 +170,18 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen if (data_op == nullptr) { return FAILED; } + if (is_const) { + ConstGeTensorPtr tensor_value; + if(!AttrUtils::GetTensor(tensor, ge::ATTR_NAME_WEIGHTS, tensor_value)) { + GELOGE(FAILED, "Get value failed, node name:%s.", tensor.GetName().c_str()); + return FAILED; + } + if(!AttrUtils::SetTensor(data_op, ge::ATTR_NAME_WEIGHTS, tensor_value)) { + GELOGE(FAILED, "Set attr ATTR_NAME_WEIGHTS fail."); + return FAILED; + } + } + (void)AttrUtils::SetBool(data_op, "_is_single_op", true); GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); @@ -557,6 +574,9 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline) { + if (!is_offline) { + (void)AttrUtils::SetBool(op_desc, ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, true); + } if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { 
GELOGE(PARAM_INVALID, "input param is invalid when build single op!"); diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 22ae62de..a1627536 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1797,6 +1797,18 @@ Status GraphPrepare::PrepareOptimize() { } void GraphPrepare::TypeConversionOfConstant() { + bool is_acl_comlpile = false; + for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { + // This can ensure that n is not a null pointer + // No Conversion when called by aclOpCompile + if (n->GetOpDesc()->GetType() == CONSTANT) { + (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, is_acl_comlpile)); + if ( is_acl_comlpile) { + return; + } + } + } + if (options_.train_graph_flag) { GELOGD("trans CONSTANT to CONSTANTOP in train."); for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { diff --git a/metadef b/metadef index bd2cfdfa..c85822cd 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit bd2cfdfa85a3d9dcbd7dc825f5759c7f8b3ffa9a +Subproject commit c85822cd5404e40cb4ff2bfc9483062648c13c57 diff --git a/parser b/parser index c78651fe..5bc8c38b 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit c78651fee671ac079c56d2c3ff0d0439ea82f2fa +Subproject commit 5bc8c38b37476e8f4b9391c96e4a2cca59e53d8e From a8497dae739a41399b3625831a4835c9b387e709 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 11 Dec 2020 21:32:47 +0800 Subject: [PATCH 093/127] Add pytorch input const. 
--- ge/generator/ge_generator.cc | 6 +++--- ge/graph/preprocess/graph_preprocess.cc | 8 +++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index e59026a1..47f7b217 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -157,7 +157,7 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen string op_type; bool is_const = false; - (void)AttrUtils::GetBool(tensor, CONST_ATTR_NAME_INPUT, in_const); + (void)AttrUtils::GetBool(tensor, CONST_ATTR_NAME_INPUT, is_const); if (is_const) { GELOGD("Get input[%d] is const", index); op_type = CONSTANTOP; @@ -172,11 +172,11 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen } if (is_const) { ConstGeTensorPtr tensor_value; - if(!AttrUtils::GetTensor(tensor, ge::ATTR_NAME_WEIGHTS, tensor_value)) { + if (!AttrUtils::GetTensor(tensor, ge::ATTR_NAME_WEIGHTS, tensor_value)) { GELOGE(FAILED, "Get value failed, node name:%s.", tensor.GetName().c_str()); return FAILED; } - if(!AttrUtils::SetTensor(data_op, ge::ATTR_NAME_WEIGHTS, tensor_value)) { + if (!AttrUtils::SetTensor(data_op, ge::ATTR_NAME_WEIGHTS, tensor_value)) { GELOGE(FAILED, "Set attr ATTR_NAME_WEIGHTS fail."); return FAILED; } diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index a1627536..ccce250d 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1801,11 +1801,9 @@ void GraphPrepare::TypeConversionOfConstant() { for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { // This can ensure that n is not a null pointer // No Conversion when called by aclOpCompile - if (n->GetOpDesc()->GetType() == CONSTANT) { - (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, is_acl_comlpile)); - if ( is_acl_comlpile) { - return; - } + (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, 
is_acl_comlpile)); + if (is_acl_comlpile) { + return; } } From b4e85030cd7cbc108c0956195ec88e3d3b7c24d4 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 11 Dec 2020 21:39:20 +0800 Subject: [PATCH 094/127] Add pytorch input const. --- ge/graph/preprocess/graph_preprocess.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index ccce250d..c442afe8 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1797,12 +1797,12 @@ Status GraphPrepare::PrepareOptimize() { } void GraphPrepare::TypeConversionOfConstant() { - bool is_acl_comlpile = false; + bool is_acl_compile = false; for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { // This can ensure that n is not a null pointer // No Conversion when called by aclOpCompile - (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, is_acl_comlpile)); - if (is_acl_comlpile) { + (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, is_acl_compile)); + if (is_acl_compile) { return; } } From 2bf9bc8a53fb3f70e6bf4384bff2719a5151269f Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 11 Dec 2020 21:41:23 +0800 Subject: [PATCH 095/127] Add pytorch input const. 
--- ge/graph/preprocess/graph_preprocess.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index c442afe8..2ee5e330 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1801,7 +1801,7 @@ void GraphPrepare::TypeConversionOfConstant() { for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { // This can ensure that n is not a null pointer // No Conversion when called by aclOpCompile - (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, is_acl_compile)); + (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, is_acl_compile); if (is_acl_compile) { return; } From 3f652155a5e696d780f05f1952631e3ff7b85835 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Sat, 12 Dec 2020 09:51:29 +0800 Subject: [PATCH 096/127] add debug info for mstune --- .../load/new_model_manager/davinci_model.cc | 16 +++--- .../new_model_manager/zero_copy_offset.cc | 2 + .../load/new_model_manager/zero_copy_offset.h | 5 +- ge/graph/partition/graph_partition.cc | 52 +++++++++---------- 4 files changed, 41 insertions(+), 34 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 72a562bf..e5b812fe 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2185,8 +2185,9 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data const std::vector &blobs = input_data.blobs; for (const auto &data : new_input_data_info_) { if (data.first >= blobs.size()) { - GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), - new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first); + GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), + 
new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, + data.second.GetOpName().c_str()); return FAILED; } @@ -2197,13 +2198,14 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data } uint64_t data_size = data.second.GetDataSize(); GE_CHK_BOOL_RET_STATUS(data_size >= data_buf.length, PARAM_INVALID, - "input data size(%lu) does not match model required size(%lu), ret failed.", data_buf.length, - data_size); + "input data size(%lu) does not match model required size(%lu), op_name(%s) ret failed.", + data_buf.length, data_size, data.second.GetOpName().c_str()); void *mem_addr = data.second.GetBasicAddr(); void *data_buf_addr = reinterpret_cast(reinterpret_cast(data_buf.data)); uint64_t data_buf_length = data_buf.length; - GELOGI("CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", - runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, data_size, data_buf_length); + GELOGI("CopyPlainData memcpy graph_%u type[F] input[%s] rank[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", + runtime_param_.graph_id, data.second.GetOpName().c_str(), data.first, mem_addr, data_buf_addr, data_size, + data_buf_length); GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind)); } @@ -3444,7 +3446,7 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & } if (!CheckInputAndModelSize(buffer.length, data.second.GetDataSize(), is_dynamic)) { - GELOGE(FAILED, "Check input size and model size failed"); + GELOGE(FAILED, "Check input size and model size failed, op[%s]", data.second.GetOpName().c_str()); return FAILED; } diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/new_model_manager/zero_copy_offset.cc index 970b292c..9cd3f30b 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc @@ -35,6 +35,7 @@ Status ZeroCopyOffset::InitInputDataInfo(int64_t output_size, 
void *virtual_addr GELOGI("[ZCPY] Start to InitInputDataInfo of %s, total_data_size is %ld, virtual_addr is %p", op_desc->GetName().c_str(), output_size, virtual_addr); basic_addr_ = virtual_addr; + op_name_ = op_desc->GetName(); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, @@ -82,6 +83,7 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector &input_size_list GELOGD("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size); basic_addr_ = virtual_addr_list[idx]; + op_name_ = op_desc->GetName(); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/new_model_manager/zero_copy_offset.h index 025d1b14..fa80f28b 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/new_model_manager/zero_copy_offset.h @@ -66,9 +66,12 @@ class ZeroCopyOffset { int64_t GetDataSize() const { return data_size_; } // value of *outside_addrs_ from davinci_model std::vector>> &GetOutsideAddrs() { return outside_addrs_; } + // name of op + std::string GetOpName() const { return op_name_; } private: void *basic_addr_ = nullptr; + std::string op_name_; uint32_t data_count_ = 0; std::vector> data_info_; vector relative_offset_; @@ -80,4 +83,4 @@ class ZeroCopyOffset { std::vector zero_copy_relative_offset_; }; } // namespace ge -#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ \ No newline at end of file +#endif // 
GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc index 6a1fbb34..fbc13920 100755 --- a/ge/graph/partition/graph_partition.cc +++ b/ge/graph/partition/graph_partition.cc @@ -614,32 +614,32 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vectorSetParentNode(compute_graph->GetParentNode()); - (void) AttrUtils::SetStr(*sub_graph, ATTR_NAME_PARENT_GRAPH_NAME, compute_graph->GetName()); - auto sgi = MakeShared(); - if (sgi == nullptr) { - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MakeShared sub graph info failed."); - return FAILED; - } - // set engine name - sgi->SetEngineName(engine_name); - // set stream label - string sub_graph_stream; - if (AttrUtils::GetStr(sub_graph->GetDirectNode().at(0)->GetOpDesc(), ATTR_NAME_STREAM_LABEL, sub_graph_stream)) { - sgi->SetStreamLabel(sub_graph_stream); - } - /// for now inputFlag is the same before and after partition. It should - /// be changed according to the real partition - std::vector sub_graph_input(graph_info_.input_size_, true); - std::vector sub_graph_output(graph_info_.output_size_, true); - sgi->SetSubGraph(sub_graph); - sgi->SetOutputFlag(sub_graph_output); - sgi->SetInputFlag(sub_graph_input); - sgi->SetOutputContext(graph_info_.output_name_); - AddEndPldInformationToSubGraphInfo(sgi); - GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", - engine_name.c_str(), - sub_graph->GetName().c_str(), - sgi->GetStreamLabel().empty() ? "null" : sgi->GetStreamLabel().c_str()); + (void)AttrUtils::SetStr(*sub_graph, ATTR_NAME_PARENT_GRAPH_NAME, compute_graph->GetName()); + GELOGD("set attr success. 
subgraph(%s) with parent graph(%s)", sub_graph->GetName().c_str(), + compute_graph->GetName().c_str()); + auto sgi = MakeShared(); + if (sgi == nullptr) { + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MakeShared sub graph info failed."); + return FAILED; + } + // set engine name + sgi->SetEngineName(engine_name); + // set stream label + string sub_graph_stream; + if (AttrUtils::GetStr(sub_graph->GetDirectNode().at(0)->GetOpDesc(), ATTR_NAME_STREAM_LABEL, sub_graph_stream)) { + sgi->SetStreamLabel(sub_graph_stream); + } + /// for now inputFlag is the same before and after partition. It should + /// be changed according to the real partition + std::vector sub_graph_input(graph_info_.input_size_, true); + std::vector sub_graph_output(graph_info_.output_size_, true); + sgi->SetSubGraph(sub_graph); + sgi->SetOutputFlag(sub_graph_output); + sgi->SetInputFlag(sub_graph_input); + sgi->SetOutputContext(graph_info_.output_name_); + AddEndPldInformationToSubGraphInfo(sgi); + GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", engine_name.c_str(), + sub_graph->GetName().c_str(), sgi->GetStreamLabel().empty() ? 
"null" : sgi->GetStreamLabel().c_str()); if (engine_name != input_subgraph_name) { // do not add Data subGraph into SubGraphInfo output_subgraphs.push_back(sgi); } else { From 4f2ff983a02716825f4c067aadd3e08093178fb6 Mon Sep 17 00:00:00 2001 From: dongduo Date: Sat, 12 Dec 2020 10:40:51 +0800 Subject: [PATCH 097/127] Fix code check --- ge/common/helper/model_helper.cc | 5 ++--- ge/graph/build/memory/binary_block_mem_assigner.cc | 2 +- ge/graph/load/new_model_manager/davinci_model.cc | 2 +- .../task_info/super_kernel/super_kernel.cc | 4 ++-- ge/graph/manager/graph_manager.cc | 14 ++++---------- ge/graph/manager/util/debug.cc | 3 ++- ge/graph/passes/atomic_addr_clean_pass.cc | 3 ++- ge/host_kernels/ssd_prior_box_kernel.h | 4 ++-- ge/hybrid/executor/hybrid_execution_context.h | 3 ++- ge/hybrid/executor/hybrid_model_async_executor.cc | 4 ++-- ge/opskernel_manager/ops_kernel_manager.cc | 2 +- inc/external/ge/ge_ir_build.h | 6 +++--- 12 files changed, 24 insertions(+), 28 deletions(-) diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index fdfac68e..aacef88c 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -122,9 +122,8 @@ Status ModelHelper::SaveModelTbeKernel(std::shared_ptr &om_fil if (tbe_kernel_store.DataSize() > 0) { GE_CHK_STATUS_RET( SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, - ge_model->GetTBEKernelStore().Data(), - ge_model->GetTBEKernelStore().DataSize(), model_index), - "Add tbe kernel partition failed"); + ge_model->GetTBEKernelStore().Data(), ge_model->GetTBEKernelStore().DataSize(), + model_index), "Add tbe kernel partition failed"); } // no need to check value, DATA->NetOutput (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index 570828eb..fff589f3 100644 --- 
a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -21,7 +21,7 @@ namespace { const uint32_t kRangeCeilInterval = 2; const uint32_t kLogBase = 2; -const int64_t kLargeBlockSize = 8 * 1024 * 1024; // 8M +const int64_t kLargeBlockSize = 8388608; // 8 * 1024 * 1024 const int64_t kLargeBlockRangeSize = 2; } // namespace diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 181347f4..102fd3c1 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -83,7 +83,7 @@ const uint32_t kAddrLen = sizeof(void *); const int kDecimal = 10; const int kBytes = 8; const uint32_t kDataMemAlignSizeCompare = 64; -const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; // 2M +const uint32_t kDumpL1FusionOpMByteSize = 2097152; // 2 * 1024 * 1024 const uint32_t kDumpFlagOfL1Fusion = 0; const char *const kDefaultBatchLable = "Batch_default"; const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index a4d14fb0..65dca3b3 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -28,8 +28,8 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { rtError_t rt_ret = rtMalloc(reinterpret_cast(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. 
error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) - rt_ret = rtMemcpy(reinterpret_cast(device_args_addr_), sizeof(args), (void *)args, sizeof(args), - RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = rtMemcpy(reinterpret_cast(device_args_addr_), sizeof(args), reinterpret_cast(args), + sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 364a360f..2c2495b4 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -533,9 +533,8 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr return SUCCESS; } -Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, - Graph2SubGraphInfoList &sub_graph_map, - uint64_t session_id) { +Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, + Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id) { GE_CHECK_NOTNULL(compute_graph); // use default 16 multi thread const uint32_t thread_num = 16; @@ -549,12 +548,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr if (!op_compile_strategy.empty()) { (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } - std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, - this, - compute_graph->GetGraphID(), - subgraph, - compute_graph, - session_id, + std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, + compute_graph->GetGraphID(), subgraph, compute_graph, session_id, GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); @@ -562,7 +557,6 @@ Status 
GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr } vector_future.emplace_back(std::move(f)); } - for (auto &function_graph : compute_graph->GetAllSubgraphs()) { auto subgraph_list = sub_graph_map[function_graph]; for (const auto &subgraph : subgraph_list) { diff --git a/ge/graph/manager/util/debug.cc b/ge/graph/manager/util/debug.cc index 45c070c6..2c930d1f 100644 --- a/ge/graph/manager/util/debug.cc +++ b/ge/graph/manager/util/debug.cc @@ -32,7 +32,8 @@ Debug::~Debug() = default; void Debug::DumpProto(const Message &proto, const char *file) { std::string file_path = RealPath(file); - int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | M_UMASK_OTHREAD); + int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | + M_UMASK_OTHREAD); if (fd == -1) { GELOGW("Write %s failed", file_path.c_str()); return; diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 40931ff6..7c6ed8ce 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -93,7 +93,8 @@ bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); if (peer_in_node->GetType() == DATA) { - GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), op_info.engine.c_str()); + GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), + op_info.engine.c_str()); return false; } } diff --git a/ge/host_kernels/ssd_prior_box_kernel.h b/ge/host_kernels/ssd_prior_box_kernel.h index 3894bad9..c08217e2 100755 --- a/ge/host_kernels/ssd_prior_box_kernel.h +++ b/ge/host_kernels/ssd_prior_box_kernel.h @@ -100,8 +100,8 @@ class SsdPriorboxKernel : public Kernel { * @return OTHERS: Execution 
failed * @author */ - Status GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, int layer_width, - int layer_height, int &num_priors, int &dim_size) const; + Status GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, + int layer_width, int layer_height, int &num_priors, int &dim_size) const; void DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y, vector &result); std::unique_ptr BoundaryCalulate(int dim_size, int layer_width, int layer_height, float step_width, float step_height, int img_width, int img_height, float offset, diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 0fa5a5d7..1fe40c77 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -57,7 +57,8 @@ struct GraphExecutionContext { do { \ if ((context != nullptr) && (context)->profiler != nullptr) { \ if (node_name != nullptr) { \ - context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, ##__VA_ARGS__);\ + context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, \ + ##__VA_ARGS__); \ } else { \ context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ }\ diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index e03ebc31..ba717a2d 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -384,8 +384,8 @@ Status HybridModelAsyncExecutor::Execute(const std::vector &inputs, i, outputs[i].length, output_real_size); return FAILED; } - GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, - args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE)); + 
GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, + RT_MEMCPY_DEVICE_TO_DEVICE)); } outputs[i].length = output_real_size; } diff --git a/ge/opskernel_manager/ops_kernel_manager.cc b/ge/opskernel_manager/ops_kernel_manager.cc index e9c72a37..30f39c0d 100644 --- a/ge/opskernel_manager/ops_kernel_manager.cc +++ b/ge/opskernel_manager/ops_kernel_manager.cc @@ -176,7 +176,7 @@ Status OpsKernelManager::ParsePluginOptions(const map &options, enable_flag = true; } else { GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", - plugin_name.c_str(), iter->second.c_str()); + plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } catch (std::invalid_argument &) { diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h index 778ec21d..8ceb95d3 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -24,9 +24,9 @@ #include "graph/ge_error_codes.h" namespace { -#define IR_MAJOR_VERSION (int(1)) -#define IR_MINOR_VERSION (int(0)) -#define IR_PATCH_VERSION (int(0)) +const int IR_MAJOR_VERSION = 1; +cosnt int IR_MINOR_VERSION = 0; +const int IR_PATCH_VERSION = 0; } // namespace namespace ge { From 148b8f3fd0aec8e3e178f4bde016eb3ff931f65d Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Sat, 12 Dec 2020 10:46:41 +0800 Subject: [PATCH 098/127] add aclgrphGenerateForOp --- ge/generator/ge_generator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index a1a45028..d4334c41 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -586,7 +586,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in string name = ge::CurrentTimeInStr() + "_" + model_file_name; Graph graph; if (BuildSingleOpGraph(op_desc, inputs, outputs, name, graph) != ge::SUCCESS) { - GELOGE(GRAPH_FAILED, "make graph fail."); + 
GELOGE(GRAPH_FAILED, "make graph fail."); return GRAPH_FAILED; } GELOGI("ATC parser success in single op build."); From 7d6619cb4c7f22a302f2fd9c1aec94bf57780e99 Mon Sep 17 00:00:00 2001 From: dongduo Date: Sat, 12 Dec 2020 10:47:44 +0800 Subject: [PATCH 099/127] Fix code check --- inc/external/ge/ge_ir_build.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h index 8ceb95d3..99a1cf18 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -25,7 +25,7 @@ namespace { const int IR_MAJOR_VERSION = 1; -cosnt int IR_MINOR_VERSION = 0; +const int IR_MINOR_VERSION = 0; const int IR_PATCH_VERSION = 0; } // namespace From 613e221a971dc46f3fcf0dc443e8dd1fdac6b5bf Mon Sep 17 00:00:00 2001 From: chuxing Date: Sat, 12 Dec 2020 11:18:27 +0800 Subject: [PATCH 100/127] fix dynamic single op --- .../task/aicpu_kernel_task_builder.cc | 4 + ge/single_op/task/op_task.cc | 92 +++++++------------ ge/single_op/task/op_task.h | 3 +- 3 files changed, 38 insertions(+), 61 deletions(-) diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index c676ccf8..f8a2bd1b 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -97,6 +97,10 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, cons return ret; } + if (task.GetUnknownType() == DEPEND_COMPUTE) { + GELOGE(FAILED, "AiCpuCCTask unknown type is depend compute, it's not supported now."); + return FAILED; + } auto aicpu_param_head = reinterpret_cast(task.args_.get()); if (task.ext_info_addr_dev_ != nullptr) { aicpu_param_head->extInfoLength = kernel_ext_info.size(); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index a714c6a8..22433ec9 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -454,6 +454,29 @@ Status 
AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor return SUCCESS; } +Status AiCpuBaseTask::UpdateIoAddr(const vector &inputs, const vector &outputs) { + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + GetIoAddr(arg_base, arg_num); + + // input number and output number was check in ValidateParams + for (size_t i = 0; i < inputs.size(); ++i) { + auto addr = inputs[i].data; + GE_CHECK_NOTNULL(addr); + GELOGD("AICpuTask input[%zu] addr = %p", i, addr); + *arg_base++ = reinterpret_cast(addr); + } + + for (size_t i = 0; i < outputs.size(); ++i) { + auto addr = outputs[i].data; + GE_CHECK_NOTNULL(addr); + GELOGD("AICpuTask output[%zu] addr = %p", i, addr); + *arg_base++ = reinterpret_cast(addr); + } + + return SUCCESS; +} + AiCpuTask::~AiCpuTask() { FreeHbm(args_); FreeHbm(io_addr_); @@ -631,40 +654,6 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output return SUCCESS; } -Status AiCpuTask::SetIO(const vector &inputs, vector &outputs) { - vector io_addrs; - io_addrs.reserve(num_inputs_ + num_outputs_); - for (size_t i = 0; i < num_inputs_; ++i) { - GE_CHECK_NOTNULL(inputs[i]); - GELOGD("AiCpuTask input[%zu] addr = %p", i, inputs[i]); - io_addrs.emplace_back(reinterpret_cast(inputs[i])); - } - - if (unknown_type_ != DEPEND_COMPUTE) { - for (size_t i = 0; i < num_outputs_; ++i) { - GE_CHECK_NOTNULL(outputs[i]); - GELOGD("AiCpuTask output[%zu] addr = %p", i, outputs[i]); - io_addrs.emplace_back(reinterpret_cast(outputs[i])); - } - } else { - for (size_t i = 0; i < num_outputs_; ++i) { - void *summary_addr = output_summary_[i]; - io_addrs.emplace_back(reinterpret_cast(summary_addr)); - } - } - - if (!io_addrs.empty()) { - auto *dst_io_addr = const_cast(reinterpret_cast(io_addr_)); - GE_CHK_RT_RET(rtMemcpy(dst_io_addr, - sizeof(uint64_t) * io_addrs.size(), - &io_addrs[0], - sizeof(uint64_t) * io_addrs.size(), - RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHECK_NOTNULL(dst_io_addr); - }; - return SUCCESS; -} - Status 
AiCpuTask::InitForSummaryAndCopy() { if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) { GELOGI("Unknown_type is %d, output num is %d.", unknown_type_, num_outputs_); @@ -736,17 +725,17 @@ Status AiCpuTask::LaunchKernel(const std::vector &input_desc, std::vector &output_buffers, rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); - std::vector inputs; - std::vector outputs; - for (auto &buffer : input_buffers) { - inputs.emplace_back(buffer.data); - } - for (auto &buffer : output_buffers) { - outputs.emplace_back(buffer.data); + if (unknown_type_ == DEPEND_COMPUTE) { + std::vector summary_buffers; + for (size_t i = 0; i < num_outputs_; ++i) { + summary_buffers.emplace_back(output_summary_[i], sizeof(aicpu::FWKAdapter::ResultSummary), false); + } + GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, summary_buffers)); + } else { + GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers)); } - GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); - GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); + GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); if (unknown_type_ == DEPEND_SHAPE_RANGE) { GE_CHK_RT_RET(rtStreamSynchronize(stream)); GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); @@ -817,24 +806,9 @@ Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, std::vector &output_desc, std::vector &output_buffers, rtStream_t stream) { - GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, - "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", - unknown_type_); - GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); - - size_t arg_index = 0; - auto *task_io_addr = reinterpret_cast(io_addr_); - GE_CHECK_NOTNULL(task_io_addr); - for (auto &input : input_buffers) { - task_io_addr[arg_index++] = reinterpret_cast(input.data); - } - for (auto &output : output_buffers) { - task_io_addr[arg_index++] = reinterpret_cast(output.data); - } - + 
GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers)); GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); - if (unknown_type_ == DEPEND_SHAPE_RANGE) { GE_CHK_RT_RET(rtStreamSynchronize(stream)); GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 04e0def2..e2122b6f 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -112,6 +112,7 @@ class AiCpuBaseTask : public OpTask { UnknowShapeOpType GetUnknownType() const { return unknown_type_; } protected: + Status UpdateIoAddr(const std::vector &inputs, const std::vector &outputs); Status SetExtInfoAndType(const std::string &kernel_ext_info, uint64_t kernel_id); Status UpdateExtInfo(const std::vector &input_desc, @@ -145,8 +146,6 @@ class AiCpuTask : public AiCpuBaseTask { Status SetMemCopyTask(const domi::KernelExDef &kernel_def); private: - Status SetIO(const vector &inputs, vector &outputs); - // for copy task. Status InitForSummaryAndCopy(); Status UpdateShapeAndDataByResultSummary(vector &output_desc, From 10d03f99b9d7aa23c7ae801770f2d83132260499 Mon Sep 17 00:00:00 2001 From: lwx897429 Date: Sat, 12 Dec 2020 15:39:19 +0800 Subject: [PATCH 101/127] fix issue of task_id invalid --- ge/graph/load/new_model_manager/davinci_model.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index e29f4bc4..d0881d95 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -3173,21 +3173,29 @@ Status DavinciModel::DistributeTask() { const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { + auto &task_def = model_task_def->task(task_index); auto &task = task_list_.at(task_index); GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute 
fail", task_index); // for data dump - auto op_index = std::max(model_task_def->task(task_index).kernel().context().op_index(), - model_task_def->task(task_index).kernel_ex().op_index()); + auto op_index = std::max(task_def.kernel().context().op_index(), + task_def.kernel_ex().op_index()); OpDescPtr op = GetOpByIndex(op_index); GE_CHECK_NOTNULL(op); - SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); if (reinterpret_cast(task->GetDumpArgs()) != nullptr) { bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo(); if (call_dump || is_op_debug_reg_) { SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); } } + + auto task_type = static_cast(task_def.type()); + bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) + && (task_type != RT_MODEL_TASK_KERNEL_EX) + && (task_type != RT_MODEL_TASK_HCCL); + GE_IF_BOOL_EXEC(no_need_profiling, continue); + + SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); // Load task info for profiling TaskDescInfo task_desc_info; if (!om_name_.empty()) { @@ -3196,7 +3204,7 @@ Status DavinciModel::DistributeTask() { task_desc_info.model_name = name_; } task_desc_info.op_name = op->GetName(); - task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim(); + task_desc_info.block_dim = task_def.kernel().block_dim(); task_desc_info.task_id = task->GetTaskID(); task_desc_info.stream_id = task->GetStreamId(); task_desc_info_.emplace_back(task_desc_info); From 07f5327b18593f9308ddf315278b40069fdd3bd9 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 15:52:41 +0800 Subject: [PATCH 102/127] profiling AR version 1 --- ge/CMakeLists.txt | 13 +- ge/client/module.mk | 1 - ge/common/profiling/ge_profiling.cc | 176 ++++++ ge/common/profiling/ge_profiling_cb.cc | 26 + ge/common/profiling/profiling_manager.cc | 513 +++++------------- ge/common/profiling/profiling_manager.h | 97 +--- 
ge/executor/CMakeLists.txt | 1 + ge/executor/ge_executor.cc | 2 +- ge/executor/module.mk | 1 + ge/ge_runner.mk | 3 +- .../load/new_model_manager/davinci_model.cc | 180 +++--- .../load/new_model_manager/davinci_model.h | 2 +- .../load/new_model_manager/model_manager.cc | 43 +- .../load/new_model_manager/model_manager.h | 2 - ge/hybrid/executor/worker/execution_engine.cc | 3 +- ge/init/gelib.cc | 6 +- ge/single_op/single_op.cc | 3 +- ge/stub/gen_stubapi.py | 2 +- inc/framework/common/profiling/ge_profiling.h | 45 ++ .../common/profiling/ge_profiling_cb.h | 24 + 20 files changed, 510 insertions(+), 633 deletions(-) create mode 100644 ge/common/profiling/ge_profiling.cc create mode 100644 ge/common/profiling/ge_profiling_cb.cc create mode 100644 inc/framework/common/profiling/ge_profiling.h create mode 100644 inc/framework/common/profiling/ge_profiling_cb.h diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index bd0b0c39..7079e432 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -60,6 +60,8 @@ set(TRAIN_SRC_LIST "common/dump/dump_manager.cc" "common/dump/dump_properties.cc" "common/dump/dump_op.cc" + "common/profiling/ge_profiling.cc" + "common/profiling/ge_profiling_cb.cc" "engine_manager/dnnengine_manager.cc" "ge_local_engine/engine/host_cpu_engine.cc" "generator/ge_generator.cc" @@ -332,7 +334,6 @@ set(TRAIN_SRC_LIST "hybrid/hybrid_davinci_model.cc" "executor/ge_executor.cc" "client/ge_api.cc" - "client/ge_prof.cc" "analyzer/analyzer.cc" "ir_build/ge_ir_build.cc" "ir_build/atc_ir_common.cc" @@ -649,7 +650,7 @@ target_link_libraries(ge_runner $ ge_memory adump_server - msprofiler + msprofiler_fwk static_mmpa -Wl,--no-as-needed graph @@ -658,7 +659,6 @@ target_link_libraries(ge_runner register c_sec slog - msprof runtime resource error_manager @@ -777,13 +777,12 @@ target_link_libraries(opensrc_ascendcl PRIVATE register_static error_manager_static adump_server - msprofiler + msprofiler_fwk -Wl,--no-whole-archive -Wl,--no-as-needed c_sec runtime slog - msprof 
ascend_hal_stub -Wl,--as-needed -lrt @@ -799,12 +798,10 @@ set_target_properties(opensrc_ascendcl PROPERTIES add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc - ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc COMMAND echo "Generating stub files." && ${HI_PYTHON} ${CMAKE_CURRENT_LIST_DIR}/stub/gen_stubapi.py ${GE_CODE_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR} && mv ge_ir_build.cc stub_ge_ir_build.cc && mv ge_api.cc stub_ge_api.cc - && mv ge_prof.cc stub_ge_prof.cc && echo "Generating stub files end." #WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} #DEPENDS stub/gen_stubapi.py ${TOP_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR} @@ -813,7 +810,6 @@ add_custom_command( add_custom_target(ge_stub DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc - ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc ) ################################################################## @@ -855,7 +851,6 @@ target_include_directories(atc_stub_ge_compiler PRIVATE ############ stub/libge_runner.so ############ add_library(fwk_stub_ge_runner SHARED stub_ge_api.cc - stub_ge_prof.cc stub_ge_ir_build.cc ) diff --git a/ge/client/module.mk b/ge/client/module.mk index 6ac69d31..c942d550 100644 --- a/ge/client/module.mk +++ b/ge/client/module.mk @@ -4,7 +4,6 @@ LOCAL_PATH := $(call my-dir) COMMON_LOCAL_SRC_FILES := \ proto/ge_api.proto \ ge_api.cc \ - ge_prof.cc \ COMMON_LOCAL_C_INCLUDES := \ diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc new file mode 100644 index 00000000..8da6b12a --- /dev/null +++ b/ge/common/profiling/ge_profiling.cc @@ -0,0 +1,176 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common/profiling/ge_profiling.h" +#include "runtime/base.h" +#include "common/profiling/profiling_manager.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" +#include "graph/load/graph_loader.h" +#include "init/gelib.h" +#include "framework/common/ge_inner_error_codes.h" + +namespace { +const uint32_t kDeviceListIndex = 3; +const std::string kDeviceNums = "devNums"; +const std::string kDeviceIdList = "devIdList"; +const std::string kProfilingInit = "prof_init"; +const std::string kProfilingFinalize = "prof_finalize"; +const std::string kProfilingStart = "prof_start"; +const std::string kProfilingStop = "prof_stop"; +const std::string kProfModelSubscribe = "prof_model_subscribe"; +const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; +const std::string kRtSetDeviceRegName = "profiling"; + +const std::map kProfCommandTypeMap = { + {kProfCommandhandleInit, kProfilingInit}, + {kProfCommandhandleStart, kProfilingStart}, + {kProfCommandhandleStop, kProfilingStop}, + {kProfCommandhandleFinalize, kProfilingFinalize}, + {kProfCommandhandleModelSubscribe, kProfModelSubscribe}, + {kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}}; +} // namespace + +bool TransProfConfigToParam(const MsprofCommandHandle &profCommand, vector &prof_config_params) { + prof_config_params.clear(); + prof_config_params.emplace_back(kDeviceNums); + prof_config_params.emplace_back(std::to_string(profCommand.devNums)); + prof_config_params.emplace_back(kDeviceIdList); + std::string devID = ""; + if 
(profCommand.devNums == 0) { + GELOGW("The device num is invalid."); + return false; + } + for (uint32_t i = 0; i < profCommand.devNums; i++) { + devID.append(std::to_string(profCommand.devIdList[i])); + if (i != profCommand.devNums - 1) { + devID.append(","); + } + } + + prof_config_params.push_back(devID); + return true; +} + +bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { + if (deviceid_list == nullptr) { + GELOGE(ge::PARAM_INVALID, "deviceIdList is nullptr"); + return false; + } + if (device_nums == 0 || device_nums > MAX_DEV_NUM) { + GELOGE(ge::PARAM_INVALID, "The device nums is invalid."); + return false; + } + + // real device num + int32_t dev_count = 0; + rtError_t rt_err = rtGetDeviceCount(&dev_count); + if (rt_err != RT_ERROR_NONE) { + GELOGE(ge::INTERNAL_ERROR, "Get the Device count fail."); + return false; + } + + if (device_nums > static_cast(dev_count)) { + GELOGE(ge::PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count); + return false; + } + + std::unordered_set record; + for (size_t i = 0; i < device_nums; ++i) { + uint32_t dev_id = deviceid_list[i]; + if (dev_id >= static_cast(dev_count)) { + GELOGE(ge::PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count); + return false; + } + if (record.count(dev_id) > 0) { + GELOGE(ge::PARAM_INVALID, "Device id %u is duplicatedly set", dev_id); + return false; + } + record.insert(dev_id); + } + return true; +} + +ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { + if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { + GELOGW("Msprof ctrl callback is exist, just ignore it."); + } else { + ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); + } + return ge::SUCCESS; +} + +ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { + // Pass MsprofSetDeviceCallback to runtime + ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), 
static_cast(func)); + if (rt_ret != ge::SUCCESS) { + GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); + return rt_ret; + } + return ge::SUCCESS; +} + +ge::Status RegProfReporterCallback(MsprofReporterCallback func) { + if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { + GELOGW("Msprof ctrl callback is exist, just ignore it."); + } else { + ge::ProfilingManager::Instance().SetMsprofReporterCallback(func); + } + // Pass MsprofReporterCallback to runtime + ge::Status rt_ret = rtSetMsprofReporterCallback(func); + if (rt_ret != ge::SUCCESS) { + GELOGE(rt_ret, "Pass MsprofReporterCallback to runtime failed!!"); + return rt_ret; + } + // Pass MsprofReporterCallback to hccl in opskernel so initialize + + return ge::SUCCESS; +} + +ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) { + GE_CHECK_NOTNULL(data); + MsprofCommandHandle *prof_config_param = (MsprofCommandHandle *)data; + if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { + return ge::FAILED; + } + std::vector prof_params; + if (!TransProfConfigToParam(*prof_config_param, prof_params)) { + GELOGE(ge::PARAM_INVALID, "Transfer profilerConfig to string vector failed"); + return ge::PARAM_INVALID; + } + auto iter = kProfCommandTypeMap.find(type); + if (iter == kProfCommandTypeMap.end()) { + GELOGW("The prof comand type is invalid."); + return ge::PARAM_INVALID; + } + ge::GraphLoader graph_loader; + ge::Command command; + command.cmd_params.clear(); + command.cmd_type = iter->second; + command.cmd_params = prof_params; + command.module_index = prof_config_param->profSwitch; + GELOGI("GE commandhandle execute, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), + prof_params[kDeviceListIndex].c_str(), command.module_index); + ge::Status ret = graph_loader.CommandHandle(command); + if (ret != ge::SUCCESS) { + GELOGE(ret, "Handle profiling command failed"); + return 
ge::FAILED; + } + + GELOGI("Successfully execute profiling command 0x%llx.", command.module_index); + return ge::SUCCESS; +} + diff --git a/ge/common/profiling/ge_profiling_cb.cc b/ge/common/profiling/ge_profiling_cb.cc new file mode 100644 index 00000000..230e129d --- /dev/null +++ b/ge/common/profiling/ge_profiling_cb.cc @@ -0,0 +1,26 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common/profiling/ge_profiling_cb.h" +#include "init/gelib.h" + +bool IsInitialize() { + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || instance_ptr->InitFlag() == false) { + return false; + } + return true; +} diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index ba03d671..bcf6d366 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -45,7 +45,10 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; } // namespace namespace ge { -ProfilingManager::ProfilingManager() : subscribe_count_(0) {} +ProfilingManager::ProfilingManager() : is_load_profiling_(false), + is_execute_profiling_(false), + is_training_trace_(false), + subscribe_count_(0) {} ProfilingManager::~ProfilingManager() {} @@ -58,44 +61,25 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); 
subscribe_count_ = 0; - job_id_ = options.job_id; + GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str()); - GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); - - - - Status ret; - if (!recv_profiling_config_.empty()) { - GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str()); - ret = InitFromAclCfg(recv_profiling_config_); - } else { - ret = InitFromOptions(options); - if (ret == SUCCESS && is_load_profiling_) { - device_id_.push_back(options.device_id); - } - } + struct MsprofGeOptions prof_conf = { 0 }; + Status ret = InitFromOptions(options, prof_conf); if (ret != SUCCESS) { GELOGE(ret, "Failed to init profiling."); return ret; } if (is_load_profiling_) { - // register Framework to profiling - int result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); - if (result != 0) { - GELOGE(FAILED, "Register profiling engine failed."); + int32_t cb_ret = prof_cb_.msprofCtrlCallback( + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), + static_cast(&prof_conf), sizeof(MsprofGeOptions)); + if (cb_ret != 0) { + GELOGE(FAILED, "call msprofCtrlCallback failed, type:%u, return:%d", + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); return FAILED; } - // profiling startup first time - GELOGI("Begin to init profiling, device num %zu", device_id_.size()); - for (size_t i = 0; i < device_id_.size(); ++i) { - ret = StartProfiling(0, device_id_[i]); - if (ret != SUCCESS) { - GELOGW("Profiling start failed on device %d.", device_id_[i]); - continue; - } - GELOGI("Profiling init succ on device %d.", device_id_[i]); - } + GELOGI("Profiling init success"); } else { GELOGI("The profiling is off, skip the initialization"); } @@ -103,264 +87,55 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromAclCfg( - const std::string &config) { +ge::Status 
ProfilingManager::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf) { #ifdef DAVINCI_SUPPORT_PROFILING - try { - is_load_profiling_ = false; - is_execute_profiling_ = false; - profiling_opts_.clear(); - op_trace_conf_.clear(); - Json start_prof_conf = Json::parse(config); - Json &prof_conf = start_prof_conf[kStartCfg][0]; - job_id_ = prof_conf[kJobID]; - auto iter = prof_conf.find(kProfDir); - if (iter != prof_conf.end()) { - prof_dir_ = prof_conf[kProfDir]; - } - Json &device_id = prof_conf[kDeviceID]; - if (device_id.size() != 0) { - vector().swap(device_id_); - bool is_all = false; - for (size_t i = 0; i < device_id.size(); i++) { - std::string device_id_str = device_id[i].get(); - if (device_id_str == "all") { - is_all = true; - break; - } - device_id_.push_back(std::stoi(device_id_str)); - } - if (is_all) { - int32_t count = 0; - rtError_t rt_err = rtGetDeviceCount(&count); - if (rt_err != RT_ERROR_NONE) { - GELOGE(FAILED, "Call rtGetDeviceCount to get device failed."); - } + // enable profiling by env + char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 }; + is_load_profiling_ = false; + is_execute_profiling_ = false; - vector().swap(device_id_); - for (int32_t i = 0; i < count; ++i) { - device_id_.push_back(i); - } - } - } + (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); + (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, sizeof(MsprofGeOptions)); - Json &features = prof_conf[kFeatures]; - if (ParseFeaturesFromAclCfg(features) != SUCCESS) { - GELOGE(FAILED, "Parse feature from acl cfg failed."); - return FAILED; - } + if ((env_profiling_mode != nullptr) && (strcmp("true", env_profiling_mode) == 0) + && (strcmp(prof_conf.options, "\0") != 0)) { + // enable profiling by env is_load_profiling_ = true; is_execute_profiling_ = true; - } catch (...) 
{ - GELOGE(FAILED, "Json conf is not invalid !"); - return ge::PARAM_INVALID; - } -#endif - return ge::SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg( - const Json &features) { -#ifdef DAVINCI_SUPPORT_PROFILING - try { - for (size_t i = 0; i < features.size(); ++i) { - const Json &feature = features[i]; - if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) { - continue; - } - const std::string &name = feature[kName]; - if (name == "op_trace") { - const Json &conf = feature[kConf]; - const Json &events = conf[0][kEvents]; - const std::string &ai_core_events = events[0][kAiCoreEvents]; - GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str()); - is_op_trace_ = true; - ProfMgrConf prof_mgr_conf; - int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf); - if (result != 0) { - GELOGE(FAILED, "ProfMgrGetConf failed."); - return FAILED; - } - op_trace_conf_ = prof_mgr_conf.conf; - op_trace_iter_num_ = static_cast(op_trace_conf_.size()); - GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_); - } else if (name == "task_trace") { - is_op_trace_ = false; - if (feature.find(kConf) != feature.end()) { - const Json &conf = feature[kConf]; - std::stringstream task_trace_conf; - task_trace_conf << conf; - task_trace_conf_ = task_trace_conf.str(); - } - GELOGI("Task trace config from acl"); - } else if (name == "system_trace") { - is_op_trace_ = false; - const Json &conf = feature[kConf]; - std::stringstream system_trace_conf; - system_trace_conf << conf; - system_trace_conf_ = system_trace_conf.str(); - GELOGI("System trace config from acl"); - } - profiling_opts_.push_back(name); - } - } catch (...) 
{ - GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !"); - return ge::PARAM_INVALID; - } -#endif - return ge::SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromOptions(const Options &options) { -#ifdef DAVINCI_SUPPORT_PROFILING - // enable profiling support two ways: env and front end - char profiling_mode_temp[MMPA_MAX_PATH] = { 0x00 }; - char prof_options_temp[MMPA_MAX_PATH] = { 0x00 }; - (void)mmGetEnv("PROFILING_MODE", profiling_mode_temp, MMPA_MAX_PATH); - (void)mmGetEnv("PROFILING_OPTIONS", prof_options_temp, MMPA_MAX_PATH); - const char *profiling_mode = profiling_mode_temp; - const char *prof_options = prof_options_temp; - if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { - is_load_profiling_ = false; - is_execute_profiling_ = false; + GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options); } else { - std::string prof_options_str = std::string(prof_options); - profiling_opts_ = StringUtils::Split(prof_options_str, ':'); - is_load_profiling_ = true; - is_execute_profiling_ = true; - GELOGI("The profiling in env is %s, %s", profiling_mode, prof_options); - } - if (!is_load_profiling_) { - const std::string enable_profiling = "1"; - if (options.profiling_mode != enable_profiling || options.profiling_options.empty()) { - is_load_profiling_ = false; - is_execute_profiling_ = false; + if (options.profiling_mode != "1" || options.profiling_options.empty()) { return SUCCESS; - } else { - profiling_opts_ = StringUtils::Split(options.profiling_options, ':'); - is_load_profiling_ = true; - is_execute_profiling_ = true; - GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), options.profiling_options.c_str()); } - } - // features:'training_trace', 'task_trace' or 'op_trace' etc - if (!profiling_opts_.empty()) { - if (profiling_opts_[0] == "op_trace") { - is_op_trace_ = true; - // op trace get conf - 
ProfMgrConf prof_mgr_conf; - int result = ProfMgrGetConf("", &prof_mgr_conf); - if (result != 0) { - GELOGE(FAILED, "ProfMgrGetConf failed."); - return FAILED; - } - op_trace_conf_ = prof_mgr_conf.conf; - op_trace_iter_num_ = static_cast(op_trace_conf_.size()); - GELOGI("op trace profiling iter num %d,", op_trace_iter_num_); - } else { - is_op_trace_ = false; - op_trace_iter_num_ = 1; + // enable profiling by ge option + if (memcpy_s(prof_conf.options, sizeof(prof_conf.options), options.profiling_options.c_str(), + sizeof(options.profiling_options.c_str())) != EOK) { + GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); + return INTERNAL_ERROR; } + is_load_profiling_ = true; + is_execute_profiling_ = true; + GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), prof_conf.options); } -#endif - return ge::SUCCESS; -} -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::StartProfiling(int32_t iter_num, - int32_t device_id) { -#ifdef DAVINCI_SUPPORT_PROFILING - if (!profiling_opts_.empty()) { - GELOGI("Start profiling index is %d", iter_num); - // current one docker only use one device - Json p_device; - - try { - // profiling need physical_device_id - p_device[kDeviceID] = std::to_string(device_id); - p_device[kJobID] = job_id_; - p_device[kTraceID] = std::to_string(GetContext().TraceId()); - if (!prof_dir_.empty()) { - p_device[kProfDir] = prof_dir_; - GELOGI("Prof dir: %s.", prof_dir_.c_str()); - } - - Json features; - if (is_op_trace_) { - Json f; - f[kName] = "op_trace"; - Json conf; - if (op_trace_conf_.size() <= static_cast(iter_num)) { - GELOGE(FAILED, "Op trace iter num is invalid!"); - return FAILED; - } - Json events; - events[0] = nlohmann::json::parse(op_trace_conf_[iter_num]); - conf[0][kEvents] = events; - f[kConf] = conf; - features[0] = f; - if (iter_num == 0) { - is_load_ = true; - } - } else { - for (std::vector::size_type i = 0; i < profiling_opts_.size(); i++) { - Json f; - if 
(profiling_opts_[i] == "system_trace") { - f[kConf] = nlohmann::json::parse(system_trace_conf_); - } else if (profiling_opts_[i] == "task_trace") { - if (!task_trace_conf_.empty()) { - f[kConf] = nlohmann::json::parse(task_trace_conf_); - } - } - f[kName] = profiling_opts_[i]; - features[i] = f; - } - is_load_ = true; - } - p_device[kFeatures] = features; - // only one device, but sProfMgrStartUp API require for device list - Json devices; - devices[0] = p_device; - - Json start_cfg; - start_cfg[kStartCfg] = devices; - - // convert json to string - std::stringstream ss; - ss << start_cfg; - send_profiling_config_ = ss.str(); - GELOGI("Profiling config %s\n", send_profiling_config_.c_str()); - } catch (...) { - GELOGE(FAILED, "Op trace json conf is not invalid !"); - return FAILED; - } + if (!is_load_profiling_) { + return SUCCESS; + } - // runtime startup for profiling - uint64_t module = GetProfilingModule(); - int32_t device_num = 1; - uint32_t device_id_rt = static_cast(device_id); - GE_CHK_RT_RET(rtProfilerStart(module, device_num, &device_id_rt)); - - // call profiling startup API - ProfMgrCfg prof_cfg = {send_profiling_config_}; - void *prof_handle = ProfMgrStartUp(&prof_cfg); - if (prof_handle == nullptr) { - GELOGW("ProfMgrStartUp failed on device %d ", device_id); - return FAILED; - } - GELOGD("StartProfiling, prof_handle: %p", prof_handle); - prof_handle_vec_.push_back(prof_handle); + if (memcpy_s(prof_conf.jobId, sizeof(prof_conf.jobId), options.job_id.c_str(), + sizeof(options.job_id.c_str())) != EOK) { + GELOGE(INTERNAL_ERROR, "copy job_id failed."); + return INTERNAL_ERROR; } + + // get traceId from options + prof_conf.traceId = GetContext().TraceId(); #endif - return SUCCESS; + return ge::SUCCESS; } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProfiling() { #ifdef DAVINCI_SUPPORT_PROFILING - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - if (reporter != nullptr) { - int ret = reporter->Flush(); 
- GELOGI("Report data end, ret is %d", ret); - } uint64_t module = GetProfilingModule(); int32_t device_num = static_cast(device_id_.size()); auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); @@ -376,15 +151,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); } - for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { - int result = ProfMgrStop(prof_handle_vec_[i]); - if (result != 0) { - GELOGW("ProfMgr stop return fail:%d, handle:%p", result, prof_handle_vec_[i]); - } + // stop profiling + int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), + nullptr, 0); + if (cb_ret != 0) { + GELOGW("call msprofCtrlCallback failed, type:%u, return:%d", + static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret); + return; } - vector().swap(prof_handle_vec_); - is_load_ = false; - recv_profiling_config_ = ""; GELOGI("Stop Profiling success."); #endif } @@ -392,12 +166,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( uint32_t model_id, const std::vector &task_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - if (reporter == nullptr) { - GELOGI("Profiling report is nullptr!"); - return; - } - std::string data; for (const auto &task : task_desc_info) { std::string model_name = task.model_name; @@ -412,7 +180,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin .append(std::to_string(stream_id)).append(" ") .append(std::to_string(model_id)).append("\n")); - Msprof::Engine::ReporterData reporter_data{}; + ReporterData reporter_data; reporter_data.deviceId = device_id; reporter_data.data = (unsigned char *)data.c_str(); reporter_data.dataLen = 
data.size(); @@ -422,9 +190,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin return; } - ret = reporter->Report(&reporter_data); - if (ret != SUCCESS) { - GELOGE(ret, "Reporter data of task_desc_info fail!"); + int32_t cb_ret = CallMsprofReport(reporter_data); + if (cb_ret != 0) { + GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret); return; } } @@ -436,9 +204,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( uint32_t model_id, const std::vector &compute_graph_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;); - std::string data; for (const auto &graph : compute_graph_desc_info) { data.append("model_name:") @@ -493,64 +258,52 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } data.append(" model_id:").append(std::to_string(model_id)); - data.append("\n"); - Msprof::Engine::ReporterData reporter_data{}; - Report(device_id, data, *reporter, reporter_data); - + GraphDescReport(device_id, data); data.clear(); } #endif } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Report( - const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, - Msprof::Engine::ReporterData &reporter_data) { +void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { #ifdef DAVINCI_SUPPORT_PROFILING + ReporterData reporter_data; + int ret = -1; + int32_t cb_ret = -1; size_t index = data.size() / kReportMaxLen; if (index >= 1) { reporter_data.deviceId = device_id; - int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); + ret = memcpy_s(reporter_data.tag, 
MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); for (size_t i = 0; i < index; ++i) { reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; reporter_data.dataLen = kReportMaxLen; - ret = reporter.Report(&reporter_data); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Reporter data of graph_desc_info fail!"); return;); + cb_ret = CallMsprofReport(reporter_data); + GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); } reporter_data.dataLen = data.size() - kReportMaxLen * index; if (reporter_data.dataLen != 0) { reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index; - ret = reporter.Report(&reporter_data); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Reporter data of graph_desc_info fail!"); return;); + cb_ret = CallMsprofReport(reporter_data); + GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); } } else { reporter_data.deviceId = device_id; reporter_data.data = (unsigned char *)data.c_str(); reporter_data.dataLen = data.size(); - int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); + ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); - ret = reporter.Report(&reporter_data); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Reporter data of graph_desc_info fail!"); return;); - } -#endif -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit(const std::string &module) const { -#ifdef DAVINCI_SUPPORT_PROFILING - int ret = Msprof::Engine::UnInit(module); - if (ret != SUCCESS) { - GELOGE(ret, "profiling plugin uninit failed, 
ret:%d", ret); + cb_ret = CallMsprofReport(reporter_data); + GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); } #endif } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( uint32_t model_id, const std::vector &task_desc_info, - const std::vector &compute_graph_desc_info, - bool check_device) { + const std::vector &compute_graph_desc_info) { #ifdef DAVINCI_SUPPORT_PROFILING int32_t logic_device_id = 0; rtError_t rt_ret = rtGetDevice(&logic_device_id); @@ -559,13 +312,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr return; } GELOGD("current logic_device_id:%d", logic_device_id); - if (check_device) { - auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); - if (ret == device_id_.end()) { - GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); - return; - } - } GELOGD("start ProfilingTaskDescInfo."); ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); GELOGD("start ProfilingGraphDescInfo."); @@ -574,11 +320,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr #endif } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfilingConfig( - const std::string &profiling_cfg) { - recv_profiling_config_ = profiling_cfg; -} - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() { uint64_t module = PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | @@ -594,9 +335,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetP return module; } -void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, - uint32_t device_id, - uint64_t module) { +void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module) { #ifdef DAVINCI_SUPPORT_PROFILING if (prof_type == kProfModelSubscribe) { if 
(subs_dev_module_.find(device_id) != subs_dev_module_.end()) { @@ -608,9 +347,13 @@ void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, subs_dev_module_[device_id] = dev_info; } } else if (prof_type == kProfModelUnsubscribe) { - if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) { - if (subs_dev_module_[device_id].subscribe_count > 0) { - subs_dev_module_[device_id].subscribe_count--; + auto iter = subs_dev_module_.find(device_id); + if (iter != subs_dev_module_.end()) { + if (iter->second.subscribe_count > 0) { + iter->second.subscribe_count--; + } + if (iter->second.subscribe_count == 0) { + subs_dev_module_.erase(iter); } } } else { @@ -626,10 +369,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) { // register framework to profiling - int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); - if (result != SUCCESS) { - GELOGE(FAILED, "Register profiling engine failed."); - return FAILED; + // register Framework to profiling + int32_t cb_ret = PluginInit(); + if (cb_ret != 0) { + GELOGE(cb_ret, "profiling plugin init failed, ret:%d", cb_ret); + return cb_ret; } GELOGI("Prof subscribe: model load profiling on."); } @@ -647,7 +391,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module); // Report profiling data - Status p_ret = davinci_model->ReportProfilingData(false); + Status p_ret = davinci_model->ReportProfilingData(); if (p_ret != SUCCESS) { GELOGE(p_ret, "Report profiling data failed."); return p_ret; @@ -672,6 +416,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo auto iter = subs_dev_module_.find(device[0]); if (iter != subs_dev_module_.end()) { if (subs_dev_module_[device[0]].subscribe_count == 1) { + // 
The same device_id, only stop at last time rtError_t rt_ret = rtProfilerStop(subs_dev_module_[device[0]].module, dev_num, device); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "Runtime profiler stop failed."); @@ -679,15 +424,15 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo } } UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module); + } else { + GELOGE(FAILED, "The device_id:%u has not been subscribed, do not need to cancel.", device[0]); + return FAILED; } subscribe_count_--; if (subscribe_count_ == 0) { - int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE); - if (ret != SUCCESS) { - GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret); - return ret; - } + // profiling plugin uninit at last subscription + PluginUnInit(); } #endif return SUCCESS; @@ -700,11 +445,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn if (model_load_mask == PROF_MODEL_LOAD_MASK) { // register Framework to profiling - int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); - if (result != SUCCESS) { - GELOGE(FAILED, "Register profiling engine failed."); - return FAILED; + int32_t cb_ret = PluginInit(); + if (cb_ret != 0) { + GELOGE(cb_ret, "profiling plugin init failed, ret:%d", cb_ret); + return cb_ret; } + int32_t device_num = -1; rtError_t rt_ret = rtProfilerStart(model_load_mask, device_num, nullptr); if (rt_ret != RT_ERROR_NONE) { @@ -719,7 +465,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn if (training_trace_mask == PROF_TRAINING_TRACE_MASK) { is_training_trace_ = true; } - is_acl_api_mode_ = true; GELOGI("Prof init success."); #endif return SUCCESS; @@ -730,12 +475,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi std::lock_guard lock(mutex_); is_load_profiling_ = false; is_training_trace_ = false; - is_acl_api_mode_ = false; - int32_t ret = 
Msprof::Engine::UnInit(GE_PROFILING_MODULE); - if (ret != SUCCESS) { - GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret); - } + // profiling plugin uninit + PluginUnInit(); + int32_t dev_num = -1; rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr); if (rt_ret != RT_ERROR_NONE) { @@ -859,7 +602,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt for (int32_t i = 0; i < device_num; i++) { device_id_ptr[i] = static_cast(device_list[i]); } - GELOGD("Runtime config param: 0x%llx, device num: %d.", module, device_num); + GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { @@ -878,7 +621,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGW("Prof start: load model module is invalid."); } UpdateDeviceIdModuleMap(kProfStart, module, device_list); - GELOGD("Prof start profiling success."); + GELOGI("Prof start profiling success."); #endif return SUCCESS; } @@ -901,7 +644,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt for (int32_t i = 0; i < device_num; i++) { device_id_ptr[i] = static_cast(device_list[i]); } - GELOGD("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); + GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); @@ -921,7 +664,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGW("Prof stop: load model module is invalid."); } UpdateDeviceIdModuleMap(kProfStop, module, device_list); - GELOGD("Prof stop profiling success."); + GELOGI("Prof stop profiling success."); #endif return SUCCESS; } @@ -963,47 +706,43 @@ 
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); } - GELOGD("Current logic_device_id:%d", logic_device_id); + GELOGI("Current logic_device_id:%d", logic_device_id); bool execute_model_prof_on = false; auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); if (iter != device_id_.end()) { execute_model_prof_on = true; } - GELOGD("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); + GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); return is_execute_profiling_ || execute_model_prof_on; } -/** - * @brief Profiling PluginImpl - */ -// PluginImpl static variable init -Msprof::Engine::Reporter *PluginImpl::reporter_ = nullptr; - -PluginImpl::PluginImpl(const std::string &module) : module_(module) { GELOGI("Create PluginImpl\n"); } - -int PluginImpl::Init(const Msprof::Engine::Reporter *reporter) { - GELOGI("PluginImpl init"); - reporter_ = const_cast(reporter); - return 0; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() const { + return prof_cb_.msprofReporterCallback( + static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), + static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), + nullptr, 0); } -int PluginImpl::UnInit() { - GELOGI("PluginImpl Uninit"); - reporter_ = nullptr; - return 0; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit() const { +#ifdef DAVINCI_SUPPORT_PROFILING + int32_t cb_ret = prof_cb_.msprofReporterCallback( + static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), + static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), + nullptr, 0); + if (cb_ret != 0) { + GELOGW("profiling plugin uninit failed, ret:%d", cb_ret); + } +#endif } 
-Msprof::Engine::PluginIntf *ProfilingEngineImpl::CreatePlugin() { - GELOGI(" Create Plugin"); - return new (std::nothrow) PluginImpl(GE_PROFILING_MODULE); +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport( + ReporterData &reporter_data) const { + return prof_cb_.msprofReporterCallback( + static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), + static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT), + static_cast(&reporter_data), sizeof(ReporterData)); } -int ProfilingEngineImpl::ReleasePlugin(Msprof::Engine::PluginIntf *plugin) { - if (plugin != nullptr) { - delete plugin; - plugin = nullptr; - } - return 0; -} + } // namespace ge diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 66cefc32..c9434a10 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -26,9 +26,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/ge_types.h" #include "external/register/register_types.h" -#include "toolchain/prof_engine.h" -#include "toolchain/prof_mgr_core.h" -#include "toolchain/prof_acl_api.h" +#include "toolchain/prof_callback.h" using std::map; using std::string; @@ -43,29 +41,10 @@ struct DeviceSubsInfo { uint64_t module; uint32_t subscribe_count; }; -// register Plugin -class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { - public: - explicit PluginImpl(const std::string &module); - ~PluginImpl() {} - - int Init(const Msprof::Engine::Reporter *reporter); - int UnInit(); - static Msprof::Engine::Reporter *GetPluginReporter() { return reporter_; } - - private: - static Msprof::Engine::Reporter *reporter_; - std::string module_; -}; -// register Engine -class ProfilingEngineImpl : public Msprof::Engine::EngineIntf { - public: - ProfilingEngineImpl() {} - ~ProfilingEngineImpl() {} - - Msprof::Engine::PluginIntf *CreatePlugin(); - int 
ReleasePlugin(Msprof::Engine::PluginIntf *plugin); +struct MsprofCallback { + MsprofCtrlCallback msprofCtrlCallback; + MsprofReporterCallback msprofReporterCallback; }; class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { @@ -73,68 +52,50 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { ProfilingManager(); virtual ~ProfilingManager(); static ProfilingManager &Instance(); - ge::Status Init(const Options &options); - ge::Status InitFromOptions(const Options &options); - ge::Status InitFromAclCfg(const std::string &config); - ge::Status StartProfiling(int32_t iter, int32_t device_id); - void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); - ge::Status ProfModelSubscribe(uint64_t module, void *model); - ge::Status ProfModelUnsubscribe(void *model); - ge::Status ProfInit(uint64_t module); - ge::Status ProfFinalize(); - ge::Status ProfStartProfiling(uint64_t module, const std::map &config_para); - ge::Status ProfStopProfiling(uint64_t module, const std::map &config_para); + Status Init(const Options &options); + Status ProfInit(uint64_t module); + Status ProfFinalize(); + Status ProfStartProfiling(uint64_t module, const std::map &config_para); + Status ProfStopProfiling(uint64_t module, const std::map &config_para); + Status ProfModelSubscribe(uint64_t module, void *model); + Status ProfModelUnsubscribe(void *model); void StopProfiling(); - bool ProfilingOpTraceOn() const { return is_op_trace_; } - bool ProfilingLoadFlag() const { return is_load_; } bool ProfilingTrainingTraceOn() const { return is_training_trace_; } bool ProfilingModelLoadOn() const { return is_load_profiling_; } bool ProfilingModelExecuteOn() const; bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern - bool IsAclApiMode() const { return is_acl_api_mode_; } - int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } void 
ReportProfilingData(uint32_t model_id, const std::vector &task_desc_info, - const std::vector &compute_graph_desc_info, - bool check_device); - void Report(const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, - Msprof::Engine::ReporterData &reporter_data); + const std::vector &compute_graph_desc_info); void ProfilingTaskDescInfo(uint32_t model_id, const std::vector &task_desc_info, const int32_t &device_id); void ProfilingGraphDescInfo(uint32_t model_id, const std::vector &compute_graph_desc_info, const int32_t &device_id); - void SetProfilingConfig(const string &profiling_cfg); - vector GetProfilingDeviceId() const { return device_id_; } - void PluginUnInit(const std::string &module) const; + Status PluginInit() const; + void PluginUnInit() const; + Status CallMsprofReport(ReporterData &reporter_data) const; + struct MsprofCallback &GetMsprofCallback() { return prof_cb_; } + void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } + void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } private: - ge::Status ParseFeaturesFromAclCfg(const Json &feature); - ge::Status ProfParseParam(const std::map &config_para, int32_t &device_num, - vector &device_list); - ge::Status ProfParseDeviceId(const std::map &config_para, + Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); + Status ProfParseParam(const std::map &config_para, int32_t &device_num, + vector &device_list); + Status ProfParseDeviceId(const std::map &config_para, vector &device_list); uint64_t GetProfilingModule(); + void GraphDescReport(const int32_t &device_id, const string &data); void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector &device_list); - bool is_load_profiling_ = false; - bool is_execute_profiling_ = false; - bool is_op_trace_ = false; - bool is_load_ = false; - bool is_training_trace_ = false; - bool is_acl_api_mode_ = false; - int32_t 
op_trace_iter_num_ = 0; - string job_id_; - string prof_dir_; + void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); + + bool is_load_profiling_; + bool is_execute_profiling_; + bool is_training_trace_; vector device_id_; - vector op_trace_conf_; - vector profiling_opts_; - vector prof_handle_vec_; - string recv_profiling_config_; - string send_profiling_config_; - string system_trace_conf_; - string task_trace_conf_; - const ProfilingEngineImpl engine_; map device_id_module_map_; // key: device_id, value: profiling on module map subs_dev_module_; // key: device_id, value: profiling on module uint32_t subscribe_count_; std::mutex mutex_; + MsprofCallback prof_cb_; }; } // namespace ge #endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index d7dfdc84..29215cc1 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -17,6 +17,7 @@ set(SRC_LIST "../common/dump/dump_properties.cc" "../common/dump/dump_manager.cc" "../common/dump/dump_op.cc" + "../common/profiling/ge_profiling.cc" "../graph/load/graph_loader.cc" "../graph/execute/graph_execute.cc" "../omm/csa_interact.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 26b0a323..57ab7800 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -304,7 +304,7 @@ Status GeExecutor::Finalize() { // Stop profiling if (ProfilingManager::Instance().ProfilingOn()) { ProfilingManager::Instance().StopProfiling(); - ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE); + ProfilingManager::Instance().PluginUnInit(); } GELOGI("Uninit GeExecutor over."); diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 9566ca64..cc64007e 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -8,6 +8,7 @@ local_ge_executor_src_files := \ ../common/dump/dump_op.cc \ ../common/ge/plugin_manager.cc \ ../common/ge/op_tiling_manager.cc \ + 
../common/profiling/ge_profiling.cc \ ../graph/load/graph_loader.cc \ ../graph/execute/graph_execute.cc \ ../omm/csa_interact.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 5205a06c..5e700f2b 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -29,6 +29,7 @@ LIBGE_LOCAL_SRC_FILES := \ common/dump/dump_manager.cc \ common/dump/dump_properties.cc \ common/dump/dump_op.cc \ + common/profiling/ge_profiling.cc \ engine_manager/dnnengine_manager.cc \ ge_local_engine/engine/host_cpu_engine.cc \ generator/ge_generator.cc \ @@ -307,7 +308,6 @@ LIBGE_LOCAL_SRC_FILES := \ LIBCLIENT_LOCAL_SRC_FILES := \ proto/ge_api.proto \ client/ge_api.cc \ - client/ge_prof.cc \ RUNNER_LOCAL_C_INCLUDES := \ $(LOCAL_PATH) ./ \ @@ -409,7 +409,6 @@ endif LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \ - ../../out/ge/lib64/stub/ge_prof.cc \ ../../out/ge/lib64/stub/ge_ir_build.cc \ LOCAL_SHARED_LIBRARIES := diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index d0881d95..18435571 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -712,7 +712,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size // collect profiling for ge auto &profiling_manager = ProfilingManager::Instance(); if (profiling_manager.ProfilingModelLoadOn()) { - Status p_ret = ReportProfilingData(!profiling_manager.IsAclApiMode()); + Status p_ret = ReportProfilingData(); if (p_ret != SUCCESS) { GELOGE(p_ret, "Report profiling data failed."); return p_ret; @@ -723,14 +723,14 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size return ret; } -Status DavinciModel::ReportProfilingData(bool check_device) { +Status DavinciModel::ReportProfilingData() { std::vector compute_graph_desc_info; Status ret = GetComputeGraphInfo(compute_graph_desc_info); if (ret != SUCCESS) { GELOGE(ret, 
"GetComputeGraphInfo failed."); return ret; } - ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info, check_device); + ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); op_list_.clear(); @@ -2250,10 +2250,8 @@ inline int64_t SumSize(const vector &size_list) { Status DavinciModel::SinkModelProfile() { // profiling plugin must be registered - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return SUCCESS); - - Msprof::Engine::ReporterData reporter_data{}; + auto &prof_mgr = ProfilingManager::Instance(); + ReporterData reporter_data; // report model data tag name std::string tag_name; tag_name.append("model_load_info_").append(std::to_string(this->Id())); @@ -2271,32 +2269,32 @@ Status DavinciModel::SinkModelProfile() { reporter_data.deviceId = device_id_; reporter_data.data = (unsigned char *)&name_len; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); reporter_data.data = (unsigned char *)name.c_str(); reporter_data.dataLen = name.size(); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); uint32_t model_id = this->Id(); reporter_data.data = (unsigned char *)&model_id; reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + 
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); // Load Start/End Time int64_t start_time = this->GetLoadBeginTime(); reporter_data.data = (unsigned char *)&start_time; reporter_data.dataLen = sizeof(int64_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); int64_t end_time = this->GetLoadEndTime(); reporter_data.data = (unsigned char *)&end_time; reporter_data.dataLen = sizeof(int64_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); int32_t task_num = task_list_.size(); std::multimap op_id_map; @@ -2310,6 +2308,7 @@ Status DavinciModel::SinkModelProfile() { uint32_t op_num = fusion_op_info->original_op_names.size(); uint32_t task_id = task->GetTaskID(); if (op_num > 0) { + GELOGI("task.id = %u, opNum = %u", task_id, op_num); op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); } } @@ -2352,39 +2351,39 @@ Status DavinciModel::SinkModelProfile() { int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 
1 : fusion_op_name.size(); reporter_data.data = (unsigned char *)&fusion_op_name_len; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); reporter_data.data = (unsigned char *)fusion_op_name.c_str(); reporter_data.dataLen = fusion_op_name_len; - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); // original op name before fusion reporter_data.data = (unsigned char *)&op_num; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); for (uint32_t k = 0; k < op_num; k++) { std::string op_name = fusion_op_info->original_op_names[k]; int32_t op_name_len = op_name.size() == 0 ? 
1 : op_name.size(); reporter_data.data = (unsigned char *)&op_name_len; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); reporter_data.data = (unsigned char *)op_name.c_str(); reporter_data.dataLen = op_name_len; - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); } // stream id info uint32_t streamId = task->GetStreamId(); reporter_data.data = (unsigned char *)&streamId; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); // memory info struct memoryInfo memory_info; @@ -2400,22 +2399,22 @@ Status DavinciModel::SinkModelProfile() { memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; reporter_data.data = (unsigned char *)&memory_info; reporter_data.dataLen = sizeof(struct memoryInfo); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); // task info reporter_data.data = (unsigned char *)&task_count; reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + 
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); Range task_range = op_id_map.equal_range(op_id); for (CIT idx = task_range.first; idx != task_range.second; ++idx) { uint32_t task_id = idx->second; reporter_data.data = (unsigned char *)&task_id; reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); } } } @@ -2424,10 +2423,8 @@ Status DavinciModel::SinkModelProfile() { Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { // profiling plugin must be registered - Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); - GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return SUCCESS); - - Msprof::Engine::ReporterData reporter_data{}; + auto &prof_mgr = ProfilingManager::Instance(); + ReporterData reporter_data; // report model data tag name std::string tag_name; tag_name.append("model_time_info_") @@ -2450,33 +2447,33 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { size_t name_len = name.size(); reporter_data.data = (unsigned char *)&name_len; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); reporter_data.data = (unsigned char *)name.c_str(); reporter_data.dataLen = name.size(); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", - this->Id()); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", 
this->Id()); // request id uint64_t request_id = current_data.request_id; reporter_data.data = (unsigned char *)&request_id; reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); // thread id int32_t thread_id = GetDataInputTid(); reporter_data.data = (unsigned char *)&thread_id; reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); // time info time_info_.modelId = this->Id(); reporter_data.data = (unsigned char *)&time_info_; reporter_data.dataLen = sizeof(struct timeInfo); - GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); return SUCCESS; @@ -2809,71 +2806,32 @@ void *DavinciModel::Run(DavinciModel *model) { } GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); - if (ProfilingManager::Instance().ProfilingOpTraceOn()) { - GELOGI("GetOpTraceIterNum:%d", ProfilingManager::Instance().GetOpTraceIterNum()); - for (int32_t i = 0; i < ProfilingManager::Instance().GetOpTraceIterNum(); i++) { - if (!ProfilingManager::Instance().ProfilingLoadFlag()) { - vector prof_device_id_vec = ProfilingManager::Instance().GetProfilingDeviceId(); - for (size_t j = 0; j < prof_device_id_vec.size(); ++j) { - // just profiling, no need to check value - 
(void)ProfilingManager::Instance().StartProfiling(i, prof_device_id_vec[j]); - } - } - - GELOGI("rtModelExecute start."); - rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; - (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - continue); // [No need to check value] - GELOGI("rtModelExecute end"); - - GELOGI("rtStreamSynchronize start."); - rt_ret = rtStreamSynchronize(model->rt_model_stream_); - if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) { - GELOGI("The model with multiple datasets aborts normally."); - } else { - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; - (void)model->ReturnResult(current_data.index, false, seq_end_flag, data_wrapper->GetOutput()); - continue); // [No need to check value] - } - - GELOGI("rtStreamSynchronize end."); - (void)ProfilingManager::Instance().StopProfiling(); // just profiling, no need to check value - } - } else { - GE_TIMESTAMP_START(rtModelExecute); - GELOGI("rtModelExecute start."); - rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; - (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); - continue); - GELOGI("rtModelExecute end"); - GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtModelExecute, "GraphExcute::rtModelExecute")); - - GE_TIMESTAMP_START(rtStreamSynchronize); - GELOGI("rtStreamSynchronize start."); - rt_ret = rtStreamSynchronize(model->rt_model_stream_); - if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) { - seq_end_flag = true; - } - if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) { - GELOGI("The model with multiple datasets aborts normally."); - } else { - GE_IF_BOOL_EXEC( - rt_ret != 
RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); - (void)model->ReturnResult(current_data.index, false, seq_end_flag, - data_wrapper->GetOutput()); // [No need to check value] - CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); - continue); - } - - GELOGI("rtStreamSynchronize end."); - GE_IF_BOOL_EXEC(model->is_first_execute_, - GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_END)); - } - + GE_TIMESTAMP_START(rtModelExecute); + GELOGI("rtModelExecute start."); + rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; + (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); + CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); + continue); + GELOGI("rtModelExecute end"); + GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtModelExecute, "GraphExcute::rtModelExecute")); + + GE_TIMESTAMP_START(rtStreamSynchronize); + GELOGI("rtStreamSynchronize start."); + rt_ret = rtStreamSynchronize(model->rt_model_stream_); + if (rt_ret == RT_ERROR_END_OF_SEQUENCE) { + seq_end_flag = true; + } + GE_IF_BOOL_EXEC( + rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); + (void)model->ReturnResult(current_data.index, false, seq_end_flag, + data_wrapper->GetOutput()); // [No need to check value] + CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); + continue); + GELOGI("rtStreamSynchronize end."); + GE_IF_BOOL_EXEC(model->is_first_execute_, + GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); + 
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_AFTER_PROC_START)); GE_TIMESTAMP_START(ReturnResult3); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 650f19eb..19888e1f 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -440,7 +440,7 @@ class DavinciModel { Status SinkTimeProfile(const InputData ¤t_data); - Status ReportProfilingData(bool check_device = true); + Status ReportProfilingData(); void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 0b55d150..b595ac39 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -40,9 +40,7 @@ const int kCmdParSize = 2; const int kDumpCmdPairSize = 2; const std::size_t kProfCmdParaMaxSize = 1000; const std::size_t kProfStartCmdParaSize = 2; -const std::string kCmdTypeProfile = "profile"; const std::string kCmdTypeDump = "dump"; -const std::string kCmdTypeProfiling = "profiling"; const std::string kCmdTypeProfInit = "prof_init"; const std::string kCmdTypeProfFinalize = "prof_finalize"; const std::string kCmdTypeProfStart = "prof_start"; @@ -632,8 +630,7 @@ Status ModelManager::Stop(uint32_t model_id) { /// Status ModelManager::HandleCommand(const Command &command) { static const std::map> cmds = { - {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, - {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, + {kCmdTypeDump, HandleDumpCommand}, {kCmdTypeProfInit, 
HandleProfInitCommand}, {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, {kCmdTypeProfStop, HandleProfStopCommand}, {kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand}, @@ -648,21 +645,6 @@ Status ModelManager::HandleCommand(const Command &command) { } } -Status ModelManager::HandleAclProfilingCommand(const Command &command) { - if (command.cmd_params.size() < kCmdParSize) { - GELOGE(PARAM_INVALID, "When the cmd_type is 'profiling', the size of cmd_params must larger than 2."); - return PARAM_INVALID; - } - - std::string map_key = command.cmd_params[0]; - std::string value = command.cmd_params[1]; - if (map_key == PROFILE_CONFIG) { - ProfilingManager::Instance().SetProfilingConfig(value); - } - - return SUCCESS; -} - Status ModelManager::GetModelByCmd(const Command &command, std::shared_ptr &davinci_model) { if (command.cmd_params.size() < kCmdParSize) { @@ -809,29 +791,6 @@ Status ModelManager::HandleProfStopCommand(const Command &command) { return SUCCESS; } -Status ModelManager::HandleProfileCommand(const Command &command) { - if (command.cmd_params.size() < kCmdParSize) { - GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2."); - return PARAM_INVALID; - } - - std::string map_key = command.cmd_params[0]; - std::string value = command.cmd_params[1]; - - GELOGI("Profiling mode, Command key:%s , value:%s ", map_key.c_str(), value.c_str()); - - auto iter = PROFILE_COMPONENT_MAP.find(map_key); - if (iter != PROFILE_COMPONENT_MAP.end()) { - std::string property_value = (value == "on") ? 
"1" : "0"; - PropertiesManager::Instance().SetPropertyValue(iter->second, property_value); - } - - if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) { - PropertiesManager::Instance().SetPropertyValue(map_key, value); - } - return SUCCESS; -} - static Status ParserPara(const Command &command, const string &dump_key, string &dump_value) { auto iter = std::find(command.cmd_params.begin(), command.cmd_params.end(), dump_key); if (iter != command.cmd_params.end()) { diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index dc3a5219..fc98d9c2 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -169,8 +169,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// @brief comment handle function /// ge::Status HandleCommand(const Command &command); - static ge::Status HandleAclProfilingCommand(const Command &command); - static ge::Status HandleProfileCommand(const Command &command); static ge::Status HandleDumpCommand(const Command &command); static ge::Status HandleProfModelSubscribeCommand(const Command &command); static ge::Status HandleProfModelUnsubscribeCommand(const Command &command); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index e6729352..b984eec3 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -260,8 +260,7 @@ Status NodeDoneCallback::ProfilingReport() { } auto &profiling_manager = ProfilingManager::Instance(); - profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info, - !profiling_manager.IsAclApiMode()); + profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info); return SUCCESS; } diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 306a804a..92700179 100755 --- 
a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -485,11 +485,9 @@ Status GELib::Finalize() { void GELib::ShutDownProfiling() { std::lock_guard lock(status_mutex_); - if (!ProfilingManager::Instance().ProfilingOpTraceOn() && ProfilingManager::Instance().ProfilingOn()) { - ProfilingManager::Instance().StopProfiling(); - } if (ProfilingManager::Instance().ProfilingOn()) { - ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE); + ProfilingManager::Instance().StopProfiling(); + ProfilingManager::Instance().PluginUnInit(); } } diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 6e51b6ff..a2652b67 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -72,8 +72,7 @@ Status ProfilingTaskInfo(OpTask *op_task) { std::vector compute_graph_info; auto &profiling_manager = ProfilingManager::Instance(); - profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info, - !profiling_manager.IsAclApiMode()); + profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info); return SUCCESS; } } // namespace diff --git a/ge/stub/gen_stubapi.py b/ge/stub/gen_stubapi.py index d19b44a6..1476d505 100644 --- a/ge/stub/gen_stubapi.py +++ b/ge/stub/gen_stubapi.py @@ -71,7 +71,7 @@ max_code_len_per_line = 100 when DEBUG on """ white_list_for_debug = ["attr_value.h", "operator.h", "tensor.h", "graph.h", "operator_factory.h", - "ge_ir_build.h", "ge_api.h", "ge_prof.h", "tensorflow_parser.h", "caffe_parser.h"] + "ge_ir_build.h", "ge_api.h", "tensorflow_parser.h", "caffe_parser.h"] include_dir_key_words = ["ge", "graph", "parser"] DEBUG = True diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h new file mode 100644 index 00000000..65a0eebb --- /dev/null +++ b/inc/framework/common/profiling/ge_profiling.h @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may 
not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_GE_PROFILING_H_ +#define INC_FRAMEWORK_COMMON_GE_PROFILING_H_ + +#include "ge/ge_api_error_codes.h" +#include "toolchain/prof_callback.h" + +#define MAX_DEV_NUM (64) +enum ProfCommandHandleType { + kProfCommandhandleInit = 0, + kProfCommandhandleStart, + kProfCommandhandleStop, + kProfCommandhandleFinalize, + kProfCommandhandleModelSubscribe, + kProfCommandhandleModelUnsubscribe +}; + +struct ProfCommandHandleData { + unit64_t profSwitch; + uint32_t devNums; // length of device id list + uint32_t devIdList[MAX_DEV_NUM]; + uint32_t modelId; +} + +ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); +ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); +ge::Status RegProfReporterCallback(MsprofReporterCallback func); +ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); + +#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ \ No newline at end of file diff --git a/inc/framework/common/profiling/ge_profiling_cb.h b/inc/framework/common/profiling/ge_profiling_cb.h new file mode 100644 index 00000000..025232ff --- /dev/null +++ b/inc/framework/common/profiling/ge_profiling_cb.h @@ -0,0 +1,24 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_COMMON_GE_PROFILING_CB_H_ +#define INC_FRAMEWORK_COMMON_GE_PROFILING_CB_H_ + +#include "profiling/ge_profiling.h" + +bool IsInitialize(); + +#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_CB_H_ \ No newline at end of file From 62b3e8eeb4ad9ee4610dd717bd9a5481c6b65438 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 16:59:21 +0800 Subject: [PATCH 103/127] profiling AR version2 --- ge/CMakeLists.txt | 4 +- ge/common/profiling/ge_profiling.cc | 3 +- ge/common/profiling/profiling_manager.cc | 148 ++++++++++++++++------- ge/common/profiling/profiling_manager.h | 6 +- ge/graph/build/task_generator.cc | 20 +-- 5 files changed, 120 insertions(+), 61 deletions(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 7079e432..e36b45d9 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -648,9 +648,11 @@ target_include_directories(ge_runner PRIVATE target_link_libraries(ge_runner $ + -Wl,--whole-archive + msprofiler_fwk + -Wl,--no-whole-archive ge_memory adump_server - msprofiler_fwk static_mmpa -Wl,--no-as-needed graph diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index 8da6b12a..bab699cc 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -125,8 +125,9 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { ge::Status RegProfReporterCallback(MsprofReporterCallback func) { if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { - GELOGW("Msprof ctrl callback is exist, just 
ignore it."); + GELOGW("Msprof reporter callback is exist, just ignore it."); } else { + GELOGI("GE register Msprof reporter callback."); ge::ProfilingManager::Instance().SetMsprofReporterCallback(func); } // Pass MsprofReporterCallback to runtime diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index bcf6d366..fd56f15d 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -24,16 +24,9 @@ #include "graph/load/new_model_manager/davinci_model.h" namespace { -const char *const kJobID = "jobID"; -const char *const kDeviceID = "deviceID"; -const char *const kStartCfg = "startCfg"; -const char *const kFeatures = "features"; -const char *const kConf = "conf"; -const char *const kEvents = "events"; -const char *const kAiCoreEvents = "ai_core_events"; -const char *const kName = "name"; -const char *const kTraceID = "traceId"; -const char *const kProfDir = "resultPath"; +const char *const kTrainingTrace = "training_trace"; +const char *const kFpPoint = "fp_point"; +const char *const kBpPoint = "bp_point"; const size_t kReportMaxLen = 2048; const int32_t kMaxDeviceNum = 256; const std::string kConfigNumsdev = "devNums"; @@ -70,7 +63,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In return ret; } - if (is_load_profiling_) { + if (is_execute_profiling_) { int32_t cb_ret = prof_cb_.msprofCtrlCallback( static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), static_cast(&prof_conf), sizeof(MsprofGeOptions)); @@ -91,37 +84,42 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt #ifdef DAVINCI_SUPPORT_PROFILING // enable profiling by env char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 }; - is_load_profiling_ = false; + is_load_profiling_ = false; // Change in ProfInit is_execute_profiling_ = false; - (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); - (void)mmGetEnv("PROFILING_OPTIONS", 
prof_conf.options, sizeof(MsprofGeOptions)); - - if ((env_profiling_mode != nullptr) && (strcmp("true", env_profiling_mode) == 0) - && (strcmp(prof_conf.options, "\0") != 0)) { - // enable profiling by env - is_load_profiling_ = true; - is_execute_profiling_ = true; - GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options); - } else { - if (options.profiling_mode != "1" || options.profiling_options.empty()) { - return SUCCESS; - } + if (options.profiling_mode == "1" && !options.profiling_options.empty()) { // enable profiling by ge option - if (memcpy_s(prof_conf.options, sizeof(prof_conf.options), options.profiling_options.c_str(), + if (memcpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), sizeof(options.profiling_options.c_str())) != EOK) { GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); return INTERNAL_ERROR; } - is_load_profiling_ = true; is_execute_profiling_ = true; GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), prof_conf.options); + } else { + (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); + (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); + // The env is invalid + if ((env_profiling_mode == nullptr) || (strcmp("true", env_profiling_mode) != 0) + || (strcmp(prof_conf.options, "\0") == 0)) { + return SUCCESS; + } + // enable profiling by env + is_execute_profiling_ = true; + GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options); } - if (!is_load_profiling_) { + if (!is_execute_profiling_) { return SUCCESS; } + // Parse json str for bp fp + Status ret = ParseOptions(prof_conf.options); + if (ret != ge::SUCCESS) { + GELOGE(ge::PARAM_INVALID, "Parse taining trace param failed."); + return ge::PARAM_INVALID; + } + if (memcpy_s(prof_conf.jobId, sizeof(prof_conf.jobId), options.job_id.c_str(), sizeof(options.job_id.c_str())) != EOK) { GELOGE(INTERNAL_ERROR, "copy job_id failed."); @@ 
-134,23 +132,55 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt return ge::SUCCESS; } +ge::Status ProfilingManager::ParseOptions(const std::string &options) { + if (options.empty()) { + GELOGE(ge::PARAM_INVALID, "Profiling options is empty.") + return ge::PARAM_INVALID; + } + try { + Json prof_options = Json::parse(options); + const std::string training_trace = prof_options[kTrainingTrace]; + if (training_trace.empty()) { + GELOGI("Training trace will not take effect."); + return ge::SUCCESS; + } + GELOGI("GE profiling training trace:%s", training_trace.c_str()); + if (training_trace != "on") { + GELOGE(ge::PARAM_INVALID, "Training trace param:%s is invalid.", training_trace.c_str()); + return ge::PARAM_INVALID; + } + fp_point = prof_options[kFpPoint]; + bp_point = prof_options[kBpPoint]; + if (!fp_point_.empty() && !bp_point_.empty()) { + GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); + } + } catch (...) 
{ + GELOGE(FAILED, "Json prof_conf options is invalid."); + return ge::PARAM_INVALID; + } + return ge::SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProfiling() { #ifdef DAVINCI_SUPPORT_PROFILING uint64_t module = GetProfilingModule(); + // The following if case will not be executed in normal case, inc case of ProfStopProfiling is abnormal int32_t device_num = static_cast(device_id_.size()); - auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); - if (device_id_ptr == nullptr) { - GELOGE(FAILED, "Stop profiling: device id ptr is null."); - return; - } - for (int32_t i = 0; i < device_num; i++) { - device_id_ptr[i] = static_cast(device_id_[i]); - } - rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); - if (rt_ret != RT_ERROR_NONE) { - GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); + if (device_num != 0) { + auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); + if (device_id_ptr == nullptr) { + GELOGE(FAILED, "Stop profiling: device id ptr is null."); + return; + } + for (int32_t i = 0; i < device_num; i++) { + device_id_ptr[i] = static_cast(device_id_[i]); + } + rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); + if (rt_ret != RT_ERROR_NONE) { + GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); + } } - + // stop profiling int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), nullptr, 0); @@ -475,6 +505,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi std::lock_guard lock(mutex_); is_load_profiling_ = false; is_training_trace_ = false; + is_execute_profiling_ = false; // profiling plugin uninit PluginUnInit(); @@ -714,7 +745,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin execute_model_prof_on = true; } GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", 
is_execute_profiling_, execute_model_prof_on); - return is_execute_profiling_ || execute_model_prof_on; + return execute_model_prof_on; } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() const { @@ -744,5 +775,40 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs static_cast(&reporter_data), sizeof(ReporterData)); } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( + std::string &fp_point, std::string &bp_point) { + // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init + if (!fp_point_.empty() && !bp_point_.empty()) { + GELOGI("Bp Fp have been initialized in env or options"); + fp_point = fp_point_; + bp_point = bp_point_; + GELOGI("Bp Fp have been initailized in env or options, bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str()); + return; + } + // ProfApi mode and training trace is set + try { + char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 }; + INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX); + if (ret != EN_OK) { + GELOGI("PROFILING_OPTIONS env is not exist."); + return; + } + GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options); + Json prof_options = Json::parse(env_profiling_options); + + fp_point_ = prof_options[kFpPoint]; + bp_point_ = prof_options[kBpPoint]; + + fp_point = fp_point_; + bp_point = bp_point_; + if (!fp_point_.empty() && !bp_point_.empty()) { + GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); + } + } catch (...) 
{ + GELOGE(FAILED, "Json prof options is invalid."); + return ge::PARAM_INVALID; + } +} + } // namespace ge diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index c9434a10..c9ceed92 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -63,7 +63,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { bool ProfilingTrainingTraceOn() const { return is_training_trace_; } bool ProfilingModelLoadOn() const { return is_load_profiling_; } bool ProfilingModelExecuteOn() const; - bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern + bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env void ReportProfilingData(uint32_t model_id, const std::vector &task_desc_info, const std::vector &compute_graph_desc_info); void ProfilingTaskDescInfo(uint32_t model_id, const std::vector &task_desc_info, @@ -76,8 +76,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { struct MsprofCallback &GetMsprofCallback() { return prof_cb_; } void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } + void GetFpBpPoint(std::string &fp_point, std::string &bp_point); private: Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); + Status ParseOptions(const std::string &options); Status ProfParseParam(const std::map &config_para, int32_t &device_num, vector &device_list); Status ProfParseDeviceId(const std::map &config_para, @@ -96,6 +98,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { uint32_t subscribe_count_; std::mutex mutex_; MsprofCallback prof_cb_; + std::string fp_point_; + std::string bp_point_; }; } // namespace ge #endif // 
GE_COMMON_PROFILING_PROFILING_MANAGER_H_ diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 41607f1f..b506f945 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -49,8 +49,6 @@ const char *const kIsLastNode = "is_last_node"; const char *const kIsInputVar = "INPUT_IS_VAR"; const char *const kIsOutputVar = "OUTPUT_IS_VAR"; const char *const kProfilingMode = "PROFILING_MODE"; -const char *const kProfilingFpPoint = "FP_POINT"; -const char *const kProfilingBpPoint = "BP_POINT"; const uint32_t kProfilingArStep = 2; const uint64_t kProfilingFpStartLogid = 1; const uint64_t kProfilingBpEndLogid = 2; @@ -810,35 +808,23 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint vector &all_reduce_nodes, std::string &fp_point_str, std::string &bp_point_str) const { - if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_FPPONIT_OPTIONS, fp_point_str) == SUCCESS && - ge::GetContext().GetOption(OPTION_EXEC_PROFILING_BPPONIT_OPTIONS, bp_point_str) == SUCCESS && - !fp_point_str.empty() && !bp_point_str.empty()) { - return SUCCESS; - } + ProfilingManager::Instance().GetFpBpPoint(fp_point_str, bp_point_str); Status ret = SUCCESS; - const char *fp_point = std::getenv(kProfilingFpPoint); - if (fp_point == nullptr) { + if (fp_point_str.empty()) { ret = AutoFindFpOpIndex(graph, profiling_point); if (ret != SUCCESS) { GELOGW("First forward profiling op_index not set and FindFpOpIndex failed."); return FAILED; } - } else { - fp_point_str = string(fp_point); - GELOGI("Get fp_point_str from env %s", fp_point_str.c_str()); } - const char *bp_point = std::getenv(kProfilingBpPoint); - if (bp_point == nullptr) { + if (bp_point_str.empty()) { ret = AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes); if (ret != SUCCESS) { GELOGW("Last backward profiling op_index not set and FindBpOpIndex failed."); return FAILED; } - } else { - bp_point_str = string(bp_point); - GELOGI("Get bp_point_str from 
env %s", bp_point_str.c_str()); } return SUCCESS; From 612c82f405cdde94b26e538d58942e94dda5d9fd Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 17:41:02 +0800 Subject: [PATCH 104/127] Profiling AR version 3 --- ge/common/profiling/ge_profiling.cc | 74 ++++++++++++++++-------- ge/common/profiling/profiling_manager.cc | 56 ++++++++++++++++-- ge/common/profiling/profiling_manager.h | 17 ++++++ 3 files changed, 119 insertions(+), 28 deletions(-) diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index bab699cc..4fc61ca0 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -22,6 +22,7 @@ #include "graph/load/graph_loader.h" #include "init/gelib.h" #include "framework/common/ge_inner_error_codes.h" +#include "opskernel_manager/ops_kernel_builder_manager.h" namespace { const uint32_t kDeviceListIndex = 3; @@ -44,7 +45,7 @@ const std::map kProfCommandTypeMap = { {kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}}; } // namespace -bool TransProfConfigToParam(const MsprofCommandHandle &profCommand, vector &prof_config_params) { +bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector &prof_config_params) { prof_config_params.clear(); prof_config_params.emplace_back(kDeviceNums); prof_config_params.emplace_back(std::to_string(profCommand.devNums)); @@ -71,7 +72,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { return false; } if (device_nums == 0 || device_nums > MAX_DEV_NUM) { - GELOGE(ge::PARAM_INVALID, "The device nums is invalid."); + GELOGE(ge::PARAM_INVALID, "The device nums: %u is invalid.", device_nums); return false; } @@ -105,16 +106,26 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { } ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { + if (func == nullptr) { + GELOGE(ge::PARAM_INVALID, "Msprof ctrl callback is nullptr."); + return ge::PARAM_INVALID; + } if 
(ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { GELOGW("Msprof ctrl callback is exist, just ignore it."); } else { + GELOGI("GE register Msprof ctrl callback."); ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); } return ge::SUCCESS; } ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { + if (func == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofSetDeviceCallback callback is nullptr."); + return ge::PARAM_INVALID; + } // Pass MsprofSetDeviceCallback to runtime + GELOGI("GE pass setdevice callback to runtime."); ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast(func)); if (rt_ret != ge::SUCCESS) { GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); @@ -124,54 +135,71 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { } ge::Status RegProfReporterCallback(MsprofReporterCallback func) { + if (func == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + return ge::PARAM_INVALID; + } if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { GELOGW("Msprof reporter callback is exist, just ignore it."); } else { GELOGI("GE register Msprof reporter callback."); ge::ProfilingManager::Instance().SetMsprofReporterCallback(func); + // Pass MsprofReporterCallback to runtime + ge::Status rt_ret = rtSetMsprofReporterCallback(func); + if (rt_ret != ge::SUCCESS) { + GELOGE(rt_ret, "Pass MsprofReporterCallback to runtime failed!!"); + return rt_ret; + } + // Pass MsprofReporterCallback to hccl in opskernel so initialize + rt_ret = OpskernelBuilderManager::Instance().RegProfReporterCallback(func); + if (rt_ret != ge::SUCCESS) { + GELOGE(rt_ret, "Pass MsprofReporterCallback to hccl failed."); + return rt_ret; + } } - // Pass MsprofReporterCallback to runtime - ge::Status rt_ret = rtSetMsprofReporterCallback(func); - if (rt_ret != ge::SUCCESS) { - GELOGE(rt_ret, "Pass 
MsprofReporterCallback to runtime failed!!"); - return rt_ret; - } - // Pass MsprofReporterCallback to hccl in opskernel so initialize - return ge::SUCCESS; } ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) { - GE_CHECK_NOTNULL(data); - MsprofCommandHandle *prof_config_param = (MsprofCommandHandle *)data; - if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { - return ge::FAILED; - } - std::vector prof_params; - if (!TransProfConfigToParam(*prof_config_param, prof_params)) { - GELOGE(ge::PARAM_INVALID, "Transfer profilerConfig to string vector failed"); - return ge::PARAM_INVALID; + if (type != kProfCommandhandleFinalize) { + GE_CHECK_NOTNULL(data); } + MsprofCommandHandleData *prof_config_param = (MsprofCommandHandleData *)data; auto iter = kProfCommandTypeMap.find(type); if (iter == kProfCommandTypeMap.end()) { GELOGW("The prof comand type is invalid."); return ge::PARAM_INVALID; } + std::vector prof_params; + if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { + if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { + return ge::FAILED; + } + + if (!TransProfConfigToParam(*prof_config_param, prof_params)) { + GELOGE(ge::PARAM_INVALID, "Transfer profilerConfig to string vector failed"); + return ge::PARAM_INVALID; + } + } ge::GraphLoader graph_loader; ge::Command command; command.cmd_params.clear(); command.cmd_type = iter->second; command.cmd_params = prof_params; - command.module_index = prof_config_param->profSwitch; - GELOGI("GE commandhandle execute, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), - prof_params[kDeviceListIndex].c_str(), command.module_index); + if (type != kProfCommandhandleFinalize) { + command.module_index = prof_config_param->profSwitch; + } + GELOGI("GE commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index); + if (type == kProfCommandhandleStart || 
type == kProfCommandhandleStop) { + GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); + } ge::Status ret = graph_loader.CommandHandle(command); if (ret != ge::SUCCESS) { GELOGE(ret, "Handle profiling command failed"); return ge::FAILED; } - GELOGI("Successfully execute profiling command 0x%llx.", command.module_index); + GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index); return ge::SUCCESS; } diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index fd56f15d..bfc747b5 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -22,6 +22,7 @@ #include "graph/ge_context.h" #include "runtime/base.h" #include "graph/load/new_model_manager/davinci_model.h" +#include "opskernel_manager/ops_kernel_builder_manager.h" namespace { const char *const kTrainingTrace = "training_trace"; @@ -41,7 +42,10 @@ namespace ge { ProfilingManager::ProfilingManager() : is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), - subscribe_count_(0) {} + subscribe_count_(0) { + prof_cb_.msprofCtrlCallback = nullptr; + prof_cb_.msprofReporterCallback = nullptr; +} ProfilingManager::~ProfilingManager() {} @@ -64,6 +68,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } if (is_execute_profiling_) { + if (prof_cb_.msprofCtrlCallback == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr."); + return ge::PARAM_INVALID; + } int32_t cb_ret = prof_cb_.msprofCtrlCallback( static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), static_cast(&prof_conf), sizeof(MsprofGeOptions)); @@ -116,7 +124,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt // Parse json str for bp fp Status ret = ParseOptions(prof_conf.options); if (ret != ge::SUCCESS) { - 
GELOGE(ge::PARAM_INVALID, "Parse taining trace param failed."); + GELOGE(ge::PARAM_INVALID, "Parse training trace param failed."); return ge::PARAM_INVALID; } @@ -182,6 +190,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf } // stop profiling + if (prof_cb_.msprofCtrlCallback == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr."); + return ge::PARAM_INVALID; + } int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), nullptr, 0); if (cb_ret != 0) { @@ -210,7 +222,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin .append(std::to_string(stream_id)).append(" ") .append(std::to_string(model_id)).append("\n")); - ReporterData reporter_data; + ReporterData reporter_data{}; reporter_data.deviceId = device_id; reporter_data.data = (unsigned char *)data.c_str(); reporter_data.dataLen = data.size(); @@ -298,7 +310,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { #ifdef DAVINCI_SUPPORT_PROFILING - ReporterData reporter_data; + ReporterData reporter_data{}; int ret = -1; int32_t cb_ret = -1; size_t index = data.size() / kReportMaxLen; @@ -487,6 +499,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn GELOGE(FAILED, "Runtime profiler start failed."); return FAILED; } + Status hccl_ret = OpskernelBuilderManager::Instance().ProfStart(model_load_mask); + if (hccl_ret != SUCCESS) { + GELOGE(FAILED, "Hccl profiler start failed."); + return FAILED; + } is_load_profiling_ = true; GELOGI("Prof init: model load profiling on."); } @@ -517,6 +534,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi return FAILED; } + Status hccl_ret = OpskernelBuilderManager::Instance().ProfStop(PROF_MODEL_LOAD_MASK); + if (hccl_ret != SUCCESS) { + GELOGE(FAILED, 
"Hccl profiler stop failed."); + return FAILED; + } for (auto device_id_module : device_id_module_map_) { if (device_id_module.second != 0) { uint32_t device_id = static_cast(device_id_module.first); @@ -640,6 +662,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGE(FAILED, "Runtime profiler config proc failed."); return FAILED; } + + Status hccl_ret = OpskernelBuilderManager::Instance().ProfStart(module); + if (hccl_ret != SUCCESS) { + GELOGE(FAILED, "Hccl profiler start failed."); + return FAILED; + } if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { for (int32_t i = 0; i < device_num; i++) { if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { @@ -681,6 +709,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); return FAILED; } + Status hccl_ret = OpskernelBuilderManager::Instance().ProfStop(module); + if (hccl_ret != SUCCESS) { + GELOGE(FAILED, "Hccl profiler stop failed."); + return FAILED; + } uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { for (int32_t i = 0; i < device_num; i++) { @@ -749,6 +782,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() const { + if (prof_cb_.msprofReporterCallback == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + return ge::PARAM_INVALID; + } return prof_cb_.msprofReporterCallback( static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), @@ -757,6 +794,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Plugin FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit() const { #ifdef 
DAVINCI_SUPPORT_PROFILING + if (prof_cb_.msprofReporterCallback == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + return ge::PARAM_INVALID; + } int32_t cb_ret = prof_cb_.msprofReporterCallback( static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), @@ -769,6 +810,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport( ReporterData &reporter_data) const { + if (prof_cb_.msprofReporterCallback == nullptr) { + GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); + return ge::PARAM_INVALID; + } return prof_cb_.msprofReporterCallback( static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT), @@ -806,8 +851,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP } } catch (...) 
{ GELOGE(FAILED, "Json prof options is invalid."); - return ge::PARAM_INVALID; + return; } + return; } diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index c9ceed92..5fa4fac4 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -35,6 +35,23 @@ using Json = nlohmann::json; namespace { const std::string GE_PROFILING_MODULE = "Framework"; + // DataTypeConfig MASK + #define PROF_ACL_API_MASK 0x0001 + #define PROF_TASK_TIME_MASK 0x0002 + #define PROF_AICORE_METRICS_MASK 0x0004 + #define PROF_AICPU_TRACE_MASK 0x0008 + #define PROF_MODEL_EXECUTE_MASK 0x0010 + #define PROF_RUNTIME_API_MASK 0x0020 + #define PROF_RUNTIME_TRACE_MASK 0x0040 + #define PROF_SCHEDULE_TIMELINE_MASK 0x0080 + #define PROF_SCHEDULE_TRACE_MASK 0x0100 + #define PROF_AIVECTORCORE_METRICS_MASK 0x0200 + #define PROF_SUBTASK_TIME_MASK 0x0400 + #define PROF_TRAINING_TRACE_MASK 0x0800 + #define PROF_HCCL_TRACE_MASK 0x1000 + #define PROF_DATA_PROCESS_MASK 0x2000 + #define PROF_MODEL_LOAD_MASK 0x8000000000000000 + } // namespace namespace ge { struct DeviceSubsInfo { From 09e82a80daad693addcd56d5be64d43407b765f9 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 19:36:21 +0800 Subject: [PATCH 105/127] Profiling AR version4 --- ge/common/profiling/ge_profiling.cc | 6 ++--- ge/common/profiling/profiling_manager.cc | 22 +++++++++---------- .../load/new_model_manager/davinci_model.cc | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index 4fc61ca0..b6708f7a 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -139,7 +139,7 @@ ge::Status RegProfReporterCallback(MsprofReporterCallback func) { GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); return ge::PARAM_INVALID; } - if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback 
!= nullptr) { + if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofReporterCallback != nullptr) { GELOGW("Msprof reporter callback is exist, just ignore it."); } else { GELOGI("GE register Msprof reporter callback."); @@ -151,7 +151,7 @@ ge::Status RegProfReporterCallback(MsprofReporterCallback func) { return rt_ret; } // Pass MsprofReporterCallback to hccl in opskernel so initialize - rt_ret = OpskernelBuilderManager::Instance().RegProfReporterCallback(func); + rt_ret = OpsKernelBuilderManager::Instance().RegProfReporterCallBack(func); if (rt_ret != ge::SUCCESS) { GELOGE(rt_ret, "Pass MsprofReporterCallback to hccl failed."); return rt_ret; @@ -164,7 +164,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le if (type != kProfCommandhandleFinalize) { GE_CHECK_NOTNULL(data); } - MsprofCommandHandleData *prof_config_param = (MsprofCommandHandleData *)data; + ProfCommandHandleData *prof_config_param = (ProfCommandHandleData *)data; auto iter = kProfCommandTypeMap.find(type); if (iter == kProfCommandTypeMap.end()) { GELOGW("The prof comand type is invalid."); diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index bfc747b5..222cde6a 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -76,7 +76,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), static_cast(&prof_conf), sizeof(MsprofGeOptions)); if (cb_ret != 0) { - GELOGE(FAILED, "call msprofCtrlCallback failed, type:%u, return:%d", + GELOGE(FAILED, "Call msprofCtrlCallback failed, type:%u, return:%d", static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); return FAILED; } @@ -110,7 +110,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt // The env is invalid if ((env_profiling_mode == nullptr) || (strcmp("true", 
env_profiling_mode) != 0) || (strcmp(prof_conf.options, "\0") == 0)) { - return SUCCESS; + return SUCCESS; } // enable profiling by env is_execute_profiling_ = true; @@ -157,8 +157,8 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { GELOGE(ge::PARAM_INVALID, "Training trace param:%s is invalid.", training_trace.c_str()); return ge::PARAM_INVALID; } - fp_point = prof_options[kFpPoint]; - bp_point = prof_options[kBpPoint]; + fp_point_ = prof_options[kFpPoint]; + bp_point_ = prof_options[kBpPoint]; if (!fp_point_.empty() && !bp_point_.empty()) { GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); } @@ -175,7 +175,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf // The following if case will not be executed in normal case, inc case of ProfStopProfiling is abnormal int32_t device_num = static_cast(device_id_.size()); if (device_num != 0) { - auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); + auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); if (device_id_ptr == nullptr) { GELOGE(FAILED, "Stop profiling: device id ptr is null."); return; @@ -499,7 +499,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn GELOGE(FAILED, "Runtime profiler start failed."); return FAILED; } - Status hccl_ret = OpskernelBuilderManager::Instance().ProfStart(model_load_mask); + Status hccl_ret = OpsKernelBuilderManager::Instance().ProfStart(model_load_mask); if (hccl_ret != SUCCESS) { GELOGE(FAILED, "Hccl profiler start failed."); return FAILED; @@ -534,7 +534,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi return FAILED; } - Status hccl_ret = OpskernelBuilderManager::Instance().ProfStop(PROF_MODEL_LOAD_MASK); + Status hccl_ret = OpsKernelBuilderManager::Instance().ProfStop(PROF_MODEL_LOAD_MASK); if (hccl_ret != SUCCESS) { GELOGE(FAILED, "Hccl profiler 
stop failed."); return FAILED; @@ -663,7 +663,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt return FAILED; } - Status hccl_ret = OpskernelBuilderManager::Instance().ProfStart(module); + Status hccl_ret = OpsKernelBuilderManager::Instance().ProfStart(module); if (hccl_ret != SUCCESS) { GELOGE(FAILED, "Hccl profiler start failed."); return FAILED; @@ -709,7 +709,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); return FAILED; } - Status hccl_ret = OpskernelBuilderManager::Instance().ProfStop(module); + Status hccl_ret = OpsKernelBuilderManager::Instance().ProfStop(module); if (hccl_ret != SUCCESS) { GELOGE(FAILED, "Hccl profiler stop failed."); return FAILED; @@ -796,7 +796,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn #ifdef DAVINCI_SUPPORT_PROFILING if (prof_cb_.msprofReporterCallback == nullptr) { GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); - return ge::PARAM_INVALID; + return; } int32_t cb_ret = prof_cb_.msprofReporterCallback( static_cast(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), @@ -827,7 +827,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP GELOGI("Bp Fp have been initialized in env or options"); fp_point = fp_point_; bp_point = bp_point_; - GELOGI("Bp Fp have been initailized in env or options, bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str()); + GELOGI("Bp Fp have been initialized in env or options. 
bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str()); return; } // ProfApi mode and training trace is set diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 18435571..bade866f 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2251,7 +2251,7 @@ inline int64_t SumSize(const vector &size_list) { Status DavinciModel::SinkModelProfile() { // profiling plugin must be registered auto &prof_mgr = ProfilingManager::Instance(); - ReporterData reporter_data; + ReporterData reporter_data{}; // report model data tag name std::string tag_name; tag_name.append("model_load_info_").append(std::to_string(this->Id())); @@ -2424,7 +2424,7 @@ Status DavinciModel::SinkModelProfile() { Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { // profiling plugin must be registered auto &prof_mgr = ProfilingManager::Instance(); - ReporterData reporter_data; + ReporterData reporter_data{}; // report model data tag name std::string tag_name; tag_name.append("model_time_info_") From 2e87815ba5382d6b3af45a5f39215a431aac39f4 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 19:56:58 +0800 Subject: [PATCH 106/127] Profiling AR version 5 --- ge/init/gelib.cc | 6 +++- .../ops_kernel_builder_manager.cc | 28 +++++++++++++++++++ .../ops_kernel_builder_manager.h | 6 ++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 92700179..20859c07 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -145,6 +145,10 @@ Status GELib::InnerInitialize(const map &options) { return initOpsBuilderStatus; } + if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { + GE_CHK_RT_RET(rtSetDevice(options.device_id)); + } + GELOGI("sessionManager initial."); GE_TIMESTAMP_START(SessionManagerInitialize); Status initSmStatus = sessionManager_.Initialize(options); @@ -338,7 +342,7 
@@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt // set device id GELOGI("set logical device id:%u", options.device_id); GetContext().SetCtxDeviceId(static_cast(options.device_id)); - GE_CHK_RT_RET(rtSetDevice(options.device_id)); + // GE_CHK_RT_RET(rtSetDevice(options.device_id)); // In the scenario that the automatic add fusion is set, but there is no cleanaddr operator, // maybe need to check it diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc index e0001fcd..30c3ffcc 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ -167,4 +167,32 @@ Status OpsKernelBuilderManager::GenerateTask(const Node &node, GELOGD("Done invoking GenerateTask successfully"); return SUCCESS; } + +Status OpsKernelBuilderManager::RegProfReporterCallBack(void *func) const { + GE_CHECK_NOTNULL(func); + for (auto it = ops_kernel_builders_.begin(); it != ops_kernel_builders_.end(); it++) { + GE_CHK_STATUS_RET(it->second->RegProfReporterCallBack(func), + "Failed to invoke RegProfReporterCallBack."); + } + GELOGD("Done invoking RegProfReporterCallBack successfully"); + return SUCCESS; +} + +Status OpsKernelBuilderManager::ProfStart(uint64_t prof_config) const { + for (auto it = ops_kernel_builders_.begin(); it != ops_kernel_builders_.end(); it++) { + GE_CHK_STATUS_RET(it->second->ProfStart(prof_config), + "Failed to invoke ProfStart."); + } + GELOGD("Done invoking ProfStart successfully"); + return SUCCESS; +} + +Status OpsKernelBuilderManager::ProfStop((uint64_t prof_config) const { + for (auto it = ops_kernel_builders_.begin(); it != ops_kernel_builders_.end(); it++) { + GE_CHK_STATUS_RET(it->second->ProfStop(prof_config), + "Failed to invoke ProfStop."); + } + GELOGD("Done invoking ProfStop successfully"); + return SUCCESS; +} } // namespace ge \ No newline at end of file diff --git 
a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h index 7a95ddfa..bcd05fe0 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.h +++ b/ge/opskernel_manager/ops_kernel_builder_manager.h @@ -46,6 +46,12 @@ class OpsKernelBuilderManager { Status GenerateTask(const Node &node, RunContext &context, std::vector &tasks) const; + Status RegProfReporterCallBack(void *func) const; + + Status ProfStart(uint64_t prof_config) const; + + Status ProfStop((uint64_t prof_config) const; + private: OpsKernelBuilderManager() = default; static Status GetLibPaths(const std::map &options, std::string &lib_paths); From bbae4fcf946eb39259d5e7512fdf835742c1819b Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 20:02:18 +0800 Subject: [PATCH 107/127] Profiling remove ge_prof.h/.c --- ge/client/ge_prof.cc | 369 -------------------------------------- inc/external/ge/ge_prof.h | 102 ----------- 2 files changed, 471 deletions(-) delete mode 100644 ge/client/ge_prof.cc delete mode 100644 inc/external/ge/ge_prof.h diff --git a/ge/client/ge_prof.cc b/ge/client/ge_prof.cc deleted file mode 100644 index ede38430..00000000 --- a/ge/client/ge_prof.cc +++ /dev/null @@ -1,369 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ge/ge_prof.h" -#include "ge/ge_api.h" -#include "init/gelib.h" -#include "common/debug/log.h" -#include "framework/common/debug/ge_log.h" -#include "common/profiling/profiling_manager.h" -#include "graph/load/graph_loader.h" -#include "toolchain/prof_acl_api.h" - -using std::map; -using std::string; -using std::vector; - -namespace { -const uint32_t kMaxDeviceNum = 64; -const uint32_t kDeviceListIndex = 3; -const std::string kProfilingInit = "prof_init"; -const std::string kProfilingFinalize = "prof_finalize"; -const std::string kProfilingStart = "prof_start"; -const std::string kProfilingStop = "prof_stop"; -const std::string kDeviceNums = "devNums"; -const std::string kDeviceIdList = "devIdList"; -const std::string kAicoreMetrics = "aicoreMetrics"; - -const std::map kProfAicoreMetricsToString = { - {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, - {ge::kAicorePipeline, "AICORE_PIPELINE"}, - {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, - {ge::kAicoreMemory, "AICORE_MEMORY"}, - {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, - {ge::kAicoreStall, "AICORE_STALL"}}; -} // namespace - -static bool g_graph_prof_init_ = false; -static std::mutex g_prof_mutex_; - -namespace ge { -struct aclgrphProfConfig { - ProfConfig config; -}; - -Status aclgrphProfInit(const char *profiler_path, uint32_t length) { - GELOGT(TRACE_INIT, "Graph prof init start"); - - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); - return FAILED; - } - - std::lock_guard lock(g_prof_mutex_); - if (g_graph_prof_init_) { - GELOGW("Multi graph profiling initializations."); - return GE_PROF_MULTI_INIT; - } - - Status ret = CheckPath(profiler_path, length); - if (ret != SUCCESS) { - GELOGE(ret, "Profiling config path is invalid."); - return ret; - } - // if command mode is set, just return - if 
(ProfilingManager::Instance().ProfilingOn()) { - GELOGW("Graph prof init failed, cause profiling command pattern is running."); - return GE_PROF_MODE_CONFLICT; - } - - ret = ProfInit(profiler_path); - if (ret != SUCCESS) { - GELOGE(ret, "ProfInit init fail"); - return ret; - } - - GraphLoader graph_loader; - Command command; - command.cmd_params.clear(); - command.cmd_type = kProfilingInit; - command.module_index = PROF_MODEL_LOAD; - ret = graph_loader.CommandHandle(command); - if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path); - return ret; - } - if (!g_graph_prof_init_) { - g_graph_prof_init_ = true; - GELOGI("Profiling init successfully."); - } - - GELOGI("Successfully execute GraphProfInit."); - return SUCCESS; -} - -Status aclgrphProfFinalize() { - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); - return FAILED; - } - std::lock_guard lock(g_prof_mutex_); - // if command mode is set, just return - if (ProfilingManager::Instance().ProfilingOn()) { - GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); - return GE_PROF_MODE_CONFLICT; - } - - if (!g_graph_prof_init_) { - GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); - return GE_PROF_NOT_INIT; - } - GraphLoader graph_loader; - Command command; - command.cmd_params.clear(); - command.cmd_type = kProfilingFinalize; - Status ret = graph_loader.CommandHandle(command); - if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str()); - return ret; - } - - ret = ProfFinalize(); - if (ret != SUCCESS) { - GELOGE(ret, "Finalize profiling failed, result = %d", ret); - } - - if (ret == SUCCESS) { - g_graph_prof_init_ = false; - GELOGI("Successfully execute GraphProfFinalize."); - } - return ret; -} - -bool 
TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector &prof_config_params) { - prof_config_params.clear(); - prof_config_params.emplace_back(kDeviceNums); - prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); - prof_config_params.emplace_back(kDeviceIdList); - std::string devID = ""; - if (profiler_config->config.devNums == 0) { - GELOGW("The device num is invalid."); - return false; - } - for (uint32_t i = 0; i < profiler_config->config.devNums; i++) { - devID.append(std::to_string(profiler_config->config.devIdList[i])); - if (i != profiler_config->config.devNums - 1) { - devID.append(","); - } - } - - prof_config_params.push_back(devID); - prof_config_params.push_back(kAicoreMetrics); - auto iter = - kProfAicoreMetricsToString.find(static_cast(profiler_config->config.aicoreMetrics)); - if (iter == kProfAicoreMetricsToString.end()) { - GELOGW("The prof aicore metrics is invalid."); - return false; - } - prof_config_params.push_back(iter->second); - return true; -} - -bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { - if (deviceid_list == nullptr) { - GELOGE(PARAM_INVALID, "deviceIdList is nullptr"); - return false; - } - if (device_nums == 0 || device_nums > kMaxDeviceNum) { - GELOGE(PARAM_INVALID, "The device nums is invalid."); - return false; - } - - // real device num - int32_t dev_count = 0; - rtError_t rt_err = rtGetDeviceCount(&dev_count); - if (rt_err != RT_ERROR_NONE) { - GELOGE(INTERNAL_ERROR, "Get the Device count fail."); - return false; - } - - if (device_nums > static_cast(dev_count)) { - GELOGE(PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count); - return false; - } - - std::unordered_set record; - for (size_t i = 0; i < device_nums; ++i) { - uint32_t dev_id = deviceid_list[i]; - if (dev_id >= static_cast(dev_count)) { - GELOGE(PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count); - return false; - } - 
if (record.count(dev_id) > 0) { - GELOGE(PARAM_INVALID, "Device id %u is duplicatedly set", dev_id); - return false; - } - record.insert(dev_id); - } - return true; -} - -aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, - ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, - uint64_t data_type_config) { - if (!isProfConfigValid(deviceid_list, device_nums)) { - return nullptr; - } - aclgrphProfConfig *config = new (std::nothrow) aclgrphProfConfig(); - if (config == nullptr) { - GELOGE(INTERNAL_ERROR, "new aclgrphProfConfig fail"); - return nullptr; - } - config->config.devNums = device_nums; - if (memcpy_s(config->config.devIdList, sizeof(config->config.devIdList), deviceid_list, - device_nums * sizeof(uint32_t)) != EOK) { - GELOGE(INTERNAL_ERROR, "copy devID failed. size = %u", device_nums); - delete config; - return nullptr; - } - - config->config.aicoreMetrics = static_cast(aicore_metrics); - config->config.dataTypeConfig = data_type_config; - GELOGI("Successfully create prof config."); - return config; -} - -Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config) { - if (profiler_config == nullptr) { - GELOGE(PARAM_INVALID, "destroy profilerConfig failed, profilerConfig must not be nullptr"); - return PARAM_INVALID; - } - - delete profiler_config; - GELOGI("Successfully destroy prof config."); - return SUCCESS; -} - -Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { - if (profiler_config == nullptr) { - GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); - return FAILED; - } - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); - return FAILED; - } - - std::lock_guard lock(g_prof_mutex_); - // if command mode is set, just return - if (ProfilingManager::Instance().ProfilingOn()) { - GELOGW("Graph prof finalize failed, cause profiling command 
pattern is running."); - return GE_PROF_MODE_CONFLICT; - } - if (!g_graph_prof_init_) { - GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); - return GE_PROF_NOT_INIT; - } - - Status ret = ProfStartProfiling(&profiler_config->config); - if (ret != SUCCESS) { - GELOGE(ret, "Start profiling failed, prof result = %d", ret); - return FAILED; - } - - std::vector prof_params; - if (!TransProfConfigToParam(profiler_config, prof_params)) { - GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); - return PARAM_INVALID; - } - - GraphLoader graph_loader; - Command command; - command.cmd_params.clear(); - command.cmd_type = kProfilingStart; - command.cmd_params = prof_params; - command.module_index = profiler_config->config.dataTypeConfig; - GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), - prof_params[kDeviceListIndex].c_str(), command.module_index); - ret = graph_loader.CommandHandle(command); - if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command failed"); - return FAILED; - } - - GELOGI("Successfully execute GraphProfStartProfiling."); - - return SUCCESS; -} - -Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { - if (profiler_config == nullptr) { - GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); - return FAILED; - } - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); - return FAILED; - } - - std::lock_guard lock(g_prof_mutex_); - // if command mode is set, just return - if (ProfilingManager::Instance().ProfilingOn()) { - GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); - return GE_PROF_MODE_CONFLICT; - } - if (!g_graph_prof_init_) { - GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); - return GE_PROF_NOT_INIT; - } - - for (uint32_t i = 0; i < 
profiler_config->config.devNums; i++) { - uint64_t data_type_config; - Status status = ProfGetDataTypeConfig(profiler_config->config.devIdList[i], data_type_config); - if (status != SUCCESS) { - GELOGE(status, "Prof get data type config failed, prof result = %d", status); - return status; - } - if (data_type_config != profiler_config->config.dataTypeConfig) { - GELOGE(FAILED, "data type config verify failed"); - return FAILED; - } - } - - std::vector prof_params; - if (!TransProfConfigToParam(profiler_config, prof_params)) { - GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); - return PARAM_INVALID; - } - - GraphLoader graph_loader; - Command command; - command.cmd_params.clear(); - command.cmd_type = kProfilingStop; - command.cmd_params = prof_params; - command.module_index = profiler_config->config.dataTypeConfig; - GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), - prof_params[kDeviceListIndex].c_str(), command.module_index); - Status ret = graph_loader.CommandHandle(command); - if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command failed"); - return FAILED; - } - - ret = ProfStopProfiling(&profiler_config->config); - if (ret != SUCCESS) { - GELOGE(ret, "Stop profiling failed, prof result = %d", ret); - return ret; - } - - GELOGI("Successfully execute GraphProfStopProfiling."); - return SUCCESS; -} -} // namespace ge diff --git a/inc/external/ge/ge_prof.h b/inc/external/ge/ge_prof.h deleted file mode 100644 index 658cea76..00000000 --- a/inc/external/ge/ge_prof.h +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_EXTERNAL_GE_GE_PROF_H_ -#define INC_EXTERNAL_GE_GE_PROF_H_ - -#include -#include -#include - -#include "ge/ge_api_error_codes.h" - -namespace ge { -enum ProfDataTypeConfig { - kProfTaskTime = 0x0002, - kProfAiCoreMetrics = 0x0004, - kProfAicpuTrace = 0x0008, - kProfTrainingTrace = 0x0800, - kProfHcclTrace = 0x1000 -}; - -enum ProfilingAicoreMetrics { - kAicoreArithmaticThroughput = 0, - kAicorePipeline = 1, - kAicoreSynchronization = 2, - kAicoreMemory = 3, - kAicoreInternalMemory = 4, - kAicoreStall = 5 -}; - -typedef struct ProfAicoreEvents ProfAicoreEvents; -typedef struct aclgrphProfConfig aclgrphProfConfig; - -/// -/// @ingroup AscendCL -/// @brief Initialize the profiling and set profiling configuration path -/// @param [in] profiler_path: configuration path of profiling -/// @param [in] length: length of configuration path -/// @return Status result of function -/// -Status aclgrphProfInit(const char *profiler_path, uint32_t length); - -/// -/// @ingroup AscendCL -/// @brief Finalize profiling -/// @return Status result of function -/// -Status aclgrphProfFinalize(); - -/// -/// @ingroup AscendCL -/// @brief Create data of type aclgrphProfConfig -/// @param [in] deviceid_list: device id list -/// @param [in] device_nums: device numbers -/// @param [in] aicore_metrics: type of aicore metrics -/// @param [in] aicore_events: pointer to aicore events be reserved, only support NULL now -/// @param [in] data_type_config: modules need profiling -/// @return Status result of function -/// -aclgrphProfConfig 
*aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, - ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, - uint64_t data_type_config); - -/// -/// @ingroup AscendCL -/// @brief Destroy data of type aclgrphProfConfig -/// @param [in] profiler_config: config of profiling -/// @return Status result of function -/// -Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config); - -/// -/// @ingroup AscendCL -/// @brief Start profiling of modules which is configured by profiler config -/// @param [in] profiler_config: config of profiling -/// @return Status result of function -/// -Status aclgrphProfStart(aclgrphProfConfig *profiler_config); - -/// -/// @ingroup AscendCL -/// @brief Stop profiling of modules which is configured by profiler config -/// @param [in] profiler_config: config of profiling -/// @return Status result of function -/// -Status aclgrphProfStop(aclgrphProfConfig *profiler_config); -} // namespace ge - -#endif // INC_EXTERNAL_GE_GE_PROF_H_ From d54a6ea9c9f75a9dd7a569dde6febc504bb0de44 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 20:15:11 +0800 Subject: [PATCH 108/127] Profiling AR version 6 --- ge/common/profiling/profiling_manager.cc | 3 +-- inc/framework/common/profiling/ge_profiling.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 222cde6a..1a0a1b2f 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -192,7 +192,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf // stop profiling if (prof_cb_.msprofCtrlCallback == nullptr) { GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr."); - return ge::PARAM_INVALID; + return; } int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), nullptr, 0); @@ -824,7 +824,6 @@ 
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP std::string &fp_point, std::string &bp_point) { // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init if (!fp_point_.empty() && !bp_point_.empty()) { - GELOGI("Bp Fp have been initialized in env or options"); fp_point = fp_point_; bp_point = bp_point_; GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str()); diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index 65a0eebb..10b53d6d 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -31,11 +31,11 @@ enum ProfCommandHandleType { }; struct ProfCommandHandleData { - unit64_t profSwitch; + uint64_t profSwitch; uint32_t devNums; // length of device id list uint32_t devIdList[MAX_DEV_NUM]; uint32_t modelId; -} +}; ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); From b6ee361521fdea9d9b9b625db2656262e7d8fdf6 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 20:30:57 +0800 Subject: [PATCH 109/127] Profiling AR version 7 --- ge/common/profiling/profiling_manager.cc | 2 ++ ge/graph/load/new_model_manager/davinci_model.cc | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 1a0a1b2f..5419c8e8 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -588,6 +588,7 @@ Status ProfilingManager::ProfParseDeviceId(const std::map return FAILED; } catch (std::out_of_range &) { GELOGE(FAILED, "Device num: %s is out of range.", iter->second.c_str()); + return FAILED; } catch (...) 
{ GELOGE(FAILED, "Device num: %s cannot change to int.", iter->second.c_str()); return FAILED; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index bade866f..bc755e07 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2819,15 +2819,20 @@ void *DavinciModel::Run(DavinciModel *model) { GE_TIMESTAMP_START(rtStreamSynchronize); GELOGI("rtStreamSynchronize start."); rt_ret = rtStreamSynchronize(model->rt_model_stream_); - if (rt_ret == RT_ERROR_END_OF_SEQUENCE) { + if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) { seq_end_flag = true; } - GE_IF_BOOL_EXEC( + if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) { + GELOGI("The model with multiple datasets aborts normally."); + } else { + GE_IF_BOOL_EXEC( rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); (void)model->ReturnResult(current_data.index, false, seq_end_flag, data_wrapper->GetOutput()); // [No need to check value] CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); continue); + } + GELOGI("rtStreamSynchronize end."); GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); From 8ab72ebdb9d71eb29be9682e594b842d5c0c2d55 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 22:28:45 +0800 Subject: [PATCH 110/127] Profiling version 8 --- ge/opskernel_manager/ops_kernel_builder_manager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h index bcd05fe0..a2d3e565 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.h +++ b/ge/opskernel_manager/ops_kernel_builder_manager.h @@ -50,7 +50,7 @@ class OpsKernelBuilderManager { Status ProfStart(uint64_t prof_config) 
const; - Status ProfStop((uint64_t prof_config) const; + Status ProfStop(uint64_t prof_config) const; private: OpsKernelBuilderManager() = default; From 95676781681cfd1611646afb8cd183a60d5f11a2 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 22:37:29 +0800 Subject: [PATCH 111/127] Profiling AR version 9 --- ge/common/profiling/ge_profiling.cc | 2 +- ge/common/profiling/profiling_manager.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index b6708f7a..b06a8448 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -151,7 +151,7 @@ ge::Status RegProfReporterCallback(MsprofReporterCallback func) { return rt_ret; } // Pass MsprofReporterCallback to hccl in opskernel so initialize - rt_ret = OpsKernelBuilderManager::Instance().RegProfReporterCallBack(func); + rt_ret = ge::OpsKernelBuilderManager::Instance().RegProfReporterCallBack(func); if (rt_ret != ge::SUCCESS) { GELOGE(rt_ret, "Pass MsprofReporterCallback to hccl failed."); return rt_ret; diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 5419c8e8..5cb08dd0 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -142,7 +142,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt ge::Status ProfilingManager::ParseOptions(const std::string &options) { if (options.empty()) { - GELOGE(ge::PARAM_INVALID, "Profiling options is empty.") + GELOGE(ge::PARAM_INVALID, "Profiling options is empty."); return ge::PARAM_INVALID; } try { From 216a6f3b6a887ce3e800d4344ddacb4493ff3c85 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 22:47:28 +0800 Subject: [PATCH 112/127] Profiling AR version 10 --- ge/common/profiling/ge_profiling.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/common/profiling/ge_profiling.cc 
b/ge/common/profiling/ge_profiling.cc index b06a8448..67e6b619 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -151,7 +151,7 @@ ge::Status RegProfReporterCallback(MsprofReporterCallback func) { return rt_ret; } // Pass MsprofReporterCallback to hccl in opskernel so initialize - rt_ret = ge::OpsKernelBuilderManager::Instance().RegProfReporterCallBack(func); + rt_ret = ge::OpsKernelBuilderManager::Instance().RegProfReporterCallBack(reinterpret_cast(func)); if (rt_ret != ge::SUCCESS) { GELOGE(rt_ret, "Pass MsprofReporterCallback to hccl failed."); return rt_ret; From 4f898216bd2b269d0be6fb956510a225d5ed5e8d Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 10 Dec 2020 22:54:40 +0800 Subject: [PATCH 113/127] Profiling AR version 11 --- ge/opskernel_manager/ops_kernel_builder_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc index 30c3ffcc..4361df44 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ -187,7 +187,7 @@ Status OpsKernelBuilderManager::ProfStart(uint64_t prof_config) const { return SUCCESS; } -Status OpsKernelBuilderManager::ProfStop((uint64_t prof_config) const { +Status OpsKernelBuilderManager::ProfStop(uint64_t prof_config) const { for (auto it = ops_kernel_builders_.begin(); it != ops_kernel_builders_.end(); it++) { GE_CHK_STATUS_RET(it->second->ProfStop(prof_config), "Failed to invoke ProfStop."); From c3be30b9ce1b422a6b536d8790fc689ed8aaebb2 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 11 Dec 2020 10:58:17 +0800 Subject: [PATCH 114/127] Profiling AR VERSION11 remove task_id --- ge/common/profiling/profiling_manager.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 5cb08dd0..df36d5f1 100644 --- 
a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -133,9 +133,6 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt GELOGE(INTERNAL_ERROR, "copy job_id failed."); return INTERNAL_ERROR; } - - // get traceId from options - prof_conf.traceId = GetContext().TraceId(); #endif return ge::SUCCESS; } From 46179fe5e2721ca30a8ea8a6e787d7f3c346e043 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 11 Dec 2020 14:12:13 +0800 Subject: [PATCH 115/127] update deviceid --- ge/init/gelib.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 20859c07..03028e53 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -146,7 +146,7 @@ Status GELib::InnerInitialize(const map &options) { } if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { - GE_CHK_RT_RET(rtSetDevice(options.device_id)); + GE_CHK_RT_RET(rtSetDevice(options_.device_id)); } GELOGI("sessionManager initial."); From 77ff5be0a0e86d61514d78753b06477c5a3b91dd Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 11 Dec 2020 20:24:51 +0800 Subject: [PATCH 116/127] Profiling AR version11 --- ge/common/profiling/profiling_manager.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index df36d5f1..3adf9065 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -60,7 +60,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In subscribe_count_ = 0; GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str()); - struct MsprofGeOptions prof_conf = { 0 }; + struct MsprofGeOptions prof_conf = {{ 0 }}; Status ret = InitFromOptions(options, prof_conf); if (ret != SUCCESS) { GELOGE(ret, "Failed to init profiling."); @@ -108,8 +108,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options 
&options, MsprofGeOpt (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); // The env is invalid - if ((env_profiling_mode == nullptr) || (strcmp("true", env_profiling_mode) != 0) - || (strcmp(prof_conf.options, "\0") == 0)) { + if ((strcmp("true", env_profiling_mode) != 0) || (strcmp(prof_conf.options, "\0") == 0)) { return SUCCESS; } // enable profiling by env From 77c21e71b755f9dafccd5eb69673e0eac28f226c Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 11 Dec 2020 22:09:12 +0800 Subject: [PATCH 117/127] Profiling AR version12 --- ge/common/profiling/profiling_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 3adf9065..457ee464 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -98,7 +98,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt if (options.profiling_mode == "1" && !options.profiling_options.empty()) { // enable profiling by ge option if (memcpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), - sizeof(options.profiling_options.c_str())) != EOK) { + options.profiling_options.size()) != EOK) { GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); return INTERNAL_ERROR; } From e435b4ea0317c3707c9729555d112e34813bd004 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 11 Dec 2020 22:29:12 +0800 Subject: [PATCH 118/127] Profiling AR version14 --- ge/common/profiling/profiling_manager.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 457ee464..c8f728c2 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -103,7 +103,8 @@ ge::Status 
ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt return INTERNAL_ERROR; } is_execute_profiling_ = true; - GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), prof_conf.options); + GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), + prof_conf.options, options.profiling_options.c_str()); } else { (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); From 71d2fb4d58f1396ea028fe8985a873755fbc7f16 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 11:38:08 +0800 Subject: [PATCH 119/127] Remove pass reporter callback to hccl --- ge/common/profiling/ge_profiling.cc | 8 +---- ge/common/profiling/profiling_manager.cc | 23 --------------- ge/init/gelib.cc | 6 +--- .../ops_kernel_builder_manager.cc | 29 +------------------ .../ops_kernel_builder_manager.h | 6 ---- 5 files changed, 3 insertions(+), 69 deletions(-) diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index 67e6b619..640f77a1 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -22,7 +22,6 @@ #include "graph/load/graph_loader.h" #include "init/gelib.h" #include "framework/common/ge_inner_error_codes.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" namespace { const uint32_t kDeviceListIndex = 3; @@ -150,12 +149,7 @@ ge::Status RegProfReporterCallback(MsprofReporterCallback func) { GELOGE(rt_ret, "Pass MsprofReporterCallback to runtime failed!!"); return rt_ret; } - // Pass MsprofReporterCallback to hccl in opskernel so initialize - rt_ret = ge::OpsKernelBuilderManager::Instance().RegProfReporterCallBack(reinterpret_cast(func)); - if (rt_ret != ge::SUCCESS) { - GELOGE(rt_ret, "Pass MsprofReporterCallback to hccl failed."); - return rt_ret; - } + // Pass MsprofReporterCallback to hccl } return ge::SUCCESS; } diff --git 
a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index c8f728c2..456cb0a4 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -22,7 +22,6 @@ #include "graph/ge_context.h" #include "runtime/base.h" #include "graph/load/new_model_manager/davinci_model.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" namespace { const char *const kTrainingTrace = "training_trace"; @@ -496,11 +495,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn GELOGE(FAILED, "Runtime profiler start failed."); return FAILED; } - Status hccl_ret = OpsKernelBuilderManager::Instance().ProfStart(model_load_mask); - if (hccl_ret != SUCCESS) { - GELOGE(FAILED, "Hccl profiler start failed."); - return FAILED; - } is_load_profiling_ = true; GELOGI("Prof init: model load profiling on."); } @@ -530,12 +524,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi GELOGE(FAILED, "Runtime profiler stop failed."); return FAILED; } - - Status hccl_ret = OpsKernelBuilderManager::Instance().ProfStop(PROF_MODEL_LOAD_MASK); - if (hccl_ret != SUCCESS) { - GELOGE(FAILED, "Hccl profiler stop failed."); - return FAILED; - } for (auto device_id_module : device_id_module_map_) { if (device_id_module.second != 0) { uint32_t device_id = static_cast(device_id_module.first); @@ -661,12 +649,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGE(FAILED, "Runtime profiler config proc failed."); return FAILED; } - - Status hccl_ret = OpsKernelBuilderManager::Instance().ProfStart(module); - if (hccl_ret != SUCCESS) { - GELOGE(FAILED, "Hccl profiler start failed."); - return FAILED; - } if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { for (int32_t i = 0; i < device_num; i++) { if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { @@ -708,11 +690,6 @@ FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); return FAILED; } - Status hccl_ret = OpsKernelBuilderManager::Instance().ProfStop(module); - if (hccl_ret != SUCCESS) { - GELOGE(FAILED, "Hccl profiler stop failed."); - return FAILED; - } uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { for (int32_t i = 0; i < device_num; i++) { diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 03028e53..92700179 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -145,10 +145,6 @@ Status GELib::InnerInitialize(const map &options) { return initOpsBuilderStatus; } - if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { - GE_CHK_RT_RET(rtSetDevice(options_.device_id)); - } - GELOGI("sessionManager initial."); GE_TIMESTAMP_START(SessionManagerInitialize); Status initSmStatus = sessionManager_.Initialize(options); @@ -342,7 +338,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt // set device id GELOGI("set logical device id:%u", options.device_id); GetContext().SetCtxDeviceId(static_cast(options.device_id)); - // GE_CHK_RT_RET(rtSetDevice(options.device_id)); + GE_CHK_RT_RET(rtSetDevice(options.device_id)); // In the scenario that the automatic add fusion is set, but there is no cleanaddr operator, // maybe need to check it diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc index 4361df44..37bdcf7a 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ -168,31 +168,4 @@ Status OpsKernelBuilderManager::GenerateTask(const Node &node, return SUCCESS; } -Status OpsKernelBuilderManager::RegProfReporterCallBack(void *func) const { - GE_CHECK_NOTNULL(func); - for (auto it = ops_kernel_builders_.begin(); it != ops_kernel_builders_.end(); it++) { - 
GE_CHK_STATUS_RET(it->second->RegProfReporterCallBack(func), - "Failed to invoke RegProfReporterCallBack."); - } - GELOGD("Done invoking RegProfReporterCallBack successfully"); - return SUCCESS; -} - -Status OpsKernelBuilderManager::ProfStart(uint64_t prof_config) const { - for (auto it = ops_kernel_builders_.begin(); it != ops_kernel_builders_.end(); it++) { - GE_CHK_STATUS_RET(it->second->ProfStart(prof_config), - "Failed to invoke ProfStart."); - } - GELOGD("Done invoking ProfStart successfully"); - return SUCCESS; -} - -Status OpsKernelBuilderManager::ProfStop(uint64_t prof_config) const { - for (auto it = ops_kernel_builders_.begin(); it != ops_kernel_builders_.end(); it++) { - GE_CHK_STATUS_RET(it->second->ProfStop(prof_config), - "Failed to invoke ProfStop."); - } - GELOGD("Done invoking ProfStop successfully"); - return SUCCESS; -} -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h index a2d3e565..7a95ddfa 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.h +++ b/ge/opskernel_manager/ops_kernel_builder_manager.h @@ -46,12 +46,6 @@ class OpsKernelBuilderManager { Status GenerateTask(const Node &node, RunContext &context, std::vector &tasks) const; - Status RegProfReporterCallBack(void *func) const; - - Status ProfStart(uint64_t prof_config) const; - - Status ProfStop(uint64_t prof_config) const; - private: OpsKernelBuilderManager() = default; static Status GetLibPaths(const std::map &options, std::string &lib_paths); From 1f2d63f2b9d81403aaa650bbb07271a421ab69ff Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 17:07:59 +0800 Subject: [PATCH 120/127] remove all msprof, changed to static msprof --- CMakeLists.txt | 14 +++++--------- ge/CMakeLists.txt | 2 +- ge/client/module.mk | 10 ++++++---- ge/executor/CMakeLists.txt | 1 - ge/executor/module.mk | 4 ---- ge/ge_runner.mk | 5 +---- 6 files changed, 13 
insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 648bb954..86d0184b 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ if (ENABLE_OPEN_SRC) set(STATIC_ACL_LIB ${GE_LIB_PATH}) find_module(slog libslog.so ${GE_LIB_PATH}) find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) - find_module(msprof libmsprof.so ${GE_LIB_PATH}) + find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH}) find_module(hccl libhccl.so ${GE_LIB_PATH}) find_module(adump_server libadump_server.a ${GE_LIB_PATH}) find_module(runtime libruntime.so ${GE_LIB_PATH}) @@ -80,20 +80,19 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) - find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH}) #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) else() find_module(slog libslog.so ${ASCEND_ATC_DIR}) find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) if(PLATFORM STREQUAL "train") - find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR}) find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) - find_module(msprofiler libmsprofiler.a ${ASCEND_RUNTIME_DIR}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) if(PRODUCT STREQUAL "flr3") message(FATAL_ERROR "This platform is not supported in train mode, build terminated") @@ -108,18 +107,15 @@ if (ENABLE_OPEN_SRC) find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) 
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") - find_module(msprof libmsprof.so ${ASCEND_DRIVER_SHARE_DIR}) elseif(PRODUCT STREQUAL "flr1") find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) - find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR}) elseif(PRODUCT STREQUAL "flr2") # flr2 ascend_hal_stub limsprof ? else() find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) - find_module(msprof libmsprof.so ${ASCEND_DRIVER_DIR}) endif() elseif(PLATFORM STREQUAL "all") - find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR}) + find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR}) find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) @@ -127,7 +123,7 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index e36b45d9..c2a34aba 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -779,7 +779,7 @@ target_link_libraries(opensrc_ascendcl PRIVATE register_static error_manager_static adump_server - msprofiler_fwk + msprofiler -Wl,--no-whole-archive -Wl,--no-as-needed c_sec diff --git a/ge/client/module.mk b/ge/client/module.mk index c942d550..e9d35418 100644 --- a/ge/client/module.mk +++ b/ge/client/module.mk @@ -68,9 +68,9 @@ LOCAL_SHARED_LIBRARIES := \ libgraph \ libregister \ libge_compiler \ - libge_common \ - libmsprof + libge_common +LOCAL_STATIC_LIBRARIES += 
libmsprofiler_fwk \ LOCAL_LDFLAGS := -lrt -ldl @@ -103,8 +103,10 @@ LOCAL_SHARED_LIBRARIES := \ libregister \ libruntime \ libge_compiler \ - libge_common \ - libmsprof + libge_common + + +LOCAL_STATIC_LIBRARIES += libmsprofiler_fwk \ LOCAL_LDFLAGS := -lrt -ldl diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 29215cc1..cc5c1710 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -245,7 +245,6 @@ target_link_libraries(ge_executor_shared PRIVATE mmpa graph register - msprof error_manager ascend_hal_stub ascend_protobuf diff --git a/ge/executor/module.mk b/ge/executor/module.mk index cc64007e..34c2a37e 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -178,7 +178,6 @@ local_ge_executor_shared_library := \ libmmpa \ libgraph \ libregister \ - libmsprof \ liberror_manager \ local_ge_executor_ldflags := -lrt -ldl \ @@ -235,7 +234,6 @@ LOCAL_SHARED_LIBRARIES := \ libmmpa \ libgraph \ libregister \ - libmsprof \ liberror_manager \ stub/libascend_hal \ @@ -273,7 +271,6 @@ LOCAL_SHARED_LIBRARIES := \ libruntime \ libslog \ libmmpa \ - libmsprof \ LOCAL_LDFLAGS += $(local_ge_executor_ldflags) @@ -305,7 +302,6 @@ LOCAL_SHARED_LIBRARIES := \ libruntime \ libslog \ libmmpa \ - libmsprof \ ifeq ($(device_os),android) LOCAL_LDFLAGS += -ldl diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 5e700f2b..820404b3 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -372,7 +372,7 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ libadump_server \ - libmsprofiler \ + libmsprofiler_fwk \ libmmpa \ LOCAL_SHARED_LIBRARIES := \ @@ -382,7 +382,6 @@ LOCAL_SHARED_LIBRARIES := \ libgraph \ libregister \ libge_common \ - libmsprof \ liberror_manager \ LOCAL_LDFLAGS := -lrt -ldl @@ -464,7 +463,6 @@ LOCAL_SHARED_LIBRARIES := \ libc_sec \ libslog \ libmmpa \ - libmsprof \ LOCAL_LDFLAGS := -lrt -ldl @@ -497,7 +495,6 @@ LOCAL_SHARED_LIBRARIES := \ libc_sec \ libslog \ libmmpa \ - libmsprof \ 
LOCAL_LDFLAGS := -lrt -ldl From 5823abd12fc186aecd7de3617b3b3e75b83cfb84 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 17:32:16 +0800 Subject: [PATCH 121/127] Profiling remove gerunner whole-archive --- ge/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index c2a34aba..f892a74c 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -648,9 +648,7 @@ target_include_directories(ge_runner PRIVATE target_link_libraries(ge_runner $ - -Wl,--whole-archive msprofiler_fwk - -Wl,--no-whole-archive ge_memory adump_server static_mmpa From c3bf97f497abe2686ecd81ec3d261a9468cd5233 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 21:05:47 +0800 Subject: [PATCH 122/127] rename ge_proiling_cb.h to ge_runner_profiling.h --- ge/CMakeLists.txt | 2 +- .../{ge_profiling_cb.cc => ge_runner_profiling.cc} | 2 +- ge/ge_runner.mk | 1 + .../profiling/{ge_profiling_cb.h => ge_runner_profiling.h} | 6 +++--- 4 files changed, 6 insertions(+), 5 deletions(-) rename ge/common/profiling/{ge_profiling_cb.cc => ge_runner_profiling.cc} (94%) rename inc/framework/common/profiling/{ge_profiling_cb.h => ge_runner_profiling.h} (80%) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index f892a74c..a0044d23 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -61,7 +61,7 @@ set(TRAIN_SRC_LIST "common/dump/dump_properties.cc" "common/dump/dump_op.cc" "common/profiling/ge_profiling.cc" - "common/profiling/ge_profiling_cb.cc" + "common/profiling/ge_runner_profiling.cc" "engine_manager/dnnengine_manager.cc" "ge_local_engine/engine/host_cpu_engine.cc" "generator/ge_generator.cc" diff --git a/ge/common/profiling/ge_profiling_cb.cc b/ge/common/profiling/ge_runner_profiling.cc similarity index 94% rename from ge/common/profiling/ge_profiling_cb.cc rename to ge/common/profiling/ge_runner_profiling.cc index 230e129d..067aafe3 100644 --- a/ge/common/profiling/ge_profiling_cb.cc +++ 
b/ge/common/profiling/ge_runner_profiling.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "common/profiling/ge_profiling_cb.h" +#include "common/profiling/ge_runner_profiling.h" #include "init/gelib.h" bool IsInitialize() { diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 820404b3..c0f59320 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -30,6 +30,7 @@ LIBGE_LOCAL_SRC_FILES := \ common/dump/dump_properties.cc \ common/dump/dump_op.cc \ common/profiling/ge_profiling.cc \ + common/profiling/ge_runner_profiling.cc \ engine_manager/dnnengine_manager.cc \ ge_local_engine/engine/host_cpu_engine.cc \ generator/ge_generator.cc \ diff --git a/inc/framework/common/profiling/ge_profiling_cb.h b/inc/framework/common/profiling/ge_runner_profiling.h similarity index 80% rename from inc/framework/common/profiling/ge_profiling_cb.h rename to inc/framework/common/profiling/ge_runner_profiling.h index 025232ff..d2eff767 100644 --- a/inc/framework/common/profiling/ge_profiling_cb.h +++ b/inc/framework/common/profiling/ge_runner_profiling.h @@ -14,11 +14,11 @@ * limitations under the License. 
*/ -#ifndef INC_FRAMEWORK_COMMON_GE_PROFILING_CB_H_ -#define INC_FRAMEWORK_COMMON_GE_PROFILING_CB_H_ +#ifndef INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ +#define INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ #include "profiling/ge_profiling.h" bool IsInitialize(); -#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_CB_H_ \ No newline at end of file +#endif // INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ From b85945357069e0f3d7c611e6c2d208d10345029d Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sun, 13 Dec 2020 16:00:54 +0800 Subject: [PATCH 123/127] update cmakelist target_objects --- ge/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index a0044d23..59b804d8 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -605,7 +605,7 @@ set(INFER_SRC_LIST if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ -add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) +add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $) target_compile_definitions(ge_runner PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 @@ -648,7 +648,6 @@ target_include_directories(ge_runner PRIVATE target_link_libraries(ge_runner $ - msprofiler_fwk ge_memory adump_server static_mmpa From abd0da89eec882da6f16597df92988cef6c1ca1f Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sun, 13 Dec 2020 16:36:14 +0800 Subject: [PATCH 124/127] add newline at end of file --- inc/framework/common/profiling/ge_profiling.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index 10b53d6d..e56411c9 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -42,4 +42,4 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); ge::Status 
RegProfReporterCallback(MsprofReporterCallback func); ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); -#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ \ No newline at end of file +#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ From c9e5c8721d05c59df1184fe9edfccfbee6fd134a Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sun, 13 Dec 2020 19:23:18 +0800 Subject: [PATCH 125/127] update submodule --- metadef | 2 +- parser | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadef b/metadef index c85822cd..dba83744 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit c85822cd5404e40cb4ff2bfc9483062648c13c57 +Subproject commit dba83744a3ffe3d5f89496e69bb65c50f800c299 diff --git a/parser b/parser index 5bc8c38b..ce574894 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 5bc8c38b37476e8f4b9391c96e4a2cca59e53d8e +Subproject commit ce574894f13cd94749d1a3964a13e8c97c20434a From 725a81a5a8af4a6888c92533d5df41abe91ac86d Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Sat, 12 Dec 2020 15:50:47 +0800 Subject: [PATCH 126/127] provide option to download third party software from cache server --- CMakeLists.txt | 13 +- cmake/external_libs/gflags.cmake | 1 + cmake/external_libs/gtest.cmake | 8 +- cmake/external_libs/json.cmake | 13 +- cmake/external_libs/onnx.cmake | 6 +- cmake/external_libs/protobuf_shared.cmake | 1 + cmake/external_libs/protobuf_static.cmake | 1 + cmake/external_libs/protoc.cmake | 231 +++++++++++----------- cmake/external_libs/securec.cmake | 13 +- 9 files changed, 158 insertions(+), 129 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fc4102fe..021e7798 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,8 +16,11 @@ endif() if(DEFINED ENV{D_PKG_SERVER}) set(GE_PB_PKG $ENV{D_PKG_SERVER}) - message("Download packages from PKG server") -endif() + message("Download packages from DPKG server") +elseif(DEFINED ENV{MSLIBS_SERVER}) + set(GE_PB_PKG 
"http://$ENV{MSLIBS_SERVER}:8081") + message("Download packages from MSPKG server") +endif () set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64) set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common) @@ -105,7 +108,7 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) - #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) + #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") elseif(PRODUCT STREQUAL "flr1") find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) @@ -127,10 +130,10 @@ if (ENABLE_OPEN_SRC) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() - message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") + message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") endif() - if (ENABLE_GE_COV OR ENABLE_GE_UT) + if (ENABLE_GE_COV OR ENABLE_GE_UT) add_subdirectory(tests) endif() diff --git a/cmake/external_libs/gflags.cmake b/cmake/external_libs/gflags.cmake index f3f0f0ef..50cfb2bc 100755 --- a/cmake/external_libs/gflags.cmake +++ b/cmake/external_libs/gflags.cmake @@ -23,6 +23,7 @@ ExternalProject_Add(gflags_build URL ${REQ_URL} #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz #SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2 + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags BUILD_COMMAND $(MAKE) INSTALL_COMMAND $(MAKE) install diff --git a/cmake/external_libs/gtest.cmake b/cmake/external_libs/gtest.cmake index 96ea84b4..c5edcd72 100755 --- a/cmake/external_libs/gtest.cmake +++ b/cmake/external_libs/gtest.cmake @@ -10,7 +10,10 @@ if 
((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") endif() -if (ENABLE_GITEE) +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/gtest/release-1.8.0.tar.gz") + set(MD5 "") +elseif (ENABLE_GITEE) set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz") set(MD5 "") else() @@ -22,8 +25,9 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack- set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") ExternalProject_Add(gtest_build URL ${REQ_URL} + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest - -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON + -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON BUILD_COMMAND $(MAKE) INSTALL_COMMAND $(MAKE) install EXCLUDE_FROM_ALL TRUE diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake index ce473d4b..3c1cd012 100755 --- a/cmake/external_libs/json.cmake +++ b/cmake/external_libs/json.cmake @@ -5,10 +5,14 @@ endif() include(ExternalProject) set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include) -if (ENABLE_GITEE) - set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") - set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") - set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip") + set(MD5 "0dc903888211db3a0f170304cd9f3a89") + set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) +#elseif (ENABLE_GITEE) +# set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") +# set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") 
+#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") else() set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") set(MD5 "0dc903888211db3a0f170304cd9f3a89") @@ -18,6 +22,7 @@ ExternalProject_Add(json_build URL ${REQ_URL} #URL /home/txd/workspace/cloud_code/pkg/include.zip SOURCE_DIR ${JSON_SRC_DIR} + TLS_VERIFY OFF CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" diff --git a/cmake/external_libs/onnx.cmake b/cmake/external_libs/onnx.cmake index 9dadb544..1ee80d2d 100755 --- a/cmake/external_libs/onnx.cmake +++ b/cmake/external_libs/onnx.cmake @@ -6,7 +6,10 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx) set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto) file(MAKE_DIRECTORY ${ONNX_PROTO_DIR}) -if (ENABLE_GITEE) +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz") + set(MD5 "512f2779d6215d4a36f366b6b9acdf1e") +elseif (ENABLE_GITEE) set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz") set(MD5 "1bdbcecdd68ea8392630467646776e02") else() @@ -19,6 +22,7 @@ ExternalProject_Add(onnx #URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz #URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345 #SOURCE_DIR ${ONNX_SRC_DIR} + TLS_VERIFY OFF CONFIGURE_COMMAND "" BUILD_COMMAND "" #INSTALL_COMMAND "" diff --git a/cmake/external_libs/protobuf_shared.cmake b/cmake/external_libs/protobuf_shared.cmake index c9c6b7d9..6334c8a3 100755 --- a/cmake/external_libs/protobuf_shared.cmake +++ b/cmake/external_libs/protobuf_shared.cmake @@ -26,6 +26,7 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") ExternalProject_Add(protobuf_build URL ${REQ_URL} + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR} diff --git a/cmake/external_libs/protobuf_static.cmake b/cmake/external_libs/protobuf_static.cmake index 6f3e1f53..e4bbb9a0 
100755 --- a/cmake/external_libs/protobuf_static.cmake +++ b/cmake/external_libs/protobuf_static.cmake @@ -27,6 +27,7 @@ ExternalProject_Add(protobuf_static_build URL ${REQ_URL} #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz #SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0 + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/cmake/external_libs/protoc.cmake b/cmake/external_libs/protoc.cmake index 0d162c0d..58321f04 100755 --- a/cmake/external_libs/protoc.cmake +++ b/cmake/external_libs/protoc.cmake @@ -1,115 +1,116 @@ -if (HAVE_PROTOC) - return() -endif() - -include(ExternalProject) -include(GNUInstallDirs) -#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) - -if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR - (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) - set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) - message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") -endif() - -if(GE_PB_PKG) - set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") -else() - if (ENABLE_GITEE) - set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") - set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") - else() - set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") - set(MD5 "3d9e32700639618a4d2d342c99d4507a") - endif () -endif() - -set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") -set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") -ExternalProject_Add(protoc_build - URL ${REQ_URL} - #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz - #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 - CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF 
-DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc /cmake - BUILD_COMMAND $(MAKE) - INSTALL_COMMAND $(MAKE) install - EXCLUDE_FROM_ALL TRUE -) - -set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) - -set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) - -function(protobuf_generate comp c_var h_var) - if(NOT ARGN) - message(SEND_ERROR "Error: protobuf_generate() called without any proto files") - return() - endif() - set(${c_var}) - set(${h_var}) - - foreach(file ${ARGN}) - get_filename_component(abs_file ${file} ABSOLUTE) - get_filename_component(file_name ${file} NAME_WE) - get_filename_component(file_dir ${abs_file} PATH) - get_filename_component(parent_subdir ${file_dir} NAME) - - if("${parent_subdir}" STREQUAL "proto") - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) - else() - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) - endif() - list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc") - list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h") - - add_custom_command( - OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" - COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} - DEPENDS protoc_build ${abs_file} - COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) - endforeach() - - set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) - set(${c_var} ${${c_var}} PARENT_SCOPE) - set(${h_var} ${${h_var}} PARENT_SCOPE) - -endfunction() - -function(protobuf_generate_py comp py_var) - if(NOT ARGN) - message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") - return() - endif() - set(${py_var}) - - foreach(file ${ARGN}) - 
get_filename_component(abs_file ${file} ABSOLUTE) - get_filename_component(file_name ${file} NAME_WE) - get_filename_component(file_dir ${abs_file} PATH) - get_filename_component(parent_subdir ${file_dir} NAME) - - if("${parent_subdir}" STREQUAL "proto") - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) - else() - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) - endif() - list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py") - - add_custom_command( - OUTPUT "${proto_output_path}/${file_name}_pb2.py" - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" - COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} - DEPENDS protoc_build ${abs_file} - COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) - endforeach() - - set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) - set(${py_var} ${${py_var}} PARENT_SCOPE) - -endfunction() - -#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") -set(HAVE_PROTOC TRUE) +if (HAVE_PROTOC) + return() +endif() + +include(ExternalProject) +include(GNUInstallDirs) +#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) + +if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR + (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) + set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) + message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") +endif() + +if(GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") +else() + if (ENABLE_GITEE) + set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") + set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") + else() + set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") + set(MD5 "3d9e32700639618a4d2d342c99d4507a") + endif () +endif() + +set(protobuf_CXXFLAGS 
"-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") +set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") +ExternalProject_Add(protoc_build + URL ${REQ_URL} + #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 + TLS_VERIFY OFF + CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc /cmake + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + EXCLUDE_FROM_ALL TRUE +) + +set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) + +set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) + +function(protobuf_generate comp c_var h_var) + if(NOT ARGN) + message(SEND_ERROR "Error: protobuf_generate() called without any proto files") + return() + endif() + set(${c_var}) + set(${h_var}) + + foreach(file ${ARGN}) + get_filename_component(abs_file ${file} ABSOLUTE) + get_filename_component(file_name ${file} NAME_WE) + get_filename_component(file_dir ${abs_file} PATH) + get_filename_component(parent_subdir ${file_dir} NAME) + + if("${parent_subdir}" STREQUAL "proto") + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) + else() + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) + endif() + list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc") + list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h") + + add_custom_command( + OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" + COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} + DEPENDS protoc_build ${abs_file} + COMMENT 
"Running C++ protocol buffer compiler on ${file}" VERBATIM ) + endforeach() + + set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) + set(${c_var} ${${c_var}} PARENT_SCOPE) + set(${h_var} ${${h_var}} PARENT_SCOPE) + +endfunction() + +function(protobuf_generate_py comp py_var) + if(NOT ARGN) + message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") + return() + endif() + set(${py_var}) + + foreach(file ${ARGN}) + get_filename_component(abs_file ${file} ABSOLUTE) + get_filename_component(file_name ${file} NAME_WE) + get_filename_component(file_dir ${abs_file} PATH) + get_filename_component(parent_subdir ${file_dir} NAME) + + if("${parent_subdir}" STREQUAL "proto") + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) + else() + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) + endif() + list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py") + + add_custom_command( + OUTPUT "${proto_output_path}/${file_name}_pb2.py" + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" + COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} + DEPENDS protoc_build ${abs_file} + COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) + endforeach() + + set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) + set(${py_var} ${${py_var}} PARENT_SCOPE) + +endfunction() + +#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") +set(HAVE_PROTOC TRUE) diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake index 0bd62ab2..0f8b6d3a 100755 --- a/cmake/external_libs/securec.cmake +++ b/cmake/external_libs/securec.cmake @@ -10,11 +10,20 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") endif() +if (GE_PB_PKG) + set(REQ_URL 
"${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz") + set(MD5 "") +else() + set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz") + set(MD5 "") +endif () + ExternalProject_Add(c_sec_build - URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz - #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + URL ${REQ_URL} + #URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz #SOURCE_DIR ${GE_CODE_DIR}/../libc_sec PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} From faed0539727a366986497655c257a652c4d96f16 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 17 Dec 2020 19:40:28 +0800 Subject: [PATCH 127/127] synchronize latest ascend software suite 17 Dec 2020 --- inc/external/acl/acl.h | 73 + inc/external/acl/acl_base.h | 36 + inc/external/acl/acl_mdl.h | 1112 ++++++++ inc/external/acl/acl_prof.h | 323 +++ inc/external/acl/acl_rt.h | 932 +++++++ inc/external/acl/acl_tdt.h | 276 ++ inc/external/acl/error_codes/rt_error_codes.h | 1 + inc/external/acl/ops/acl_dvpp.h | 2389 +++++++++++++++++ inc/external/hccl/hccl.h | 134 + inc/external/hccl/hccl_types.h | 101 + inc/external/runtime/rt_error_codes.h | 1 + .../aicpu/aicpu_schedule/aicpu_op_type_list.h | 60 + .../fwkacllib/inc/cce/aicpu_engine_struct.h | 8 +- .../fwkacllib/inc/cce/fwk_adpt_struct.h | 17 +- third_party/fwkacllib/inc/hccl/base.h | 30 +- third_party/fwkacllib/inc/hccl/hccl_types.h | 101 + third_party/fwkacllib/inc/hccl/hcom.h | 115 + .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h | 14 +- .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h | 5 + third_party/fwkacllib/inc/runtime/base.h | 27 + third_party/fwkacllib/inc/runtime/config.h | 15 - third_party/fwkacllib/inc/tdt/tsd_client.h | 28 +- .../fwkacllib/inc/toolchain/prof_acl_api.h | 410 
+-- .../fwkacllib/inc/toolchain/prof_callback.h | 132 + .../fwkacllib/inc/toolchain/prof_reporter.h | 16 +- 25 files changed, 5956 insertions(+), 400 deletions(-) create mode 100644 inc/external/acl/acl.h create mode 100644 inc/external/acl/acl_mdl.h create mode 100644 inc/external/acl/acl_prof.h create mode 100644 inc/external/acl/acl_rt.h create mode 100644 inc/external/acl/acl_tdt.h create mode 100644 inc/external/acl/ops/acl_dvpp.h create mode 100644 inc/external/hccl/hccl.h create mode 100644 inc/external/hccl/hccl_types.h create mode 100644 third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h create mode 100644 third_party/fwkacllib/inc/hccl/hccl_types.h create mode 100644 third_party/fwkacllib/inc/toolchain/prof_callback.h diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h new file mode 100644 index 00000000..ef5b4772 --- /dev/null +++ b/inc/external/acl/acl.h @@ -0,0 +1,73 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_ACL_ACL_H_ +#define INC_EXTERNAL_ACL_ACL_H_ + +#include "acl_rt.h" +#include "acl_op.h" +#include "acl_mdl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Current version is 1.0.0 +#define ACL_MAJOR_VERSION 1 +#define ACL_MINOR_VERSION 0 +#define ACL_PATCH_VERSION 0 + +/** + * @ingroup AscendCL + * @brief acl initialize + * + * @par Restriction + * The aclInit interface can be called only once in a process + * @param configPath [IN] the config path,it can be NULL + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath); + +/** + * @ingroup AscendCL + * @brief acl finalize + * + * @par Restriction + * Need to call aclFinalize before the process exits. + * After calling aclFinalize,the services cannot continue to be used normally. + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclFinalize(); + +/** + * @ingroup AscendCL + * @brief query ACL interface version + * + * @param majorVersion[OUT] ACL interface major version + * @param minorVersion[OUT] ACL interface minor version + * @param patchVersion[OUT] ACL interface patch version + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_H_ diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h index debadcfd..c1341d59 100644 --- a/inc/external/acl/acl_base.h +++ b/inc/external/acl/acl_base.h @@ -223,6 +223,29 @@ ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size); */ ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer); +/** + * @ingroup AscendCL + * @brief update new data of aclDataBuffer + * + * @param dataBuffer [OUT] pointer to aclDataBuffer + * @li The old data needs to be released by the user, otherwise a memory leak may occur + * call aclGetDataBufferAddr interface to get old data address + * call aclrtFree interface to release memory + * + * @param data [IN] pointer to new data + * @li Need to be managed by the user, + * call aclrtMalloc interface to apply for memory, + * call aclrtFree interface to release memory + * + * @param size [IN] size of data in bytes + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + * + * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr + */ +ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size); + /** * @ingroup AscendCL * @brief get data address from aclDataBuffer @@ -547,6 +570,19 @@ ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); */ ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName); +/** + * @ingroup AscendCL + * @brief Set const data specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param dataBuffer [IN] pointer to the const databuffer + * @param length [IN] the length of const databuffer + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length); + /** * @ingroup AscendCL * @brief an interface for users to output APP logs diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h new file mode 100644 index 00000000..0652358d --- /dev/null +++ b/inc/external/acl/acl_mdl.h @@ -0,0 +1,1112 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_ACL_ACL_MODEL_H_ +#define INC_EXTERNAL_ACL_ACL_MODEL_H_ + +#include +#include + +#include "acl_base.h" +#include "acl_rt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_MAX_DIM_CNT 128 +#define ACL_MAX_TENSOR_NAME_LEN 128 +#define ACL_MAX_BATCH_NUM 128 +#define ACL_MAX_HW_NUM 128 +#define ACL_MAX_SHAPE_COUNT 128 +#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF + +#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" +#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" + +typedef struct aclmdlDataset aclmdlDataset; +typedef struct aclmdlDesc aclmdlDesc; +typedef struct aclmdlAIPP aclmdlAIPP; +typedef struct aclAippExtendInfo aclAippExtendInfo; + +typedef enum { + ACL_YUV420SP_U8 = 1, + ACL_XRGB8888_U8, + ACL_RGB888_U8, + ACL_YUV400_U8, + ACL_NC1HWC0DI_FP16, + ACL_NC1HWC0DI_S8, + ACL_ARGB8888_U8, + ACL_YUYV_U8, + ACL_YUV422SP_U8, + ACL_AYUV444_U8, + ACL_RAW10, + ACL_RAW12, + ACL_RAW16, + ACL_RAW24, + ACL_AIPP_RESERVED = 0xffff, +} aclAippInputFormat; + +typedef enum { + ACL_DATA_WITHOUT_AIPP = 0, + ACL_DATA_WITH_STATIC_AIPP, + ACL_DATA_WITH_DYNAMIC_AIPP, + ACL_DYNAMIC_AIPP_NODE +} aclmdlInputAippType; + +typedef struct aclmdlIODims { + char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ + size_t dimCount; /**< dim array count */ + int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ +} aclmdlIODims; + +typedef struct aclAippDims { + aclmdlIODims srcDims; /**< input dims before model transform */ + size_t srcSize; /**< input size before model transform */ + aclmdlIODims aippOutdims; /**< aipp output dims */ + size_t aippOutSize; /**< aipp output size */ +} aclAippDims; + +typedef struct aclmdlBatch { + size_t batchCount; /**< batch array count */ + uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ +} aclmdlBatch; + +typedef struct aclmdlHW { + size_t hwCount; /**< height&width array count */ + uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ +} aclmdlHW; + +typedef struct aclAippInfo { + 
aclAippInputFormat inputFormat; + int32_t srcImageSizeW; + int32_t srcImageSizeH; + int8_t cropSwitch; + int32_t loadStartPosW; + int32_t loadStartPosH; + int32_t cropSizeW; + int32_t cropSizeH; + int8_t resizeSwitch; + int32_t resizeOutputW; + int32_t resizeOutputH; + int8_t paddingSwitch; + int32_t leftPaddingSize; + int32_t rightPaddingSize; + int32_t topPaddingSize; + int32_t bottomPaddingSize; + int8_t cscSwitch; + int8_t rbuvSwapSwitch; + int8_t axSwapSwitch; + int8_t singleLineMode; + int32_t matrixR0C0; + int32_t matrixR0C1; + int32_t matrixR0C2; + int32_t matrixR1C0; + int32_t matrixR1C1; + int32_t matrixR1C2; + int32_t matrixR2C0; + int32_t matrixR2C1; + int32_t matrixR2C2; + int32_t outputBias0; + int32_t outputBias1; + int32_t outputBias2; + int32_t inputBias0; + int32_t inputBias1; + int32_t inputBias2; + int32_t meanChn0; + int32_t meanChn1; + int32_t meanChn2; + int32_t meanChn3; + float minChn0; + float minChn1; + float minChn2; + float minChn3; + float varReciChn0; + float varReciChn1; + float varReciChn2; + float varReciChn3; + aclFormat srcFormat; + aclDataType srcDatatype; + size_t srcDimNum; + size_t shapeCount; + aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; + aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ +} aclAippInfo; + +/** + * @ingroup AscendCL + * @brief Create data of type aclmdlDesc + * + * @retval the aclmdlDesc pointer + */ +ACL_FUNC_VISIBILITY aclmdlDesc *aclmdlCreateDesc(); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlDesc + * + * @param modelDesc [IN] Pointer to aclmdlDesc to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyDesc(aclmdlDesc *modelDesc); + +/** + * @ingroup AscendCL + * @brief Get aclmdlDesc data of the model according to the model ID + * + * @param modelDesc [OUT] aclmdlDesc pointer + * @param modelId [IN] model id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetDesc(aclmdlDesc *modelDesc, uint32_t modelId); + +/** + * @ingroup AscendCL + * @brief Get the number of the inputs of + * the model according to data of aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * + * @retval input size with aclmdlDesc + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetNumInputs(aclmdlDesc *modelDesc); + +/** + * @ingroup AscendCL + * @brief Get the number of the output of + * the model according to data of aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * + * @retval output size with aclmdlDesc + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetNumOutputs(aclmdlDesc *modelDesc); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified input according to + * the data of type aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * @param index [IN] the size of the number of inputs to be obtained, + * the index value starts from 0 + * + * @retval Specify the size of the input + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetInputSizeByIndex(aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified output according to + * the data of type aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * @param index [IN] the size of the number of outputs to be obtained, + * the index value starts from 0 + * + * @retval Specify the size of the output + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetOutputSizeByIndex(aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief Create data of type aclmdlDataset + * + * @retval the 
aclmdlDataset pointer + */ +ACL_FUNC_VISIBILITY aclmdlDataset *aclmdlCreateDataset(); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlDataset + * + * @param dataset [IN] Pointer to aclmdlDataset to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyDataset(const aclmdlDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Add aclDataBuffer to aclmdlDataset + * + * @param dataset [OUT] aclmdlDataset address of aclDataBuffer to be added + * @param dataBuffer [IN] aclDataBuffer address to be added + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief Get the number of aclDataBuffer in aclmdlDataset + * + * @param dataset [IN] aclmdlDataset pointer + * + * @retval the number of aclDataBuffer + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Get the aclDataBuffer in aclmdlDataset by index + * + * @param dataset [IN] aclmdlDataset pointer + * @param index [IN] the index of aclDataBuffer + * + * @retval Get successfully, return the address of aclDataBuffer + * @retval Failure return NULL + */ +ACL_FUNC_VISIBILITY aclDataBuffer *aclmdlGetDatasetBuffer(const aclmdlDataset *dataset, size_t index); + +/** + * @ingroup AscendCL + * @brief Load offline model data from files + * and manage memory internally by the system + * + * @par Function + * After the system finishes loading the model, + * the model ID returned is used as a mark to identify the model + * during subsequent operations + * + * @param modelPath [IN] Storage path for offline model files + * @param modelId [OUT] Model ID generated after + * the system finishes loading the model + * + * @retval ACL_SUCCESS The
function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t *modelId); + +/** + * @ingroup AscendCL + * @brief Load offline model data from memory and manage the memory of + * model running internally by the system + * + * @par Function + * After the system finishes loading the model, + * the model ID returned is used as a mark to identify the model + * during subsequent operations + * + * @param model [IN] Model data stored in memory + * @param modelSize [IN] model data size + * @param modelId [OUT] Model ID generated after + * the system finishes loading the model + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId); + +/** + * @ingroup AscendCL + * @brief Load offline model data from a file, + * and the user manages the memory of the model run by itself + * + * @par Function + * After the system finishes loading the model, + * the model ID returned is used as a mark to identify the model + * during subsequent operations. + * @param modelPath [IN] Storage path for offline model files + * @param modelId [OUT] Model ID generated after finishes loading the model + * @param workPtr [IN] A pointer to the working memory + * required by the model on the Device,can be null + * @param workSize [IN] The amount of working memory required by the model + * @param weightPtr [IN] Pointer to model weight memory on Device + * @param weightSize [IN] The amount of weight memory required by the model + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr, + size_t workSize, void *weightPtr, size_t weightSize); + +/** + * @ingroup AscendCL + * @brief Load offline model data from memory, + * and the user can manage the memory of model running + * + * @par Function + * After the system finishes loading the model, + * the model ID returned is used as a mark to identify the model + * during subsequent operations + * @param model [IN] Model data stored in memory + * @param modelSize [IN] model data size + * @param modelId [OUT] Model ID generated after finishes loading the model + * @param workPtr [IN] A pointer to the working memory + * required by the model on the Device,can be null + * @param workSize [IN] work memory size + * @param weightPtr [IN] Pointer to model weight memory on Device,can be null + * @param weightSize [IN] The amount of weight memory required by the model + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId, + void *workPtr, size_t workSize, void *weightPtr, + size_t weightSize); + +/** + * @ingroup AscendCL + * @brief load model from file with async queue + * + * @param modelPath [IN] model path + * @param modelId [OUT] return model id if load success + * @param inputQ [IN] input queue pointer + * @param inputQNum [IN] input queue num + * @param outputQ [IN] output queue pointer + * @param outputQNum [IN] output queue num + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint32_t *modelId, const uint32_t *inputQ, + size_t inputQNum, const uint32_t *outputQ, size_t outputQNum); + +/** + * @ingroup AscendCL + * @brief load model from memory with async queue + * + * @param model [IN] model memory which user manages + * @param modelSize [IN] model size + * @param modelId [OUT] return model id if load success + * @param inputQ [IN] input queue pointer + * @param inputQNum [IN] input queue num + * @param outputQ [IN] output queue pointer + * @param outputQNum [IN] output queue num + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, + const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ, + size_t outputQNum); + +/** + * @ingroup AscendCL + * @brief Execute model synchronous inference until the inference result is returned + * + * @param modelId [IN] ID of the model to perform inference + * @param input [IN] Input data for model inference + * @param output [OUT] Output data for model inference + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output); + +/** + * @ingroup AscendCL + * @brief Execute model asynchronous inference until the inference result is returned + * + * @param modelId [IN] ID of the model to perform inference + * @param input [IN] Input data for model inference + * @param output [OUT] Output data for model inference + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem + */ +ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief unload model with model id + * + * @param modelId [IN] model id to be unloaded + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlUnload(uint32_t modelId); + +/** + * @ingroup AscendCL + * @brief Get the weight memory size and working memory size + * required for model execution according to the model file + * + * @param fileName [IN] Model path to get memory information + * @param workSize [OUT] The amount of working memory for model executed + * @param weightSize [OUT] The amount of weight memory for model executed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlQuerySize(const char *fileName, size_t *workSize, size_t *weightSize); + +/** + * @ingroup AscendCL + * @brief Obtain the weights required for + * model execution according to the model data in memory + * + * @par Restriction + * The execution and weight memory is Device memory, + * and requires user application and release. + * @param model [IN] model memory which user manages + * @param modelSize [IN] model data size + * @param workSize [OUT] The amount of working memory for model executed + * @param weightSize [OUT] The amount of weight memory for model executed + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlQuerySizeFromMem(const void *model, size_t modelSize, size_t *workSize, + size_t *weightSize); + +/** + * @ingroup AscendCL + * @brief In dynamic batch scenarios, + * it is used to set the number of images processed + * at one time during model inference + * + * @param modelId [IN] model id + * @param dataset [IN|OUT] data for model inference + * @param index [IN] index of dynamic tensor + * @param batchSize [IN] Number of images processed at a time during model inference + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicBatchSize(uint32_t modelId, aclmdlDataset *dataset, size_t index, + uint64_t batchSize); + +/** + * @ingroup AscendCL + * @brief Sets the H and W of the specified input of the model + * + * @param modelId [IN] model id + * @param dataset [IN|OUT] data for model inference + * @param index [IN] index of dynamic tensor + * @param height [IN] model height + * @param width [IN] model width + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicHWSize(uint32_t modelId, aclmdlDataset *dataset, size_t index, + uint64_t height, uint64_t width); + +/** + * @ingroup AscendCL + * @brief Sets the dynamic dims of the specified input of the model + * + * @param modelId [IN] model id + * @param dataset [IN|OUT] data for model inference + * @param index [IN] index of dynamic dims + * @param dims [IN] value of dynamic dims + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetInputDynamicDims(uint32_t modelId, aclmdlDataset *dataset, size_t index, + const aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get input dims info + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlGetInputDimsV2 + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get input dims info(version 2), especially for static aipp + * it is the same with aclmdlGetInputDims while model without static aipp + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlGetInputDims + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDimsV2(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get output dims info + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get current output dims info + * + * @par Function + * The following use cases are supported: + * @li Get current output shape when model is dynamic and + * dynamic shape info is set + * @li Get max output shape when model is dynamic and + * dynamic shape info is not set + * @li Get actual output shape when model is static + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get input name by index + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * + * @retval input tensor name, the same life cycle with modelDesc + */ +ACL_FUNC_VISIBILITY const char *aclmdlGetInputNameByIndex(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get output name by index + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * + * @retval output tensor name, the same life cycle with modelDesc + */ +ACL_FUNC_VISIBILITY const char *aclmdlGetOutputNameByIndex(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get input format by index + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * + * @retval input tensor format + */ +ACL_FUNC_VISIBILITY aclFormat aclmdlGetInputFormat(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get output format by index + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * + * @retval output
tensor format + */ +ACL_FUNC_VISIBILITY aclFormat aclmdlGetOutputFormat(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get input data type by index + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * + * @retval input tensor data type + */ +ACL_FUNC_VISIBILITY aclDataType aclmdlGetInputDataType(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get output data type by index + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * + * @retval output tensor data type + */ +ACL_FUNC_VISIBILITY aclDataType aclmdlGetOutputDataType(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get input tensor index by name + * + * @param modelDesc [IN] model description + * @param name [IN] input tensor name + * @param index [OUT] input tensor index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index); + +/** + * @ingroup AscendCL + * @brief get output tensor index by name + * + * @param modelDesc [IN] model description + * @param name [IN] output tensor name + * @param index [OUT] output tensor index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetOutputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index); + +/** + * @ingroup AscendCL + * @brief get dynamic batch info + * + * @param modelDesc [IN] model description + * @param batch [OUT] dynamic batch info + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicBatch(const aclmdlDesc *modelDesc, aclmdlBatch *batch); + +/** + * @ingroup AscendCL + * @brief get dynamic height&width info + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * @param hw [OUT] dynamic height&width info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicHW(const aclmdlDesc *modelDesc, size_t index, aclmdlHW *hw); + +/** + * @ingroup AscendCL + * @brief get dynamic gear count + * + * @param modelDesc [IN] model description + * @param index [IN] unused, must be -1 + * @param gearCount [OUT] dynamic gear count + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicGearCount(const aclmdlDesc *modelDesc, size_t index, + size_t *gearCount); + +/** + * @ingroup AscendCL + * @brief get dynamic dims info + * + * @param modelDesc [IN] model description + * @param index [IN] unused, must be -1 + * @param dims [OUT] value of dynamic dims + * @param gearCount [IN] dynamic gear count + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims, + size_t gearCount); + +/** + * @ingroup AscendCL + * @brief Create data of type aclmdlAIPP + * + * @param batchSize [IN] batchsizes of model + * + * @retval the aclmdlAIPP pointer + */ +ACL_FUNC_VISIBILITY aclmdlAIPP *aclmdlCreateAIPP(uint64_t batchSize); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlAIPP + * + * @param aippParmsSet [IN] Pointer for aclmdlAIPP to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyAIPP(const aclmdlAIPP *aippParmsSet); + +/** + * @ingroup AscendCL + * @brief set InputFormat of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param inputFormat [IN] The inputFormat of aipp + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, aclAippInputFormat inputFormat); + +/** + * @ingroup AscendCL + * @brief set cscParms of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param csc_switch [IN] Csc switch + * @param cscMatrixR0C0 [IN] Csc_matrix_r0_c0 + * @param cscMatrixR0C1 [IN] Csc_matrix_r0_c1 + * @param cscMatrixR0C2 [IN] Csc_matrix_r0_c2 + * @param cscMatrixR1C0 [IN] Csc_matrix_r1_c0 + * @param cscMatrixR1C1 [IN] Csc_matrix_r1_c1 + * @param cscMatrixR1C2 [IN] Csc_matrix_r1_c2 + * @param cscMatrixR2C0 [IN] Csc_matrix_r2_c0 + * @param cscMatrixR2C1 [IN] Csc_matrix_r2_c1 + * @param cscMatrixR2C2 [IN] Csc_matrix_r2_c2 + * @param cscOutputBiasR0 [IN] Output Bias for RGB to YUV, element of row 0, unsigned number + * @param cscOutputBiasR1 [IN] Output Bias for RGB to YUV, element of row 1, unsigned number + * @param cscOutputBiasR2 [IN] Output Bias for RGB to YUV, element of row 2, unsigned number + * @param cscInputBiasR0 [IN] Input Bias for YUV to RGB, element of row 0, unsigned number + * @param cscInputBiasR1 [IN] Input Bias for YUV to RGB, element of row 1, unsigned number + * @param cscInputBiasR2 [IN] Input Bias for YUV to RGB, element of row 2, unsigned number + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, + int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, + int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, + int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, + uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, + uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, + uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); + +/** + * @ingroup AscendCL + * @brief set rb/uv swap switch of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param rbuvSwapSwitch [IN] rb/uv swap switch + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); + +/** + * @ingroup AscendCL + * @brief set RGBA->ARGB, YUVA->AYUV swap switch of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param axSwapSwitch [IN] RGBA->ARGB, YUVA->AYUV swap switch + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); + +/** + * @ingroup AscendCL + * @brief set source image of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param srcImageSizeW [IN] Source image width + * @param srcImageSizeH [IN] Source image height + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, + int32_t srcImageSizeH); + +/** + * @ingroup AscendCL + * @brief set resize switch of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param scfSwitch [IN] Resize switch + * @param scfInputSizeW [IN] Input width of scf + * @param scfInputSizeH [IN] Input height of scf + * @param scfOutputSizeW [IN] Output width of scf + * @param scfOutputSizeH [IN] Output height of scf + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW, + int32_t scfInputSizeH, int32_t scfOutputSizeW, + int32_t scfOutputSizeH, uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set cropParams of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param cropSwitch [IN] Crop switch + * @param cropStartPosW [IN] The start horizontal position of cropping + * @param cropStartPosH [IN] The start vertical position of cropping + * @param cropSizeW [IN] Crop width + * @param cropSizeH [IN] Crop height + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW, + int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set paddingParams of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param paddingSwitch [IN] Padding switch + * @param paddingSizeTop [IN] Top padding size + * @param paddingSizeBottom [IN] Bottom padding size + * @param paddingSizeLeft [IN] Left padding size + * @param paddingSizeRight [IN] Right padding size + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, + int32_t paddingSizeTop, int32_t paddingSizeBottom, + int32_t paddingSizeLeft, int32_t paddingSizeRight, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set DtcPixelMean of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param dtcPixelMeanChn0 [IN] Mean value of channel 0 + * @param dtcPixelMeanChn1 [IN] Mean value of channel 1 + * @param dtcPixelMeanChn2 [IN] Mean value of channel 2 + * @param dtcPixelMeanChn3 [IN] Mean value of channel 3 + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0, + int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2, + int16_t dtcPixelMeanChn3, uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set DtcPixelMin of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param dtcPixelMinChn0 [IN] Min value of channel 0 + * @param dtcPixelMinChn1 [IN] Min value of channel 1 + * @param dtcPixelMinChn2 [IN] Min value of channel 2 + * @param dtcPixelMinChn3 [IN] Min value of channel 3 + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0, + float dtcPixelMinChn1, float dtcPixelMinChn2, + float dtcPixelMinChn3, uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set PixelVarReci of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param dtcPixelVarReciChn0 [IN] sfr_dtc_pixel_variance_reci_ch0 + * @param dtcPixelVarReciChn1 [IN] sfr_dtc_pixel_variance_reci_ch1 + * @param dtcPixelVarReciChn2 [IN] sfr_dtc_pixel_variance_reci_ch2 + * @param dtcPixelVarReciChn3 [IN] sfr_dtc_pixel_variance_reci_ch3 + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0, + float dtcPixelVarReciChn1, float dtcPixelVarReciChn2, + float dtcPixelVarReciChn3, uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set aipp parameters to model + * + * @param modelId [IN] model id + * @param dataset [IN] Pointer of dataset + * @param index [IN] index of input for aipp data(ACL_DYNAMIC_AIPP_NODE) + * @param aippParmsSet [IN] Pointer for aclmdlAIPP + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index, + const aclmdlAIPP *aippParmsSet); + +/** + * @ingroup AscendCL + * @brief set aipp parameters to model + * + * @param modelId [IN] model id + * @param dataset [IN] Pointer of dataset + * @param index [IN] index of input for data which linked dynamic aipp(ACL_DATA_WITH_DYNAMIC_AIPP) + * @param aippParmsSet [IN] Pointer for aclmdlAIPP + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index, + const aclmdlAIPP *aippParmsSet); + +/** + * @ingroup AscendCL + * @brief get input aipp type + * + * @param modelId [IN] model id + * @param index [IN] index of input + * @param type [OUT] aipp type for input, refer to aclmdlInputAippType(enum) + * @param dynamicAttachedDataIndex [OUT] index for dynamic attached data(ACL_DYNAMIC_AIPP_NODE) + * valid when type is ACL_DATA_WITH_DYNAMIC_AIPP, invalid value is ACL_INVALID_NODE_INDEX + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type, + size_t *dynamicAttachedDataIndex); + +/** + * @ingroup AscendCL + * @brief get static aipp parameters from model + * + * @param modelId [IN] model id + * @param index [IN] index of tensor + * @param aippinfo [OUT] Pointer for static aipp info + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); + +/** + * @ingroup AscendCL + * @brief get op description info + * + * @param deviceId [IN] device id + * @param streamId [IN] stream id + * @param taskId [IN] task id + * @param opName [OUT] pointer to op name + * @param opNameLen [IN] the length of op name + * @param inputDesc [OUT] pointer to input description + * @param numInputs [OUT] the number of input tensor + * @param outputDesc [OUT] pointer to output description + * @param numOutputs [OUT] the number of output tensor + * + * @retval ACL_SUCCESS The function is successfully executed + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId, + char *opName, size_t opNameLen, aclTensorDesc **inputDesc, + size_t *numInputs, aclTensorDesc **outputDesc, + size_t *numOutputs); + +/** + * @ingroup AscendCL + * @brief init dump + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); + +/** + * @ingroup AscendCL + * @brief set param of dump + * + * @param dumpCfgPath [IN] the path of dump config + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); + +/** + * @ingroup AscendCL + * @brief finalize dump. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h new file mode 100644 index 00000000..65c55290 --- /dev/null +++ b/inc/external/acl/acl_prof.h @@ -0,0 +1,323 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_PROF_H_ +#define INC_EXTERNAL_ACL_PROF_H_ + +#include "acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_PROF_ACL_API 0x0001 +#define ACL_PROF_TASK_TIME 0x0002 +#define ACL_PROF_AICORE_METRICS 0x0004 +#define ACL_PROF_AICPU_TRACE 0x0008 + +#define ACL_PROF_MAX_OP_NAME_LEN 257 +#define ACL_PROF_MAX_OP_TYPE_LEN 65 + +typedef enum { + ACL_AICORE_ARITHMATIC_THROUGHPUT = 0, + ACL_AICORE_ARITHMETIC_UTILIZATION = 0, + ACL_AICORE_PIPE_UTILIZATION = 1, + ACL_AICORE_MEMORY_BANDWIDTH = 2, + ACL_AICORE_L0B_AND_WIDTH = 3, + ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, + ACL_AICORE_NONE = 0xFF +} aclprofAicoreMetrics; + +typedef struct aclprofConfig aclprofConfig; +typedef struct aclprofStopConfig aclprofStopConfig; +typedef struct aclprofAicoreEvents aclprofAicoreEvents; +typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; + +/** + * @ingroup AscendCL + * @brief profiling initialize + * + * @param profilerResultPath [IN] path of profiling result + * @param length 
[IN] length of profilerResultPath + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofFinalize + */ +ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length); + +/** + * @ingroup AscendCL + * @brief profiling finalize + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofInit + */ +ACL_FUNC_VISIBILITY aclError aclprofFinalize(); + +/** + * @ingroup AscendCL + * @brief Start profiling modules by profilerConfig + * + * @param profilerConfig [IN] config of profiling + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofStop + */ +ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); + +/** + * @ingroup AscendCL + * @brief Create data of type aclprofConfig + * + * @param deviceIdList [IN] list of device id + * @param deviceNums [IN] number of devices + * @param aicoreMetrics [IN] type of aicore metrics + * @param aicoreEvents [IN] pointer to aicore events, only support NULL now + * @param dataTypeConfig [IN] config modules need profiling + * + * @retval the aclprofConfig pointer + * + * @see aclprofDestroyConfig + */ +ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, + aclprofAicoreMetrics aicoreMetrics, + aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); + +/** + * @ingroup AscendCL + * @brief Destroy data of type aclprofConfig + * + * @param profilerConfig [IN] config of profiling + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclprofCreateConfig + */ +ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig); + +/** + * @ingroup AscendCL + * @brief stop profiling modules by stopProfilingConfig + * + * @param profilerConfig [IN] pointer to stop config of profiling + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofStart + */ +ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); + +/** + * @ingroup AscendCL + * @brief subscribe profiling data of model + * + * @param modelId [IN] the model id subscribed + * @param profSubscribeConfig [IN] pointer to config of model subscribe + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofModelUnSubscribe + */ +ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); + +/** + * @ingroup AscendCL + * @brief unsubscribe profiling data of model + * + * @param modelId [IN] the model id unsubscribed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofModelSubscribe + */ +ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); + +/** + * @ingroup AscendCL + * @brief create subscribe config + * + * @param timeInfoSwitch [IN] switch whether get time info from model + * @param aicoreMetrics [IN] aicore metrics + * @param fd [IN] pointer to write pipe + * + * @retval the aclprofSubscribeConfig pointer + * + * @see aclprofDestroySubscribeConfig + */ +ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, + aclprofAicoreMetrics aicoreMetrics, void *fd); + +/** + * @ingroup AscendCL + * @brief destroy subscribe config + * + * @param profSubscribeConfig [IN] subscribe config + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclprofCreateSubscribeConfig + */ +ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig); + +/** + * @ingroup AscendCL + * @brief get size of op desc + * + * @param opDescSize [OUT] size of op desc + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize); + +/** + * @ingroup AscendCL + * @brief get op number from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param opNumber [OUT] op number of subscription data + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber); + +/** + * @ingroup AscendCL + * @brief get op type from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * @param opType [OUT] obtained op type string + * @param opTypeLen [IN] obtained length of op type string + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, + size_t opTypeLen); + +/** + * @ingroup AscendCL + * @brief get op name from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * @param opName [OUT] obtained op name string + * @param opNameLen [IN] obtained length of op name string + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, + size_t opNameLen); + +/** + * @ingroup AscendCL + * @brief get start time of specified op from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval start time(us) of specified op with timestamp + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get end time of specified op from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval end time(us) of specified op with timestamp + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get execution time of specified op from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval execution time(us) of specified op with timestamp + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get model id from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * + * @retval model id of subscription data + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get cube ops from subscription data + * + * @param
opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval cube ops of subscription data + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpCubeOps(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get vector ops from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval vector ops of subscription data + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpVectorOps(const void *opInfo, size_t opInfoLen, uint32_t index); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_PROF_H_ \ No newline at end of file diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h new file mode 100644 index 00000000..eb6b4240 --- /dev/null +++ b/inc/external/acl/acl_rt.h @@ -0,0 +1,932 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_ACL_ACL_RT_H_ +#define INC_EXTERNAL_ACL_ACL_RT_H_ + +#include +#include +#include "acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum aclrtRunMode { + ACL_DEVICE, + ACL_HOST, +} aclrtRunMode; + +typedef enum aclrtTsId { + ACL_TS_ID_AICORE = 0, + ACL_TS_ID_AIVECTOR = 1, + ACL_TS_ID_RESERVED = 2, +} aclrtTsId; + +typedef enum aclrtEventStatus { + ACL_EVENT_STATUS_COMPLETE = 0, + ACL_EVENT_STATUS_NOT_READY = 1, + ACL_EVENT_STATUS_RESERVED = 2, +} aclrtEventStatus; + +typedef enum aclrtCallbackBlockType { + ACL_CALLBACK_NO_BLOCK, + ACL_CALLBACK_BLOCK, +} aclrtCallbackBlockType; + +typedef enum aclrtMemcpyKind { + ACL_MEMCPY_HOST_TO_HOST, + ACL_MEMCPY_HOST_TO_DEVICE, + ACL_MEMCPY_DEVICE_TO_HOST, + ACL_MEMCPY_DEVICE_TO_DEVICE, +} aclrtMemcpyKind; + +typedef enum aclrtMemMallocPolicy { + ACL_MEM_MALLOC_HUGE_FIRST, + ACL_MEM_MALLOC_HUGE_ONLY, + ACL_MEM_MALLOC_NORMAL_ONLY, + ACL_MEM_MALLOC_HUGE_FIRST_P2P, + ACL_MEM_MALLOC_HUGE_ONLY_P2P, + ACL_MEM_MALLOC_NORMAL_ONLY_P2P, +} aclrtMemMallocPolicy; + +typedef enum aclrtMemAttr { + ACL_DDR_MEM, + ACL_HBM_MEM, + ACL_DDR_MEM_HUGE, + ACL_DDR_MEM_NORMAL, + ACL_HBM_MEM_HUGE, + ACL_HBM_MEM_NORMAL, + ACL_DDR_MEM_P2P_HUGE, + ACL_DDR_MEM_P2P_NORMAL, + ACL_HBM_MEM_P2P_HUGE, + ACL_HBM_MEM_P2P_NORMAL, +} aclrtMemAttr; + +typedef enum aclrtGroupAttr { + ACL_GROUP_AICORE_INT, + ACL_GROUP_AIV_INT, + ACL_GROUP_AIC_INT, + ACL_GROUP_SDMANUM_INT, + ACL_GROUP_ASQNUM_INT +} aclrtGroupAttr; + +typedef struct tagRtGroupInfo aclrtGroupInfo; + +typedef struct rtExceptionInfo aclrtExceptionInfo; + +typedef void (*aclrtCallback)(void *userData); + +typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo); + +/** + * @ingroup AscendCL + * @brief Set a callback function to handle exception information + * + * @param callback [IN] callback function to handle exception information + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback); + +/** + * @ingroup AscendCL + * @brief Get task id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The task id from exception information + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief Get stream id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The stream id from exception information + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief Get thread id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The thread id of fail task + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief Get device id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The device id of fail task + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief The thread that handles the callback function on the Stream + * + * @param threadId [IN] thread ID + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Add a callback function to be executed on the host + * to the task queue of the Stream + * + * @param fn [IN] Specify the callback function to be added + * The function prototype of the callback function is: + * typedef void (*aclrtCallback)(void *userData); + * @param userData [IN] User data to be passed to the callback function + * @param blockType [IN] callback block type + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief After waiting for a specified time, trigger callback processing + * + * @par Function + * The thread processing callback specified by + * the aclrtSubscribeReport interface + * + * @param timeout [IN] timeout value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSubscribeReport + */ +ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout); + +/** + * @ingroup AscendCL + * @brief Cancel thread registration, + * the callback function on the specified Stream + * is no longer processed by the specified thread + * + * @param threadId [IN] thread ID + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create context and associates it with the calling thread + * + * @par Function + * The following use cases are supported: + * @li If you don't call the aclrtCreateContext interface + * to explicitly create the context, + * the system will use the default context, which is implicitly created + * when the aclrtSetDevice interface is called. + * @li If multiple contexts are created in a process + * (there is no limit on the number of contexts), + * the current thread can only use one of them at the same time. + * It is recommended to explicitly specify the context of the current thread + * through the aclrtSetCurrentContext interface to increase + * the maintainability of the program. + * + * @param context [OUT] point to the created context + * @param deviceId [IN] device to create context on + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSetDevice | aclrtSetCurrentContext + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId); + +/** + * @ingroup AscendCL + * @brief destroy context instance + * + * @par Function + * Can only destroy context created through aclrtCreateContext interface + * + * @param context [IN] the context to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateContext + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context); + +/** + * @ingroup AscendCL + * @brief set the context of the thread + * + * @par Function + * The following scenarios are supported: + * @li If the aclrtCreateContext interface is called in a thread to explicitly + * create a Context (for example: ctx1), the thread's Context can be specified + * without calling the aclrtSetCurrentContext interface.
+ * The system uses ctx1 as the context of thread1 by default. + * @li If no context is explicitly created through the aclrtCreateContext interface, + * the system uses the default context as the context of the thread. + * At this time, the aclrtDestroyContext interface cannot be used to release + * the default context. + * @li If the aclrtSetCurrentContext interface is called multiple times to + * set the thread's Context, the last one prevails. + * + * @par Restriction + * @li If the device corresponding to the context set for the thread + * has been reset, you cannot set the context as the context of the thread, + * otherwise a business exception will result. + * @li It is recommended to use the context created in a thread. + * If the aclrtCreateContext interface is called in thread A to create a context, + * and the context is used in thread B, + * the user must guarantee the execution order of tasks in the same stream + * under the same context in two threads. + * + * @param context [IN] the current context of the thread + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateContext | aclrtDestroyContext + */ +ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context); + +/** + * @ingroup AscendCL + * @brief get the context of the thread + * + * @par Function + * If the user calls the aclrtSetCurrentContext interface + * multiple times to set the context of the current thread, + * then the last set context is obtained + * + * @param context [OUT] the current context of the thread + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + * + * @see aclrtSetCurrentContext + */ +ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context); + +/** + * @ingroup AscendCL + * @brief Specify the device to use for the operation + * implicitly create the default context and the default stream + * + * @par Function + * The following use cases are supported: + * @li Device can be specified in the process or thread. + * If you call the aclrtSetDevice interface multiple + * times to specify the same device, + * you only need to call the aclrtResetDevice interface to reset the device. + * @li The same device can be specified for operation + * in different processes or threads. + * @li Device is specified in a process, + * and multiple threads in the process can share this device to explicitly + * create a Context (aclrtCreateContext interface). + * @li In multi-device scenarios, you can switch to other devices + * through the aclrtSetDevice interface in the process. + * + * @param deviceId [IN] the device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtResetDevice |aclrtCreateContext + */ +ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId); + +/** + * @ingroup AscendCL + * @brief Reset the current operating Device and free resources on the device, + * including the default context, the default stream, + * and all streams created under the default context, + * and synchronizes the interface. + * If the task under the default context or stream has not been completed, + * the system will wait for the task to complete before releasing it. + * + * @par Restriction + * @li The Context, Stream, and Event that are explicitly created + * on the device to be reset. Before resetting, + * it is recommended to follow the following interface calling sequence, + * otherwise business abnormalities may be caused. 
+ * @li Interface calling sequence: + * call aclrtDestroyEvent interface to release Event or + * call aclrtDestroyStream interface to release explicitly created Stream-> + * call aclrtDestroyContext to release explicitly created Context-> + * call aclrtResetDevice interface + * + * @param deviceId [IN] the device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId); + +/** + * @ingroup AscendCL + * @brief get target device of current thread + * + * @param deviceId [OUT] the device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId); + +/** + * @ingroup AscendCL + * @brief get target side + * + * @param runMode [OUT] the run mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode); + +/** + * @ingroup AscendCL + * @brief Wait for compute device to finish + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void); + +/** + * @ingroup AscendCL + * @brief Set Scheduling TS + * + * @param tsId [IN] the ts id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId); + +/** + * @ingroup AscendCL + * @brief get total device number. + * + * @param count [OUT] the device number + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count); + +/** + * @ingroup AscendCL + * @brief create event instance + * + * @param event [OUT] created event + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event); + +/** + * @ingroup AscendCL + * @brief destroy event instance + * + * @par Function + * Only events created through the aclrtCreateEvent interface can be + * destroyed, synchronous interfaces. When destroying an event, + * the user must ensure that the tasks involved in the aclrtSynchronizeEvent + * interface or the aclrtStreamWaitEvent interface are completed before + * they are destroyed. + * + * @param event [IN] event to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief Record an Event in the Stream + * + * @param event [IN] event to record + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Reset an event + * + * @par Function + * Users need to make sure to wait for the tasks in the Stream + * to complete before resetting the Event + * + * @param event [IN] event to reset + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Queries an event's status + * + * @param event [IN] event to query + * @param status [OUT] event status + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); + +/** + * @ingroup AscendCL + * @brief Block Host Running, wait event to be complete + * + * @param event [IN] event to wait + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief computes the elapsed time between events. + * + * @param ms [OUT] time between start and end in ms + * @param start [IN] starting event + * @param end [IN] ending event + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); + +/** + * @ingroup AscendCL + * @brief alloc memory on device + * + * @par Function + * alloc for size linear memory on device + * and return a pointer to allocated memory by *devPtr + * + * @par Restriction + * @li The memory requested by the aclrtMalloc interface needs to be released + * through the aclrtFree interface. + * @li Before calling the media data processing interface, + * if you need to apply memory on the device to store input or output data, + * you need to call acldvppMalloc to apply for memory. + * + * @param devPtr [OUT] pointer to pointer to allocated memory on device + * @param size [IN] alloc memory size + * @param policy [IN] memory alloc policy + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclrtFree | acldvppMalloc | aclrtMallocCached + */ +ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); + +/** + * @ingroup AscendCL + * @brief allocate memory on device with cache + * + * @par Function + * alloc for size linear memory on device + * and return a pointer to allocated memory by *devPtr + * + * @par Restriction + * @li The memory requested by the aclrtMallocCached interface needs to be released + * through the aclrtFree interface. + * + * @param devPtr [OUT] pointer to pointer to allocated memory on device + * @param size [IN] alloc memory size + * @param policy [IN] memory alloc policy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtFree | aclrtMalloc + */ +ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); + +/** + * @ingroup AscendCL + * @brief flush cache data to ddr + * + * @param devPtr [IN] the pointer that flush data to ddr + * @param size [IN] flush size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief invalidate cache data + * + * @param devPtr [IN] pointer to invalidate cache data + * @param size [IN] invalidate size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free device memory + * + * @par Function + * can only free memory allocated through the aclrtMalloc interface + * + * @param devPtr [IN] Pointer to memory to be freed + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclrtMalloc + */ +ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr); + +/** + * @ingroup AscendCL + * @brief alloc memory on host + * + * @par Restriction + * @li The requested memory cannot be used in the Device + * and needs to be explicitly copied to the Device. + * @li The memory requested by the aclrtMallocHost interface + * needs to be released through the aclrtFreeHost interface. + * + * @param hostPtr [OUT] pointer to pointer to allocated memory on the host + * @param size [IN] alloc memory size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtFreeHost + */ +ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free host memory + * + * @par Function + * can only free memory allocated through the aclrtMallocHost interface + * + * @param hostPtr [IN] free memory pointer + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtMallocHost + */ +ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); + +/** + * @ingroup AscendCL + * @brief synchronous memory replication between host and device + * + * @param dst [IN] destination address pointer + * @param destMax [IN] Max length of the destination address memory + * @param src [IN] source address pointer + * @param count [IN] the length of byte to copy + * @param kind [IN] memcpy type + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, + aclrtMemcpyKind kind); + +/** + * @ingroup AscendCL + * @brief Initialize memory and set contents of memory to specified value + * + * @par Function + * The memory to be initialized is on the Host or device side, + * and the system determines whether + * it is host or device according to the address + * + * @param devPtr [IN] Starting address of memory + * @param maxCount [IN] Max length of destination address memory + * @param value [IN] Set value + * @param count [IN] The length of memory + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count); + +/** + * @ingroup AscendCL + * @brief Asynchronous memory replication between Host and Device + * + * @par Function + * After calling this interface, + * be sure to call the aclrtSynchronizeStream interface to ensure that + * the task of memory replication has been completed + * + * @par Restriction + * @li For on-chip Device-to-Device memory copy, + * both the source and destination addresses must be 64-byte aligned + * + * @param dst [IN] destination address pointer + * @param destMax [IN] Max length of destination address memory + * @param src [IN] source address pointer + * @param count [IN] the number of byte to copy + * @param kind [IN] memcpy type + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, + aclrtMemcpyKind kind, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Asynchronous initialize memory + * and set contents of memory to specified value async + * + * @par Function + * The memory to be initialized is on the Host or device side, + * and the system determines whether + * it is host or device according to the address + * + * @param devPtr [IN] destination address pointer + * @param maxCount [IN] Max length of destination address memory + * @param value [IN] set value + * @param count [IN] the number of byte to set + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create stream instance + * + * @param stream [OUT] the created stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream); + +/** + * @ingroup AscendCL + * @brief destroy stream instance + * + * @par Function + * Can only destroy streams created through the aclrtCreateStream interface + * + * @par Restriction + * Before calling the aclrtDestroyStream interface to destroy + * the specified Stream, you need to call the aclrtSynchronizeStream interface + * to ensure that the tasks in the Stream have been completed. + * + * @param stream [IN] the stream to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclrtCreateStream | aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief block the host until all tasks + * in the specified stream have completed + * + * @param stream [IN] the stream to wait + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Blocks the operation of the specified Stream until + * the specified Event is completed. + * Support for multiple streams waiting for the same event. + * + * @param stream [IN] the wait stream. If using the default Stream, set NULL + * @param event [IN] the event to wait + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief set group + * + * @par Function + * set the task to the corresponding group + * + * @param groupId [IN] group id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail + */ +ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId); + +/** + * @ingroup AscendCL + * @brief get the number of group + * + * @par Function + * get the number of group. if the number of group is zero, + * it means that group is not supported or group is not created. + * + * @param count [OUT] the number of group + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + */ +ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count); + +/** + * @ingroup AscendCL + * @brief create group information + * + * @retval null for failed. + * @retval OtherValues success.
+ * + * @see aclrtDestroyGroupInfo + */ +ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo(); + +/** + * @ingroup AscendCL + * @brief destroy group information + * + * @param groupInfo [IN] pointer to group information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateGroupInfo + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo); + +/** + * @ingroup AscendCL + * @brief get all group information + * + * @param groupInfo [OUT] pointer to group information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtGetGroupCount + */ +ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); + +/** + * @ingroup AscendCL + * @brief get detail information of group + * + * @param groupInfo [IN] pointer to group information + * @param groupId [IN] group index value + * @param attr [IN] group attribute + * @param attrValue [OUT] pointer to attribute value + * @param valueLen [IN] length of attribute value + * @param paramRetSize [OUT] pointer to real length of attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtGetGroupCount | aclrtGetAllGroupInfo + */ +ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId, + aclrtGroupAttr attr, void *attrValue, size_t valueLen, + size_t *paramRetSize); + +/** + * @ingroup AscendCL + * @brief checking whether current device and peer device support the p2p feature + * + * @param canAccessPeer [OUT] pointer to save the checking result + * @param deviceId [IN] current device id + * @param peerDeviceId [IN] peer device id + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess + */ +ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId); + +/** + * @ingroup AscendCL + * @brief enable the peer device to support the p2p feature + * + * @param peerDeviceId [IN] the peer device id + * @param flags [IN] reserved field, now it must be zero + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess + */ +ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags); + +/** + * @ingroup AscendCL + * @brief disable the peer device to support the p2p function + * + * @param peerDeviceId [IN] the peer device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess + */ +ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId); + +/** + * @ingroup AscendCL + * @brief Obtain the free memory and total memory of specified attribute. + * the specified memory include normal memory and huge memory. + * + * @param attr [IN] the memory attribute of specified device + * @param free [OUT] the free memory of specified device + * @param total [OUT] the total memory of specified device. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_RT_H_ diff --git a/inc/external/acl/acl_tdt.h b/inc/external/acl/acl_tdt.h new file mode 100644 index 00000000..c357518d --- /dev/null +++ b/inc/external/acl/acl_tdt.h @@ -0,0 +1,276 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_ +#define INC_EXTERNAL_ACL_ACL_TDT_H_ + +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum acltdtTensorType { + ACL_TENSOR_DATA_UNDEFINED = -1, + ACL_TENSOR_DATA_TENSOR, + ACL_TENSOR_DATA_END_OF_SEQUENCE, + ACL_TENSOR_DATA_ABNORMAL +}; + +typedef struct acltdtDataItem acltdtDataItem; +typedef struct acltdtDataset acltdtDataset; +typedef struct acltdtChannelHandle acltdtChannelHandle; + +/** + * @ingroup AscendCL + * @brief Get tensor type from item + * + * @param dataItem [IN] pointer to the data item + * + * @retval Tensor type. + * @retval ACL_TENSOR_DATA_UNDEFINED if dataItem is null + */ +ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get data type from item + * + * @param dataItem [IN] pointer to the data item + * + * @retval Data type.
+ * @retval ACL_DT_UNDEFINED if dataItem is null + */ +ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get data address from item + * + * @param dataItem [IN] pointer to data item + * + * @retval null for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get data size from item + * + * @param dataItem [IN] pointer to data item + * + * @retval 0 for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get dim's number from item + * + * @param dataItem [IN] pointer to data item + * + * @retval 0 for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get dims from item + * + * @param dataItem [IN] the struct of data item + * @param dims [IN|OUT] pointer to the dims of dataItem + * @param dimNum [IN] the size of the dims + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum); + +/** + * @ingroup AscendCL + * @brief Create the struct of data item + * + * @param tdtType [IN] Tdt tensor type + * @param dims [IN] pointer of tdtDataItem's dims + * @param dimNum [IN] Dim number + * @param dataType [IN] Data type + * @param data [IN] Data pointer + * @param size [IN] Data size + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyDataItem + */ +ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, + aclDataType dataType, void *data, size_t size); + +/** + * @ingroup AscendCL + * @brief Destroy the struct of data item + * + * @param dataItem [IN] pointer to the data item + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateDataItem + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Create the tdt dataset + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyDataset + */ +ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset(); + +/** + * @ingroup AscendCL + * @brief Destroy the tdt dataset + * + * @param dataset [IN] pointer to the dataset + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acltdtCreateDataset + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Get the data item + * + * @param dataset [IN] pointer to the dataset + * @param index [IN] index of the dataset + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtAddDataItem + */ +ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index); + +/** + * @ingroup AscendCL + * @brief Add the data item to the dataset + * + * @param dataset [OUT] pointer to the dataset + * @param dataItem [IN] pointer to the data item + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtGetDataItem + */ +ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get the size of dataset + * + * @param dataset [IN] pointer to the dataset + * + * @retval 0 for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Stop the channel + * + * @param handle [IN] pointer to the channel handle + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + * + * @see acltdtCreateChannel | acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); + +/** + * @ingroup AscendCL + * @brief Create the channel + * + * @param deviceId [IN] the device id + * @param name [IN] the channel's name + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtStopChannel | acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name); + +/** + * @ingroup AscendCL + * @brief Destroy the channel + * + * @param handle [IN] pointer to the channel handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateChannel | acltdtStopChannel + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); + +/** + * @ingroup AscendCL + * @brief Send tensor to device + * + * @param handle [IN] pointer to the channel handle + * @param dataset [IN] pointer to the dataset + * @param timeout [IN] to be reserved, now it must be -1 + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtReceiveTensor + */ +ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, + int32_t timeout); + +/** + * @ingroup AscendCL + * @brief Receive tensor from device + * + * @param handle [IN] pointer to the channel handle + * @param dataset [OUT] pointer to the dataset + * @param timeout [IN] to be reserved, now it must be -1 + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acltdtSendTensor + */ +ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, + int32_t timeout); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_TDT_H_ diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index 2dd2c70c..73d9564b 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -46,6 +46,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000; // feature not support static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000; // runtime internel error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h new file mode 100644 index 00000000..1a0f582d --- /dev/null +++ b/inc/external/acl/ops/acl_dvpp.h @@ -0,0 +1,2389 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#if !defined(ENABLE_DVPP_INTERFACE) +#if defined(_MSC_VER) +#error message("if you want to use dvpp funtions ,please use the macro definition (ENABLE_DVPP_INTERFACE).") +#else +#error "if you want to use dvpp funtions ,please use the macro definition (ENABLE_DVPP_INTERFACE)." +#endif +#endif + +#ifndef INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ +#define INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ + +#include +#include +#include "acl/acl.h" +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct acldvppPicDesc acldvppPicDesc; +typedef struct acldvppBatchPicDesc acldvppBatchPicDesc; +typedef struct acldvppRoiConfig acldvppRoiConfig; +typedef struct acldvppResizeConfig acldvppResizeConfig; +typedef struct acldvppBorderConfig acldvppBorderConfig; +typedef struct acldvppLutMap acldvppLutMap; +typedef struct acldvppChannelDesc acldvppChannelDesc; +typedef struct acldvppJpegeConfig acldvppJpegeConfig; +typedef struct aclvdecChannelDesc aclvdecChannelDesc; +typedef struct acldvppStreamDesc acldvppStreamDesc; +typedef struct aclvdecFrameConfig aclvdecFrameConfig; +typedef struct aclvencChannelDesc aclvencChannelDesc; +typedef struct aclvencFrameConfig aclvencFrameConfig; +typedef struct acldvppHist acldvppHist; +typedef void (*aclvdecCallback)(acldvppStreamDesc *input, acldvppPicDesc *output, void *userData); +typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output, void *userdata); + +// Supported Pixel Format +enum acldvppPixelFormat { + PIXEL_FORMAT_YUV_400 = 0, // 0 + PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 + PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 + PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 + PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 + PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 + PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 + PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 + PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 + PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 + PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 + PIXEL_FORMAT_YUV_PACKED_444 = 11, // 
11 + PIXEL_FORMAT_RGB_888 = 12, // 12 + PIXEL_FORMAT_BGR_888 = 13, // 13 + PIXEL_FORMAT_ARGB_8888 = 14, // 14 + PIXEL_FORMAT_ABGR_8888 = 15, // 15 + PIXEL_FORMAT_RGBA_8888 = 16, // 16 + PIXEL_FORMAT_BGRA_8888 = 17, // 17 + PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 + PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 + PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 + PIXEL_FORMAT_YVU_PLANAR_422, + PIXEL_FORMAT_YVU_PLANAR_444, + PIXEL_FORMAT_RGB_444 = 23, + PIXEL_FORMAT_BGR_444, + PIXEL_FORMAT_ARGB_4444, + PIXEL_FORMAT_ABGR_4444, + PIXEL_FORMAT_RGBA_4444, + PIXEL_FORMAT_BGRA_4444, + PIXEL_FORMAT_RGB_555, + PIXEL_FORMAT_BGR_555, + PIXEL_FORMAT_RGB_565, + PIXEL_FORMAT_BGR_565, + PIXEL_FORMAT_ARGB_1555, + PIXEL_FORMAT_ABGR_1555, + PIXEL_FORMAT_RGBA_1555, + PIXEL_FORMAT_BGRA_1555, + PIXEL_FORMAT_ARGB_8565, + PIXEL_FORMAT_ABGR_8565, + PIXEL_FORMAT_RGBA_8565, + PIXEL_FORMAT_BGRA_8565, + PIXEL_FORMAT_RGB_BAYER_8BPP = 50, + PIXEL_FORMAT_RGB_BAYER_10BPP, + PIXEL_FORMAT_RGB_BAYER_12BPP, + PIXEL_FORMAT_RGB_BAYER_14BPP, + PIXEL_FORMAT_RGB_BAYER_16BPP, + PIXEL_FORMAT_BGR_888_PLANAR = 70, + PIXEL_FORMAT_HSV_888_PACKAGE, + PIXEL_FORMAT_HSV_888_PLANAR, + PIXEL_FORMAT_LAB_888_PACKAGE, + PIXEL_FORMAT_LAB_888_PLANAR, + PIXEL_FORMAT_S8C1, + PIXEL_FORMAT_S8C2_PACKAGE, + PIXEL_FORMAT_S8C2_PLANAR, + PIXEL_FORMAT_S16C1, + PIXEL_FORMAT_U8C1, + PIXEL_FORMAT_U16C1, + PIXEL_FORMAT_S32C1, + PIXEL_FORMAT_U32C1, + PIXEL_FORMAT_U64C1, + PIXEL_FORMAT_S64C1, + PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, + PIXEL_FORMAT_YVU_SEMIPLANAR_440, + PIXEL_FORMAT_FLOAT32, + PIXEL_FORMAT_BUTT, + PIXEL_FORMAT_UNKNOWN = 10000 +}; + +// Stream Format +enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; + +// Supported Channel Mode +enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; + +// Supported Border Type +enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; + +// 
Venc parameter type +enum aclvencChannelDescParamType { + ACL_VENC_THREAD_ID_UINT64 = 0, + ACL_VENC_CALLBACK_PTR, + ACL_VENC_PIXEL_FORMAT_UINT32, + ACL_VENC_ENCODE_TYPE_UINT32, + ACL_VENC_PIC_WIDTH_UINT32, + ACL_VENC_PIC_HEIGHT_UINT32, + ACL_VENC_KEY_FRAME_INTERVAL_UINT32, + ACL_VENC_BUF_ADDR_PTR, + ACL_VENC_BUF_SIZE_UINT32, + ACL_VENC_RC_MODE_UINT32, + ACL_VENC_SRC_RATE_UINT32, + ACL_VENC_MAX_BITRATE_UINT32, + ACL_VENC_MAX_IP_PROP_UINT32 +}; + +/** + * @ingroup AscendCL + * @brief alloc device memory for dvpp. + * + * @par Function + * @li It's mainly used for allocating memory to device media data processing. + * The requested memory meets the data processing requirements. + * After calling this interface to request memory, + * you must release the memory using the acldvppFree interface. + * @li When calling the acldvppMalloc interface to apply for memory, + * the size entered by the user is aligned upwards to 32 integer multiples, + * and an additional 32 bytes are applied. + * + * @par Restriction + * If the user uses the acldvppMalloc interface to apply for a large block of + * memory and divide and manage the memory by himself, + * when applying for memory, the user needs to align up to 32 integer + * times + 32 bytes (ALIGN_UP [len] +32 words) according to + * the actual data size of each picture Section) to manage memory. + * + * @param devPtr [OUT] memory pointer. + * @param size [IN] memory size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppFree + */ +ACL_FUNC_VISIBILITY aclError acldvppMalloc(void **devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free device memory for dvpp. + * + * @par Function + * Free the memory requested through the acldvppMalloc interface + * @param devPtr [IN] memory pointer to free. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acldvppMalloc + */ +ACL_FUNC_VISIBILITY aclError acldvppFree(void *devPtr); + +/** + * @ingroup AscendCL + * @brief create DvppChannelDesc. + * + * @par Function + * Create a channel for image data processing. + * The same channel can be reused + * and is no longer available after destruction + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppChannelDesc *acldvppCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy dvppChannelDesc. + * + * @par Function + * Can only destroy channels created by the acldvppCreateChannel interface + * @param channelDesc [IN] the channel description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannelDesc | acldvppDestroyChannel + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyChannelDesc(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp channel Id. + * + * @par Restriction + * Interface calling sequence: + * acldvppCreateChannelDesc --> acldvppCreateChannel --> + * acldvppGetChannelDescChannelId + * + * @param channelDesc [IN] the channel description. + * + * @retval channel id. + * + * @see acldvppCreateChannelDesc | acldvppCreateChannel + */ +ACL_FUNC_VISIBILITY uint64_t acldvppGetChannelDescChannelId(const acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp picture description. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppCreatePicDesc(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp picture description. + * + * @par Function + * Can only destroy picture description information created + * through acldvppCreatePicDesc interface. + * @param picDesc [IN] dvpp picture description. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyPicDesc(acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's data. + * + * @param picDesc [OUT] dvpp picture description. + * @param dataDev [IN] dvpp picture dataDev. Must be the memory + * requested using the acldvppMalloc interface. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppMalloc + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescData(acldvppPicDesc *picDesc, void *dataDev); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's size. + * + * @param picDesc [OUT] dvpp picture description. + * @param size [IN] dvpp picture size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescSize(acldvppPicDesc *picDesc, uint32_t size); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's format. + * + * @param picDesc [OUT] dvpp picture description. + * @param format [IN] dvpp picture format. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescFormat(acldvppPicDesc *picDesc, acldvppPixelFormat format); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's width. + * + * @param picDesc [OUT] dvpp picture description. + * @param width [IN] dvpp picture width. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidth(acldvppPicDesc *picDesc, uint32_t width); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's height. + * + * @param picDesc [OUT] dvpp picture description. + * @param height [IN] dvpp picture height. + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeight(acldvppPicDesc *picDesc, uint32_t height); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's widthStride. + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width * 3 and then align to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC: input image width + * + * @param picDesc [OUT] dvpp picture description. + * @param widthStride [IN] dvpp picture widthStride. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidthStride(acldvppPicDesc *picDesc, uint32_t widthStride); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's heightStride. + * + * @par Restriction + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * Height stride minimum 6 and maximum 4096. + * + * @param picDesc [OUT] dvpp picture description. + * @param heightStride [IN] dvpp picture heightStride. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeightStride(acldvppPicDesc *picDesc, uint32_t heightStride); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's retcode. + * + * @param picDesc [OUT] dvpp picture description. + * @param retCode [IN] dvpp picture retcode.
+ * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescRetCode(acldvppPicDesc *picDesc, uint32_t retCode); + +/** + * @ingroup AscendCL + * @brief Get picture data. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval picture data addr. + * @retval default nullptr. + */ +ACL_FUNC_VISIBILITY void *acldvppGetPicDescData(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get picture data size. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval picture data size. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescSize(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's format. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval format + * @retval default PIXEL_FORMAT_YUV_400. + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat acldvppGetPicDescFormat(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's width. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval width. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidth(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's height. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval height. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeight(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's widthStride. 
+ * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width * 3 and then align to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC: input image width + * + * @param picDesc [IN] dvpp picture description. + * + * @retval stride width. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidthStride(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's heightStride. + * + * @par Restriction + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * Height stride minimum 6 and maximum 4096. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval stride height. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeightStride(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's retcode. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval ret code. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp roi config. + * + * @param left [IN] the left offset, must be even + * @param right [IN] the right offset, must be odd + * @param top [IN] the top offset, must be even + * @param bottom [IN] the bottom offset, must be odd + * + * @retval null for failed.
+ * @retval other success + */ +ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, + uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp roi config. + * + * @par Function + * Destroys data created through the acldvppCreateRoiConfig interface + * @param roiConfig [IN] dvpp roi config. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyRoiConfig(acldvppRoiConfig *roiConfig); + +/** + * @ingroup AscendCL + * @brief Set left of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param left [IN] left offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigLeft(acldvppRoiConfig *config, uint32_t left); + +/** + * @ingroup AscendCL + * @brief Set right of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param right [IN] right offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigRight(acldvppRoiConfig *config, uint32_t right); + +/** + * @ingroup AscendCL + * @brief Set top of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param top [IN] top offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigTop(acldvppRoiConfig *config, uint32_t top); + +/** + * @ingroup AscendCL + * @brief Set bottom of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param bottom [IN] bottom offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Set RoiConfig. 
+ * + * @param config [OUT] RoiConfig + * @param left [IN] left offset + * @param right [IN] right offset + * @param top [IN] top offset + * @param bottom [IN] bottom offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top, + uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Create dvpp resize config. + * The specified scaling algorithm is not supported. + * The default scaling algorithm is "nearest neighbor interpolation". + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY acldvppResizeConfig *acldvppCreateResizeConfig(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp resize config. + * + * @par Function + * Destroys the scaling configuration data created by + * the acldvppCreateResizeConfig interface + * + * @param resizeConfig [IN] resize config. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyResizeConfig(acldvppResizeConfig *resizeConfig); + +/** + * @ingroup AscendCL + * @brief Create jpege config. + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY acldvppJpegeConfig *acldvppCreateJpegeConfig(); + +/** + * @ingroup AscendCL + * @brief Destroy jpege config. + * + * @par Function + * Destroys the encoding configuration data created by + * the acldvppCreateJpegeConfig interface + * @param jpegeConfig [IN] config pointer to destroy. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateJpegeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyJpegeConfig(acldvppJpegeConfig *jpegeConfig); + +/** + * @ingroup AscendCL + * @brief Set jpege config's level. 
+ * + * @param jpegeConfig [OUT] Call the acldvppCreateJpegeConfig + * interface to create acldvppJpegeConfig data + * @param level [IN] Encoding quality range [0, 100], + * where level 0 encoding quality is similar to level 100, + * and the smaller the value in [1, 100], + * the worse the quality of the output picture. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetJpegeConfigLevel(acldvppJpegeConfig *jpegeConfig, uint32_t level); + +/** + * @ingroup AscendCL + * @brief Get jpege config's level. + * + * @param jpegeConfig [IN] jpege config. + * + * @retval compression level. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetJpegeConfigLevel(const acldvppJpegeConfig *jpegeConfig); + +/** + * @ingroup AscendCL + * @brief create vdecChannelDesc.Channel description information + * when creating a video data processing channel. + * + * @retval null for failed. + * @retval other success + */ +ACL_FUNC_VISIBILITY aclvdecChannelDesc *aclvdecCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy vdecChannelDesc. + * + * @par Function + * Can only destroy aclvdecChannelDesc type created + * through aclvdecCreateChannelDesc interface + * @param channelDesc [IN] channel description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + + * @see aclvdecCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannelDesc(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's channel id. + * + * @param channelDesc [OUT] vdec channel description. + * @param channelId [IN] decoding channel id: 0~15. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescChannelId(aclvdecChannelDesc *channelDesc, uint32_t channelId); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's thread id. + * + * @param channelDesc [OUT] vdec channel description. + * @param threadId [IN] thread id. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescThreadId(aclvdecChannelDesc *channelDesc, uint64_t threadId); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's callback function. + * + * @param channelDesc [OUT] vdec channel description. + * @param callback [IN] function callback.Function prototype: + * void (* aclvdecCallback) + * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata) + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCallback + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescCallback(aclvdecChannelDesc *channelDesc, aclvdecCallback callback); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's video encoding type. + * + * @param channelDesc [OUT] vdec channel description. + * @param enType [IN] video encoding type. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescEnType(aclvdecChannelDesc *channelDesc, acldvppStreamFormat enType); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture format. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicFormat [IN] out picture format (acldvppPixelFormat). + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicFormat(aclvdecChannelDesc *channelDesc, + acldvppPixelFormat outPicFormat); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture width. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicWidth [IN] out picture width. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicWidth(aclvdecChannelDesc *channelDesc, uint32_t outPicWidth); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture height. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicHeight [IN] out picture height. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicHeight(aclvdecChannelDesc *channelDesc, uint32_t outPicHeight); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's reference frame num. + * + * @param channelDesc [OUT] vdec channel description. + * @param refFrameNum [IN] reference frame num. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescRefFrameNum(aclvdecChannelDesc *channelDesc, uint32_t refFrameNum); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's bit depth. + * + * @param channelDesc [OUT] vdec channel description. + * @param bitDepth [IN] bit depth. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescBitDepth(aclvdecChannelDesc *channelDesc, uint32_t bitDepth); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's channel id. + * + * @param channelDesc [IN] vdec channel description. 
+ * + * @retval decoding channel id: 0~15. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescChannelId(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's thread id. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval thread id. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint64_t aclvdecGetChannelDescThreadId(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's callback function. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval function callback.Function prototype: + * void (* aclvdecCallback) + * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata) + * @retval default null. + * + * @see aclvdecCallback + */ +ACL_FUNC_VISIBILITY aclvdecCallback aclvdecGetChannelDescCallback(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's video encoding type. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval video encoding type. + * @retval default H265_MAIN_LEVEL. + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat aclvdecGetChannelDescEnType(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture format. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture format. + * @retval default DVPP_OUTPUT_YUV420SP_UV. + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat aclvdecGetChannelDescOutPicFormat(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture width. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture width. + * @retval default 0. 
+ */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicWidth(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture height. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture height (for vdec malloc memory). + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicHeight(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's bit depth. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval bit depth. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescBitDepth(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's reference frame num. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval reference frame num. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescRefFrameNum(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief create vencChannelDesc. + * + * @retval null for failed, other success + */ +ACL_FUNC_VISIBILITY aclvencChannelDesc *aclvencCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy vencChannelDesc. + * + * @param channelDesc [IN] channel desc. + * + * @retval ACL_SUCCESS:success, other:failed + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyChannelDesc(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Set decoding thread id for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param threadId [IN] thread id + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescThreadId(aclvencChannelDesc *channelDesc, uint64_t threadId); + +/** + * @ingroup AscendCL + * @brief Set func callback for venc channel desc. 
+ * + * @param channelDesc [OUT] venc channel desc + * @param callback [IN] func callback + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescCallback(aclvencChannelDesc *channelDesc, aclvencCallback callback); + +/** + * @ingroup AscendCL + * @brief Set video encoding type for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param enType [IN] video encoding type + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescEnType(aclvencChannelDesc *channelDesc, acldvppStreamFormat enType); + +/** + * @ingroup AscendCL + * @brief Set pic format for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param picFormat [IN] pic format + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicFormat(aclvencChannelDesc *channelDesc, + acldvppPixelFormat picFormat); + +/** + * @ingroup AscendCL + * @brief Set out pic width for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param picWidth [IN] pic width + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicWidth(aclvencChannelDesc *channelDesc, uint32_t picWidth); + +/** + * @ingroup AscendCL + * @brief Set pic height for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param picHeight [IN] pic height + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicHeight(aclvencChannelDesc *channelDesc, uint32_t picHeight); + +/** + * @ingroup AscendCL + * @brief Set key frame interval for venc channel desc. 
+ * + * @param channelDesc [OUT] venc channel desc + * @param keyFrameInterval [IN] Interval of key frame + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescKeyFrameInterval(aclvencChannelDesc *channelDesc, + uint32_t keyFrameInterval); + +/** + * @ingroup AscendCL + * @brief Set output buffer address for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param bufAddr [IN] output buffer address + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufAddr(aclvencChannelDesc *channelDesc, void *bufAddr); + +/** + * @ingroup AscendCL + * @brief Set output buffer size for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param bufSize [IN] output buffer size + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufSize(aclvencChannelDesc *channelDesc, uint32_t bufSize); + +/** + * @ingroup AscendCL + * @brief Set rc model for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param rcMode [IN] venc rc mode(VBR=1, CBR=2) + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescRcMode(aclvencChannelDesc *channelDesc, uint32_t rcMode); + +/** + * @ingroup AscendCL + * @brief Set source rate for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param srcRate [IN] source rate + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescSrcRate(aclvencChannelDesc *channelDesc, uint32_t srcRate); + +/** + * @ingroup AscendCL + * @brief Set max bit rate for venc channel desc. 
+ * + * @param channelDesc [OUT] venc channel desc + * @param maxBitRate [IN] max bit rate + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc *channelDesc, uint32_t maxBitRate); + +/** + * @ingroup AscendCL + * @brief Set venc parameter for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param paramType [IN] parameter type + * @param length [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, + aclvencChannelDescParamType paramType, size_t length, + const void *param); + +/** + * @ingroup AscendCL + * @brief Get output buffer address for venc channel desc. + * + * @param channelDesc[IN] venc channel desc + * + * @retval output buffer address + */ +ACL_FUNC_VISIBILITY void *aclvencGetChannelDescBufAddr(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get output buffer size for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval output buffer size + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescBufSize(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get decoding channel id for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval decoding channel id: 0~15, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescChannelId(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get decoding thread id for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval thread id, default 0 + */ +ACL_FUNC_VISIBILITY uint64_t aclvencGetChannelDescThreadId(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get func callback for venc channel desc. 
+ * + * @param channelDesc [IN] venc channel desc + * + * @retval func callback, default null + */ +ACL_FUNC_VISIBILITY aclvencCallback aclvencGetChannelDescCallback(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get video encoding type for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval video encoding type, default H265_MAIN_LEVEL + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat aclvencGetChannelDescEnType(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic format for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic format + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat aclvencGetChannelDescPicFormat(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic width for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic width, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicWidth(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic height for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic height, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicHeight(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get interval of key frame for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval interval of key frame, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescKeyFrameInterval(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get rc mode for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval rc mode, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescRcMode(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get source rate for venc channel desc. 
+ * + * @param channelDesc [IN] venc channel desc + * + * @retval source rate, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescSrcRate(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get max bit rate for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval max bit rate, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get venc parameter for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * @param paramType [IN] parameter type + * @param length [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, + aclvencChannelDescParamType paramType, size_t length, + size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief get forced restart of I-frame interval from config + * + * @param config [IN] venc frame config + * + * @retval 0: Not forced; 1: Forced restart of I-frame -1: error + */ +ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigForceIFrame(const aclvencFrameConfig *config); + +/** + * @ingroup AscendCL + * @brief get end frame (eos) flag from config + * + * @param config [IN] venc frame config + * + * @retval Whether it is the end frame: 0: no; 1: end frame + */ +ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigEos(const aclvencFrameConfig *config); + +/** + * @ingroup AscendCL + * @brief set single frame encoding configuration parameters + * + * @param config [OUT] venc frame config + * @param forceIFrame [IN] forced restart of I-frame interval: 0: Not forced; 1: Forced restart of I-frame + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError
aclvencSetFrameConfigForceIFrame(aclvencFrameConfig *config, uint8_t forceIFrame); + +/** + * @ingroup AscendCL + * @brief set single frame encoding configuration parameters + * + * @param config [OUT] venc frame config + * @param eos [IN] Whether it is the end frame: 0: no; 1: end frame + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigEos(aclvencFrameConfig *config, uint8_t eos); + +/** + * @ingroup AscendCL + * @brief dvpp venc destroy frame config + * + * @param config [IN] venc frame config + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyFrameConfig(aclvencFrameConfig *config); + +/** + * @ingroup AscendCL + * @brief Create dvpp venc frame config. + * + * @retval null for failed, other aclvencFrameConfig ptr + */ +ACL_FUNC_VISIBILITY aclvencFrameConfig *aclvencCreateFrameConfig(); + +/** + * @ingroup AscendCL + * @brief Create dvpp venc channel. + * + * @param channelDesc [IN|OUT] venc channel desc + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencCreateChannel(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp venc channel. + * + * @param channelDesc [IN] venc channel desc + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyChannel(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp venc launch send frame task. 
+ * + * @param channelDesc [IN] venc channel desc + * @param input [IN] input picture desc + * @param reserve [IN] reserve parameter + * @param config [IN] dvpp frame config + * @param userdata [IN] user callback function + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencSendFrame(aclvencChannelDesc *channelDesc, acldvppPicDesc *input, void *reserve, + aclvencFrameConfig *config, void *userdata); + +/** + * @ingroup AscendCL + * @brief Create dvpp stream description. + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY acldvppStreamDesc *acldvppCreateStreamDesc(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp stream description. + * + * @par Function + * Can only destroy acldvppStreamDesc type created through + * acldvppCreateStreamDesc interface. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateStreamDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyStreamDesc(acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Set stream description's data addr. + * + * @param streamDesc [OUT] dvpp stream description. + * @param dataDev [IN] data addr. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescData(acldvppStreamDesc *streamDesc, void *dataDev); + +/** + * @ingroup AscendCL + * @brief Set stream description's data size. + * + * @param streamDesc [OUT] dvpp stream description. + * @param size [IN] data size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescSize(acldvppStreamDesc *streamDesc, uint32_t size); + +/** + * @ingroup AscendCL + * @brief Set stream description's format. 
+ * + * @param streamDesc [OUT] dvpp stream description. + * @param format [IN] stream format. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescFormat(acldvppStreamDesc *streamDesc, acldvppStreamFormat format); + +/** + * @ingroup AscendCL + * @brief Set stream description's timestamp. + * + * @param streamDesc [OUT] dvpp stream description. + * @param timestamp [IN] current timestamp. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescTimestamp(acldvppStreamDesc *streamDesc, uint64_t timestamp); + +/** + * @ingroup AscendCL + * @brief Set stream description's ret code. + * + * @param streamDesc [OUT] dvpp stream description. + * @param retCode [IN] result code. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescRetCode(acldvppStreamDesc *streamDesc, uint32_t retCode); + +/** + * @ingroup AscendCL + * @brief Set stream description's eos. + * + * @param streamDesc [OUT] dvpp stream description. + * @param eos [IN] end flag of sequence. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescEos(acldvppStreamDesc *streamDesc, uint8_t eos); + +/** + * @ingroup AscendCL + * @brief Get stream description's data addr. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval data addr. + * @retval default nullptr. + */ +ACL_FUNC_VISIBILITY void *acldvppGetStreamDescData(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's data size. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval data size. + * @retval default 0.
+ */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescSize(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's format. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval stream format. + * @retval default ACL_DVPP_STREAM_H264. + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat acldvppGetStreamDescFormat(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's timestamp. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval current timestamp. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint64_t acldvppGetStreamDescTimestamp(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's retCode. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval result code. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescRetCode(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's eos. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval end flag of sequence. + * @retval default 0(false). + */ +ACL_FUNC_VISIBILITY uint8_t acldvppGetStreamDescEos(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Create vdec frame config. + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY aclvdecFrameConfig *aclvdecCreateFrameConfig(); + +/** + * @ingroup AscendCL + * @brief Destroy vdec frame config. + * + * @par Function + * Can only destroy aclvdecFrameConfig type created through + * aclvdecCreateFrameConfig interface + * + * @param vdecFrameConfig [IN] vdec frame config. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclvdecCreateFrameConfig + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecFrameConfig); + +/** + * @ingroup AscendCL + * @brief Get image width and height of jpeg. + * + * @param data [IN] image data in host memory + * @param size [IN] the size of image data + * @param width [OUT] the width of image from image header + * @param height [OUT] the height of image from image header + * @param components [OUT] the components of image from image header + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height, + int32_t *components); + +/** + * @ingroup AscendCL + * @brief Predict encode size of jpeg image. + * + * @param inputDesc [IN] dvpp image desc + * @param config [IN] jpeg encode config + * @param size [OUT] the size predicted of image + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, + const acldvppJpegeConfig *config, uint32_t *size); + +/** + * @ingroup AscendCL + * @brief Predict decode size of jpeg image. + * + * @param data [IN] origin image data in host memory + * @param dataSize [IN] the size of origin image data + * @param outputPixelFormat [IN] the pixel format jpeg decode + * @param decSize [OUT] the size predicted for decode image + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, uint32_t *decSize); + +/** + * @ingroup AscendCL + * @brief Get image width and height of png. 
+ * + * @param data [IN] image data in host memory + * @param dataSize [IN] the size of image data + * @param width [OUT] the width of image from image header + * @param height [OUT] the height of image from image header + * @param components [OUT] the components of image from image header + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width, + uint32_t *height, int32_t *components); + +/** + * @ingroup AscendCL + * @brief Predict decode size of png image. + * + * @param data [IN] origin image data in host memory + * @param dataSize [IN] the size of origin image data + * @param outputPixelFormat [IN] the pixel format of png decode output + * @param decSize [OUT] the size predicted for decode image + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, uint32_t *decSize); + +/** + * @ingroup AscendCL + * @brief Create dvpp channel, the same channel can be reused + * and is no longer available after destruction. + * + * @param channelDesc [IN|OUT] the channel description + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppCreateChannel(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp channel. + * + * @par Restriction + * Can only destroy channel created through the acldvppCreateChannel interface + * + * @param channelDesc [IN] the channel description + * + * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + * + * @see acldvppCreateChannel + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp vpc resize. + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * Height stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel description + * @param inputDesc [IN] resize input picture description + * @param outputDesc [IN|OUT] resize output picture description + * @param resizeConfig [IN] resize config + * @param stream [IN] resize task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + * | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop.
+ * + * @par Function + * crop the input picture according to the specified area, + * and then store the picture in the output memory as the output picture + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * Height stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel description + * @param inputDesc [IN] crop input picture description + * @param outputDesc [IN|OUT] crop output picture description + * @param cropArea [IN] crop area config + * @param stream [IN] crop task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop.
+ * + * @par Function + * crop the input batch picture according to the specified area + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param stream [IN] crop batch task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop and paste. + * + * @par Function + * crop the input picture according to the specified area, + * and paste the picture to the specified position of the target picture + * as the output picture + * + * @param channelDesc [IN] thechannel destruction + * @param inputDesc [IN] crop and paste input picture destruction + * @param outputDesc [IN|OUT] crop and paste output picture destruction + * @param cropArea [IN] crop area config + * @param pasteArea [IN] paste area config + * @param stream [IN] crop and paste task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + acldvppRoiConfig *pasteArea, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop and paste. 
+ * + * @par Function + * crop the input batch picture according to the specified area, + * and paste the pictures to the specified position of the target pictures + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param pasteAreas [IN] paste area configs + * @param stream [IN] crop batch task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppRoiConfig *pasteAreas[], aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc jpeg decode. + * + * @par Function + * For different source picture formats, after decoding, + * output pictures in the following format: + * @li jpeg(444) -> YUV444SP:V is front U is back, + * YUV420 SP V is front U is back, YUV420SP U is front V is back; + * @li jpeg(422) -> YUV422SP:V is in front U is behind, + * YUV420SP V is in front U is behind, YUV420SP U is in front V is behind; + * @li jpeg(420) -> YUV420SP: + * V is front U is back, YUV420SP U is front V is back; + * @li jpeg(400) -> YUV420SP:UV data is filled with 0 x 80. 
+ * + * @param channelDesc [IN] the channel destruction + * @param data [IN] decode input picture destruction's data + * @param size [IN] decode input picture destruction's size + * @param outputDesc [IN|OUT] decode output picture destruction + * @param stream [IN] decode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, + acldvppPicDesc *outputDesc, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc jpeg encode. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] encode input picture destruction + * @param data [OUT] encode output picture destruction's data + * @param size [IN|OUT] encode output picture destruction's size + * @param config [IN] jpeg encode config + * @param stream [IN] encode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateJpegeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + const void *data, uint32_t *size, acldvppJpegeConfig *config, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc png decode. + * + * @param channelDesc [IN] the channel destruction + * @param data [IN] decode input picture destruction's data + * @param size [IN] decode input picture destruction's size + * @param outputDesc [IN|OUT] decode output picture destruction + * @param stream [IN] decode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, + acldvppPicDesc *outputDesc, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create vdec channel. + * + * @par Function + * Create a channel for video data processing, + * the same channel can be reused, + * and is no longer available after destruction + * + * @param channelDesc [IN|OUT] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecCreateChannel(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy vdec channel. + * + * @par Function + * Can only destroy channels created by the aclvdecCreateChannel interface + * + * @param channelDesc [IN] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp vdec send frame. + * + * @par Function + * Pass the input memory to be decoded + * and the decoded output memory to the decoder for decoding + * + * @param channelDesc [IN] vdec channel destruction + * @param input [IN] input stream destruction + * @param output [IN|OUT] output picture destruction + * @param config [IN] vdec frame config + * @param userData [IN] user data for callback function + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, + acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); + +/** + * @ingroup AscendCL + * @brief dvpp vdec send skipped frame. + * + * @par Function + * Pass video frame to decoder + * + * @param channelDesc [IN] vdec channel destruction + * @param input [IN] input stream destruction + * @param config [IN] vdec frame config + * @param userData [IN] user data for callback function + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame + */ +ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, + aclvdecFrameConfig *config, void *userData); + +/** + * @ingroup AscendCL + * @brief dvpp vpc convert color. + * + * @par Restriction + * @li outputDesc:Width height stride, No changes are allowed. Just configure 0 + * @par Function + * Convert color gamut + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] convert color input picture destruction + * @param outputDesc [IN|OUT] convert color output picture destruction + * @param stream [IN] convert color task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc pyramid down. 
+ * + * @par Restriction + * @li outputDesc:format only supported YUV400 + * @par Function + * Image pyramid down + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] pyr down input picture destruction + * @param outputDesc [IN|OUT] pyr down output picture destruction + * @param reserve [IN] reserved param , must be nullptr + * @param stream [IN] pyr down task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Set dvpp channel mode. + * + * @param channelDesc [OUT] the channel destruction + * @param mode [IN] channel mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode); + +/** + * @ingroup AscendCL + * @brief Set resize config interpolation. + * + * @param resizeConfig [OUT] the resize config + * @param interpolation [IN] interpolation + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetResizeConfigInterpolation(acldvppResizeConfig *resizeConfig, + uint32_t interpolation); + +/** + * @ingroup AscendCL + * @brief Get resize config interpolation. + * + * @param resizeConfig [IN] the resize config + * + * @retval Interpolation of resize config. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppResizeConfig *resizeConfig); + +/** + * @ingroup AscendCL + * @brief Set vdec channel out mode. 
+ * + * @param channelDesc [OUT] the channel description + * @param outMode [IN] channel out mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode); + +/** + * @ingroup AscendCL + * @brief Get vdec channel out mode. + * + * @param channelDesc [IN] the channel description + * + * @retval Out mode of channel description + * @retval default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutMode(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp batch picture description. + * + * @param batchSize [IN] batch size + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppBatchPicDesc *acldvppCreateBatchPicDesc(uint32_t batchSize); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture description. + * + * @param batchPicDesc [IN] dvpp batch picture description. + * @param index [IN] index of batch + * + * @retval null for failed. + * @retval OtherValues success. + * + * @see acldvppCreateBatchPicDesc + */ +ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppGetPicDesc(acldvppBatchPicDesc *batchPicDesc, uint32_t index); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp batch picture description. + * + * @par Function + * Can only destroy batch picture description information created + * through acldvppCreateBatchPicDesc interface. + * + * @param batchPicDesc [IN] dvpp batch picture description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateBatchPicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyBatchPicDesc(acldvppBatchPicDesc *batchPicDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp lut map. + * + * @retval null for failed. + * @retval OtherValues success. 
+ */ +ACL_FUNC_VISIBILITY acldvppLutMap *acldvppCreateLutMap(); + +/** + * @ingroup AscendCL + * @brief Destroy lut map. + * + * @param lutMap [IN] lut map + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyLutMap(acldvppLutMap *lutMap); + +/** + * @ingroup AscendCL + * @brief Get lut map dims. + * + * @param lutMap [IN] lut map + * + * @retval 0 for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); + +/** + * @ingroup AscendCL + * @brief Get lut map data. + * + * @param lutMap [IN] lut map + * @param dim [IN] input dim of map + * @param data [OUT] the dim of lut map's data + * @param len [OUT] the dim of lut map's length + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data, + uint32_t *len); +/** + * @ingroup AscendCL + * @brief Vpc equalize hist. + * + * @param channelDesc [IN] channel desc + * @param inputDesc [IN] input desc + * @param outputDesc [IN|OUT] output desc + * @param lutMap [IN] lut map param + * @param stream [IN] runtime stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, + const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, + const acldvppLutMap *lutMap, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create dvpp border config. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); + +/** + * @ingroup AscendCL + * @brief Set value of border config. 
+ * + * @param borderConfig [OUT] border config + * @param index [IN] index of value array + * @param value [IN] value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index, + double value); + +/** + * @ingroup AscendCL + * @brief Set border type of border config. + * + * @param borderConfig [OUT] border config + * @param borderType [IN] border type + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBorderType(acldvppBorderConfig *borderConfig, + acldvppBorderType borderType); + +/** + * @ingroup AscendCL + * @brief Set top of border config. + * + * @param borderConfig [OUT] border config + * @param top [IN] top of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigTop(acldvppBorderConfig *borderConfig, uint32_t top); + +/** + * @ingroup AscendCL + * @brief Set bottom of border config. + * + * @param borderConfig [OUT] border config + * @param bottom [IN] bottom of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBottom(acldvppBorderConfig *borderConfig, uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Set left of border config. + * + * @param borderConfig [OUT] border config + * @param left [IN] left of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigLeft(acldvppBorderConfig *borderConfig, uint32_t left); + +/** + * @ingroup AscendCL + * @brief Set right of border config. 
+ * + * @param borderConfig [OUT] border config + * @param right [IN] right of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigRight(acldvppBorderConfig *borderConfig, uint32_t right); + +/** + * @ingroup AscendCL + * @brief Get value of border config. + * + * @param borderConfig [IN] border config + * @param index [IN] index of value array + * + * @retval invalid value is < 0, normal Value is >= 0 + */ +ACL_FUNC_VISIBILITY double acldvppGetBorderConfigValue(const acldvppBorderConfig *borderConfig, uint32_t index); + +/** + * @ingroup AscendCL + * @brief Get border type of border config. + * + * @param borderConfig [IN] border config + * @retval border type of border config + */ +ACL_FUNC_VISIBILITY acldvppBorderType acldvppGetBorderConfigBorderType(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get top of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, top value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigTop(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get bottom of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, bottom value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigBottom(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get left of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, left value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigLeft(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get right of border config. 
+ * + * @param borderConfig [IN] border config + * + * @retval default 0, right value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigRight(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Destroy border config. + * + * @param borderConfig [IN] border config + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Vpc make border. + * + * @param channelDesc [IN] channel desc + * @param inputDesc [IN] input desc + * @param outputDesc [IN|OUT] output desc + * @param borderConfig [IN] border config param + * @param stream [IN] runtime stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, + const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, + const acldvppBorderConfig *borderConfig, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Dvpp vpc calc hist. + * + * @param channelDesc [IN] the channel description + * @param srcPicDesc [IN] calc hist input picture description + * @param hist [IN|OUT] calc hist output hist description + * @param reserve [IN] reserved param, must be nullptr + * @param stream [IN] task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc, + acldvppHist *hist, void *reserve, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create vpc hist description. + * + * @retval null for failed. + * @retval OtherValues success. 
+ */ +ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist(); + +/** + * @ingroup AscendCL + * @brief Destroy vpc hist description. + * + * @par Function + * Can only destroy hist description information created + * through acldvppCreateHist interface. + * + * @param hist [IN] vpc hist description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateHist + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyHist(acldvppHist *hist); + +/** + * @ingroup AscendCL + * @brief Get dims of vpc hist description. + * + * @param hist [IN] vpc hist description. + * + * @retval dims of vpc hist description. + * + * @see acldvppCreateHist | acldvppVpcCalcHistAsync + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistDims(acldvppHist *hist); + +/** + * @ingroup AscendCL + * @brief Get data from vpc hist description by dim. + * + * @param hist [IN] vpc hist description. + * @param dim [IN] which dim to get data. + * @param data [OUT] address of output hist data. + * @param len [OUT] len of output hist data. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateHist | acldvppVpcCalcHistAsync + */ +ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, uint32_t **data, uint16_t *len); + +/** + * @ingroup AscendCL + * @brief Get dvpp calc hist process return code. + * + * @param hist [IN] vpc hist description. + * + * @retval Dvpp calc hist process return code. + * + * @see acldvppCreateHist | acldvppVpcCalcHistAsync + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); + +/** + * @ingroup AscendCL + * @brief Set vpc hist description to 0. + * + * @par Function + * Can only clear hist description information created + * through acldvppCreateHist interface. + * + * @param hist [IN] vpc hist description. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acldvppCreateHist + */ +ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h new file mode 100644 index 00000000..46d934e6 --- /dev/null +++ b/inc/external/hccl/hccl.h @@ -0,0 +1,134 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl.h + * @brief HCCL API + */ + +#ifndef HCCL_H_ +#define HCCL_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief Initialize HCCL. + * + * @param clusterInfo A string identifying the cluster info file path, including the file name. + * @param rank An integer identifying the identity of the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief Get hccl root info. + * + * @param rootInfo A pointer identifying the hccl root info. + * @return HcclResult + */ +extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); + +/** + * @brief Initialize HCCL with root info. + * + * @param nRanks An integer identifying the rank size of the cluster. + * @param rootInfo A struct identifying the hccl root info. 
+ * @param rank An integer identifying the identity of the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief AllReduce operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, + * float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, + HcclComm comm, aclrtStream stream); + +/** + * @brief Broadcast operator. + * + * @param buf A pointer identifying the data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param root An integer(u32) identifying the root rank in the operator. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, + aclrtStream stream); + +/** + * @brief ReduceScatter operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. 
+ * @param recvBuf A pointer identifying the output data address of the operator. + * @param recvCount An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, + HcclReduceOp op, HcclComm comm, aclrtStream stream); + +/** + * @brief AllGather operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param sendCount An integer(u64) identifying the number of the input data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. 
+ * @return HcclResult + */ +extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, + aclrtStream stream); + +/** + * @brief Destroy HCCL comm + * + * @param comm A pointer identifying the communication resource to destroy. + * @return HcclResult + * @see HcclCommInitClusterInfo() + */ +extern HcclResult HcclCommDestroy(HcclComm comm); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_H_ diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h new file mode 100644 index 00000000..0e832396 --- /dev/null +++ b/inc/external/hccl/hccl_types.h @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file hccl_types.h + * @brief HCCL data type definition + * + */ + +#ifndef HCCL_TYPES_H_ +#define HCCL_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief HCCL functions return value definition + */ +typedef enum { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ +} HcclResult; + +/** + * @brief handle to HCCL communicator + */ +typedef void *HcclComm; + +/** + * @brief HCCL Reduction operation + */ +typedef enum { + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ +} HcclReduceOp; + +/** + * @brief HCCL data type + */ +typedef enum { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< 
uint64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ +} HcclDataType; + +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length + +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index 2dd2c70c..73d9564b 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -46,6 +46,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000; // feature not support static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000; // runtime internel error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h new file mode 100644 index 00000000..7e0f94a8 --- /dev/null +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +//One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct OpParamInfo { + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h index a5f43be9..8c0c1847 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h @@ -33,18 +33,22 @@ typedef enum { FMK_KERNEL_TYPE_RESERVED } FwkkernelType_t; +#pragma pack(push, 1) typedef struct { uint32_t fwkKernelType; // FwkkernelType_t union { ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; } fwkKernelBase; -} __attribute__((packed)) STR_FWK_OP_KERNEL; +} STR_FWK_OP_KERNEL; +#pragma pack(pop) +#pragma pack(push, 1) struct SessionInfo { uint64_t sessionId; uint64_t kernelId; bool sessFlag; -} __attribute__((packed)); +}; +#pragma pack(pop) #ifdef __cplusplus } diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 79d94023..50b39d91 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType { FWK_ADPT_UPDATE_INPUT_OUTPUT }; +#pragma pack(push, 1) // API 
Parameter Structure struct StrFWKKernel { FWKOperateType opType; @@ -89,31 +90,39 @@ struct StrFWKKernel { uint64_t extInfoLen; // extend info total length uint64_t extInfoAddr; // extend info addr, ExtInfo structure -} __attribute__((packed)); +}; +#pragma pack(pop) typedef StrFWKKernel FWKOperateParam; // Extent info ShapeAndType const uint32_t kMaxShapeDims = 8; +#pragma pack(push, 1) struct ShapeAndType { int32_t type; int64_t dims[kMaxShapeDims]; -} __attribute__((packed)); +}; +#pragma pack(pop) // Extend info structure for extInfoAddr const uint32_t kExtInfoHeadSize = 8; + +#pragma pack(push, 1) struct ExtInfo { int32_t infoType; // extend type uint32_t infoLen; // length for infoMsg char infoMsg[0]; // extend value -} __attribute__((packed)); +}; +#pragma pack(pop) +#pragma pack(push, 1) struct ResultSummary { uint64_t shape_data_ptr; // shape data addr, need convert to void* uint64_t shape_data_size; // num of dims uint64_t raw_data_ptr; // raw data addr, need convert to void* uint64_t raw_data_size; // size of raw data -} __attribute__((packed)); +}; +#pragma pack(pop) } // end namespace FWKAdapter } // namespace aicpu diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index 8194097e..9facd20c 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -22,7 +22,8 @@ #ifndef HCCL_BASE_H_ #define HCCL_BASE_H_ - +#include +#include #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -95,6 +96,33 @@ typedef void *rtStream_t; */ typedef void *rtModel_t; +struct HcomOperation { + std::string hcclType; + void *inputPtr; + void *outputPtr; + u64 count; + HcclDataType dataType; + HcclReduceOp opType; + u32 root; + + HcomOperation() + { + inputPtr = nullptr; + outputPtr = nullptr; + count = 0; + dataType = HCCL_DATA_TYPE_RESERVED; + opType = HCCL_REDUCE_RESERVED; + root = 0; + } +}; + +struct HcomRemoteAccessAddrInfo { + u32 remotetRankID; + u64 remoteAddr; // host embedding table 
address + u64 localAddr; // device HBM address + u64 length; // Memory Length in Bytes +}; + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/hccl/hccl_types.h b/third_party/fwkacllib/inc/hccl/hccl_types.h new file mode 100644 index 00000000..50a64795 --- /dev/null +++ b/third_party/fwkacllib/inc/hccl/hccl_types.h @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl_types.h + * @brief HCCL data type definition + * + */ + +#ifndef HCCL_TYPES_H_ +#define HCCL_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief HCCL functions return value definition + */ +typedef enum { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce 
transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ +} HcclResult; + +/** + * @brief handle to HCCL communicator + */ +typedef void *HcclComm; + +/** + * @brief HCCL Reduction opperation + */ +typedef enum { + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ +} HcclReduceOp; + +/** + * @brief HCCL data type + */ +typedef enum { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ +} HcclDataType; + +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length + +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index de140b4b..e491d43f 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -24,6 +24,8 @@ #include #include +#include +#include #ifdef __cplusplus extern "C" { @@ -40,6 +42,15 @@ extern "C" { */ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); +/** + * @brief Get the rank number in the group. + * + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. 
+ * @return HcclResult + */ +HcclResult HcomGetRankSize(const char *group, u32 *rankSize); + /** * @brief Get the rank number of this rank's server within the group. * @@ -49,6 +60,15 @@ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); */ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); +/** + * @brief Get the rank number of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. + * @return HcclResult + */ +HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); + /** * @brief Get the rank id of this rank. * @@ -58,6 +78,15 @@ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); */ HcclResult hcom_get_rank_id(const char *group, u32 *rankId); +/** + * @brief Get the rank id of this rank. + * + * @param group A string identifying the group name. + * @param rankId A pointer identifying the rank id. + * @return HcclResult + */ +HcclResult HcomGetRankId(const char *group, u32 *rankId); + /** * @brief Get the local rank id of this rank's server within the group. * @@ -67,6 +96,15 @@ HcclResult hcom_get_rank_id(const char *group, u32 *rankId); */ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); +/** + * @brief Get the local rank id of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankId A pointer identifying the local rank id. + * @return HcclResult + */ +HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); + /** * @brief Get the world rank id according to the group rank id. * @@ -77,6 +115,16 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); */ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); +/** + * @brief Get the world rank id according to the group rank id. 
+ * + * @param group A string identifying the group name. + * @param groupRank An integer(u32) identifying the group rank id. + * @param worldRank A pointer identifying the world rank id. + * @return HcclResult + */ +HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); + /** * @brief Get the group rank id according to the world rank id. * @@ -87,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, */ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); +/** + * @brief Get the group rank id according to the world rank id. + * + * @param worldRank An integer(u32) identifying the world rank id. + * @param group A string identifying the group name. + * @param groupRank A pointer identifying the group rank id. + * @return HcclResult + */ +HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); + /** * @brief Create group. * @@ -97,6 +155,16 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, */ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); +/** + * @brief Create group. + * + * @param group A string identifying the group name. + * @param rankNum An integer(u32) identifying the number of ranks in the group. + * @param rankIds A list identifying the ranks in the group. + * @return HcclResult + */ +HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); + /** * @brief Destroy group * @@ -105,6 +173,14 @@ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); */ HcclResult hcom_destroy_group(const char *group); +/** + * @brief Destroy group + * + * @param group A string identifying the group name. + * @return HcclResult + */ +HcclResult HcomDestroyGroup(const char *group); + /** * @brief Set the gradient split strategy with in the group, according to gradient index. 
* @@ -115,6 +191,16 @@ HcclResult hcom_destroy_group(const char *group); */ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +/** + * @brief Set the gradient split strategy with in the group, according to gradient index. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param IdxList A list identifying the index of end gradient in each segment. + * @return HcclResult + */ +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); + /** * @brief Set the gradient split strategy with in the group, according to gradient data size. * @@ -125,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen */ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +/** + * @brief Set the gradient split strategy with in the group, according to gradient data size. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param sizeList A list identifying the percent of each segment. + * @return HcclResult + */ +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); + /** * @brief Register memories and init resources for remote access. * @@ -134,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment */ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +/** + * @brief Register memories and init resources for remote access. + * + * @param addrList memory addresses for remote access. + * @param count number of remote memory addresses. 
+ * @return HcclResult + */ +extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); + +HcclResult HcomExecInitialize(); + +HcclResult HcomExecFinalize(); + +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index c74f95ac..66638bbb 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -50,7 +50,7 @@ typedef int (*mmFilter)(const mmDirent *entry); typedef int (*mmFilter2)(const mmDirent2 *entry); typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); -typedef size_t mmSize_t; +typedef size_t mmSize_t; //lint !e410 !e1051 typedef off_t mmOfft_t; typedef pid_t mmPid_t; typedef long MM_LONG; @@ -215,6 +215,10 @@ typedef struct { #define S_IWRITE S_IWUSR #endif +#define mm_no_argument no_argument +#define mm_required_argument required_argument +#define mm_optional_argument optional_argument + #define M_FILE_RDONLY O_RDONLY #define M_FILE_WRONLY O_WRONLY #define M_FILE_RDWR O_RDWR @@ -412,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); // Poll related interface MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); -MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, - pmmPollData polledData, mmPollBack pollBack); +MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, + INT32 fdCount, + INT32 timeout, + mmCompletionHandle handleIOCP, + pmmPollData polledData, + mmPollBack pollBack); 
MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index a5a22b4f..aa58e722 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -237,6 +237,11 @@ typedef struct { } mmThreadAttr; typedef VOID (*mmPf)(VOID); + +#define mm_no_argument 0 +#define mm_required_argument 1 +#define mm_optional_argument 2 + #define M_FILE_RDONLY GENERIC_READ #define M_FILE_WRONLY GENERIC_WRITE #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE) diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 85f16cc5..aa8263f9 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -18,6 +18,7 @@ #define __CCE_RUNTIME_BASE_H__ #include +#include "toolchain/prof_callback.h" #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { @@ -86,10 +87,20 @@ typedef struct rtExceptionInfo { uint32_t deviceid; } rtExceptionInfo; +typedef struct rtTaskFailInfo { + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; +} rtTaskFailInfo; + typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); +typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); + typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); /** @@ -146,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* */ RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); +/** + * @ingroup profiling_base + * @brief ts set profiling reporter callback. 
+ */ +RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); + /** * @ingroup dvrt_base * @brief Returns the last error from a runtime call. @@ -184,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); */ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); +/** + * @ingroup dvrt_base + * @brief register callback for fail task + * @param [in] uniName unique register name, can't be null + * @param [in] callback fail task callback function + * @param [out] NA + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); + /** * @ingroup dvrt_base * @brief notify handle. diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index c471f128..c35a1278 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig { typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; -/** - * @ingroup - * @brief get platform - * @param [in] platForm - * @return platForm - */ -RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm); - /** * @ingroup * @brief get AI core count @@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate */ RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); -/** - * @ingroup - * @brief set platform in gen ctx - * @param [in] platForm - * @return RT_ERROR_NONE for ok, errno for failed - */ -RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); /** * @ingroup diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h index 6066a12e..665c8b82 100644 --- a/third_party/fwkacllib/inc/tdt/tsd_client.h +++ b/third_party/fwkacllib/inc/tdt/tsd_client.h @@ -23,6 +23,7 @@ #include #include 
"tdt/status.h" #include "tdt/data_common.h" +#include "toolchain/prof_callback.h" #ifdef __cplusplus extern "C" { @@ -37,7 +38,7 @@ extern "C" { * Used for the Framework process to communicate with the TSDDaemon process, * and notify TSD to complete the initialization of other processes * -* @param phyDeviceId [IN] type #unsigned int. Physical device ID +* @param logicDeviceId [IN] type #unsigned int. Logic device ID * @param rankSize [IN] type #unsigned int. The rankSize of the training. * The default value is 1. When rankSize is greater than 1, * HCCP will be pulled to perform set communication related operations. @@ -49,7 +50,7 @@ extern "C" { * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize); +TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); /** * @ingroup Close @@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); +TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); /** * @ingroup UpdateProfilingMode @@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); * @li tsd_client.h: Header file where the interface declaration is located. 
* @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag); +TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); + +/** +* @ingroup TsdSetMsprofReporterCallback +* @brief 用于推理场景下设置aicpu的profilng的callback函数 +* +* @par Function +* 设置offline模式下aicpu_sd进程的profiling的callback函数 +* +* @param callback [IN] type #MsprofReporterCallback. 回调函数 +* @retval TDT_OK Success +* @retval OtherValues Failure +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined +* @li prof_callback.h: Headerfile where 'MsprofReporterCallback' defined +*/ +TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); /** * @ingroup CreateCmdParameterObj diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index 430ed14d..efb37cfb 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -17,380 +17,76 @@ #ifndef MSPROFILER_API_PROF_ACL_API_H_ #define MSPROFILER_API_PROF_ACL_API_H_ -#define MSVP_MAX_DEV_NUM 64 -#ifndef OS_TYPE -#define OS_TYPE 0 -#endif // OS_TYPE - - -#if (OS_TYPE != LINUX) -#define MSVP_PROF_API __declspec(dllexport) -#else -#define MSVP_PROF_API __attribute__((visibility("default"))) -#endif - // DataTypeConfig -#define PROF_ACL_API 0x0001 -#define PROF_TASK_TIME 0x0002 -#define PROF_AICORE_METRICS 0x0004 -#define PROF_AICPU_TRACE 0x0008 -#define PROF_MODEL_EXECUTE 0x0010 -#define PROF_RUNTIME_API 0x0020 -#define PROF_RUNTIME_TRACE 0x0040 -#define PROF_SCHEDULE_TIMELINE 0x0080 -#define PROF_SCHEDULE_TRACE 0x0100 -#define PROF_AIVECTORCORE_METRICS 0x0200 -#define PROF_SUBTASK_TIME 0x0400 - -#define 
PROF_TRAINING_TRACE 0x0800 -#define PROF_HCCL_TRACE 0x1000 -#define PROF_DATA_PROCESS 0x2000 -#define PROF_TASK_TRACE 0x3842 +#define PROF_ACL_API 0x00000001 +#define PROF_TASK_TIME 0x00000002 +#define PROF_AICORE_METRICS 0x00000004 +#define PROF_AICPU_TRACE 0x00000008 +#define PROF_MODEL_EXECUTE 0x00000010 +#define PROF_RUNTIME_API 0x00000020 +#define PROF_RUNTIME_TRACE 0x00000040 +#define PROF_SCHEDULE_TIMELINE 0x00000080 +#define PROF_SCHEDULE_TRACE 0x00000100 +#define PROF_AIVECTORCORE_METRICS 0x00000200 +#define PROF_SUBTASK_TIME 0x00000400 + +#define PROF_TRAINING_TRACE 0x00000800 +#define PROF_HCCL_TRACE 0x00001000 + +#define PROF_TASK_TRACE 0x00001852 + +// system profilinig switch +#define PROF_CPU 0x00010000 +#define PROF_HARDWARE_MEMORY 0x00020000 +#define PROF_IO 0x00040000 +#define PROF_INTER_CONNECTION 0x00080000 +#define PROF_DVPP 0x00100000 +#define PROF_SYS_AICORE_SAMPLE 0x00200000 +#define PROF_AIVECTORCORE_SAMPLE 0x00400000 #define PROF_MODEL_LOAD 0x8000000000000000 // DataTypeConfig MASK -#define PROF_ACL_API_MASK 0x0001 -#define PROF_TASK_TIME_MASK 0x0002 -#define PROF_AICORE_METRICS_MASK 0x0004 -#define PROF_AICPU_TRACE_MASK 0x0008 -#define PROF_MODEL_EXECUTE_MASK 0x0010 -#define PROF_RUNTIME_API_MASK 0x0020 -#define PROF_RUNTIME_TRACE_MASK 0x0040 -#define PROF_SCHEDULE_TIMELINE_MASK 0x0080 -#define PROF_SCHEDULE_TRACE_MASK 0x0100 -#define PROF_AIVECTORCORE_METRICS_MASK 0x0200 -#define PROF_SUBTASK_TIME_MASK 0x0400 - -#define PROF_TRAINING_TRACE_MASK 0x0800 -#define PROF_HCCL_TRACE_MASK 0x1000 -#define PROF_DATA_PROCESS_MASK 0x2000 +#define PROF_ACL_API_MASK 0x00000001 +#define PROF_TASK_TIME_MASK 0x00000002 +#define PROF_AICORE_METRICS_MASK 0x00000004 +#define PROF_AICPU_TRACE_MASK 0x00000008 +#define PROF_MODEL_EXECUTE_MASK 0x00000010 +#define PROF_RUNTIME_API_MASK 0x00000020 +#define PROF_RUNTIME_TRACE_MASK 0x00000040 +#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 +#define PROF_SCHEDULE_TRACE_MASK 0x00000100 +#define 
PROF_AIVECTORCORE_METRICS_MASK 0x00000200 +#define PROF_SUBTASK_TIME_MASK 0x00000400 + +#define PROF_TRAINING_TRACE_MASK 0x00000800 +#define PROF_HCCL_TRACE_MASK 0x00001000 + +// system profilinig mask +#define PROF_CPU_MASK 0x00010000 +#define PROF_HARDWARE_MEMORY_MASK 0x00020000 +#define PROF_IO_MASK 0x00040000 +#define PROF_INTER_CONNECTION_MASK 0x00080000 +#define PROF_DVPP_MASK 0x00100000 +#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 +#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 #define PROF_MODEL_LOAD_MASK 0x8000000000000000 #include -#include - -/** - * @name ProrErrorCode - * @brief error code enum of prof_acl_apis - */ -enum ProfErrorCode { - PROF_ERROR_NONE = 0, // ok - PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr - PROF_ERROR_REPEAT_INIT, // profiling has already been inited - PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string - PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable - PROF_ERROR_FAILURE, // failed to init or start profiling - PROF_ERROR_NOT_INITED, // profiling has not been inited - PROF_ERROR_DEVICE_INVALID, // device id invalid - PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics - PROF_ERROR_REPEAT_START, // profiilng has already been started - PROF_ERROR_NOT_STARTED, // profiling has not been started - PROF_ERROR_REPEAT_SUBSCRIBE, // same model id has already been subscribed - PROF_ERROR_MODEL_ID_INVALID, // model id does not exist or has not been subscribed - PROF_ERROR_API_CONFLICT, // prof ctrl api mode conflicts with subscribe mode -}; - -/** - * @brief transfer profiling config in acl.json to sample config - * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} - * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); - -/** - * @name ProfInit - * 
@brief init profiling - * @param profInitCfg [IN] config of init profiling of json format - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); - -/** - * @name ProfAicoreMetrics - * @brief aicore metrics enum - */ -enum ProfAicoreMetrics { - PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, - PROF_AICORE_PIPELINE = 1, - PROF_AICORE_SYNCHRONIZATION = 2, - PROF_AICORE_MEMORY = 3, - PROF_AICORE_INTERNAL_MEMORY = 4, - PROF_AICORE_STALL = 5, - PROF_AICORE_METRICS_COUNT, - PROF_AICORE_NONE = 0xff, -}; - -/** - * @name ProfConfig - * @brief struct of ProfStart - */ -struct ProfConfig { - uint32_t devNums; // length of device id list - uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list - ProfAicoreMetrics aicoreMetrics; // aicore metric - uint64_t dataTypeConfig; // data type to start profiling -}; - -/** - * @name ProfStartProfiling - * @brief start profiling - * @param profStartCfg [IN] config to start profiling - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); - -/** - * @name ProfStopProfiling - * @brief stop profiling - * @param profStopCfg [IN] config to stop profiling - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); - -/** - * @name ProfFinalize - * @brief finalize profiling task - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfFinalize(); - -/** - * @name ProfGetDataTypeConfig - * @brief get dataTypeConfig started with of one device - * @param deviceId [IN] deviceId to get dataTypeConfig - * @param dataTypeConfig [OUT] result get - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); namespace Msprofiler { namespace Api { -/** - * @brief transfer profiling config in acl.json to sample config - * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} - * @param sampleCfg [OUT] json string for 
GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); - -/** - * @name ProfInit - * @brief init profiling - * @param profInitCfg [IN] config of init profiling of json format - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); - -/** - * @name ProfStartProfiling - * @brief start profiling - * @param profStartCfg [IN] config to start profiling - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); - -/** - * @name ProfStopProfiling - * @brief stop profiling - * @param profStopCfg [IN] config to stop profiling - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); - -/** - * @name ProfFinalize - * @brief finalize profiling task - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfFinalize(); - -/** - * @name ProfGetDataTypeConfig - * @brief get dataTypeConfig started with of one device - * @param deviceId [IN] deviceId to get dataTypeConfig - * @param dataTypeConfig [OUT] result get - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); - -/** - * @name WorkMode - * @brief profiling api work mode - */ -enum WorkMode { - WORK_MODE_OFF, // profiling not at work - WORK_MODE_API_CTRL, // profiling work on api ctrl mode, (ProfInit) - WORK_MODE_SUBSCRIBE, // profiling work on subscribe mode -}; - -/** - * @name ProfGetApiWorkMode - * @brief get profiling api work mode - * @return WorkMode - */ -MSVP_PROF_API WorkMode ProfGetApiWorkMode(); - -/** - * @name ProfSubscribeConfig - * @brief config of subscribe api - */ -struct ProfSubscribeConfig { - bool timeInfo; // subscribe op time - ProfAicoreMetrics aicoreMetrics; // subscribe ai core metrics - void* fd; // pipe fd -}; - -/** - * @name ProfGetDataTypeConfig - * @brief get 
DataTypeConfig of subscribe - * @param profSubscribeConfig [IN] config to subscribe data - * @return DataTypeConfig - */ -MSVP_PROF_API uint64_t ProfGetDataTypeConfig(const ProfSubscribeConfig *profSubscribeConfig); - -/** - * @name ProfModelSubscribe - * @brief subscribe data of one model id - * @param modelId [IN] model id to subscribe data - * @param devId [IN] device id of model - * @param profSubscribeConfig [IN] config to subscribe data - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfModelSubscribe(uint32_t modelId, uint32_t devId, - const ProfSubscribeConfig *profSubscribeConfig); - -/** - * @name ProfIsModelSubscribed - * @brief check if a model id is subscribed - * @param modeiId [IN] modei id to check - * @return true: subscribed, false: not - */ -MSVP_PROF_API bool ProfIsModelSubscribed(uint32_t modelId); - -/** - * @name ProfModelUnSubscribe - * @brief unsubscribe a model id - * @param modeiId [IN] modei id to unsubscribe - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfModelUnSubscribe(uint32_t modelId); - -/** - * @name ProfGetOpDescSize - * @brief get profiling data struct size - * @param opDescSize [OUT] bytes of profiling subscribe data struct - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetOpDescSize(uint32_t *opDescSize); - -/** - * @name ProfGetOpNum - * @brief get how many op data there are in data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param opNum [OUT] number of op in data - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetOpNum(const void *data, uint32_t len, uint32_t *opNum); - -/** - * @name ProfGetModelId - * @brief get model id of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return model id - */ -MSVP_PROF_API uint32_t ProfGetModelId(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpType - * @brief get op type of specific part of data - * 
@param data [IN] data read from pipe - * @param len [IN] data length - * @param opType [OUT] op type buffer - * @param opTypeLen [IN] buffer size of param opType - * @param index [IN] index of part(op) - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetOpType(const void *data, uint32_t len, char *opType, uint32_t opTypeLen, uint32_t index); - -/** - * @name ProfGetOpName - * @brief get op name of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param opType [OUT] op name buffer - * @param opTypeLen [IN] buffer size of param opName - * @param index [IN] index of part(op) - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetOpName(const void *data, uint32_t len, char *opName, uint32_t opNameLen, uint32_t index); - -/** - * @name ProfGetOpStart - * @brief get op start timestamp of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op start timestamp (us) - */ -MSVP_PROF_API uint64_t ProfGetOpStart(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpEnd - * @brief get op end timestamp of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op end timestamp (us) - */ -MSVP_PROF_API uint64_t ProfGetOpEnd(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpDuration - * @brief get op duration of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op duration (us) - */ -MSVP_PROF_API uint64_t ProfGetOpDuration(const void *data, uint32_t len, uint32_t index); - /** * @name ProfGetOpExecutionTime * @brief get op execution time of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length + * @param data [IN] data read from pipe + * @param len [IN] 
data length * @param index [IN] index of part(op) * @return op execution time (us) */ -MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpCubeOps - * @brief get op cube fops of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op cube fops - */ -MSVP_PROF_API uint64_t ProfGetOpCubeOps(const void *data, uint32_t len, uint32_t index); - -/** - * @name ProfGetOpVectorOps - * @brief get op vector fops of specific part of data - * @param data [IN] data read from pipe - * @param len [IN] data length - * @param index [IN] index of part(op) - * @return op vector fops - */ -MSVP_PROF_API uint64_t ProfGetOpVectorOps(const void *data, uint32_t len, uint32_t index); - -} // namespace Api -} // namespace Msprofiler +uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); +} +} #endif // MSPROFILER_API_PROF_ACL_API_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h new file mode 100644 index 00000000..1299ae59 --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -0,0 +1,132 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MSPROFILER_PROF_CALLBACK_H_ +#define MSPROFILER_PROF_CALLBACK_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + +#include "stddef.h" +#include "stdint.h" + +/** + * @name MsprofErrorCode + * @brief error code + */ +enum MsprofErrorCode { + MSPROF_ERROR_NONE = 0, + MSPROF_ERROR_MEM_NOT_ENOUGH, + MSPROF_ERROR_GET_ENV, + MSPROF_ERROR_CONFIG_INVALID, + MSPROF_ERROR_ACL_JSON_OFF, + MSPROF_ERROR, +}; + +#define MSPROF_ENGINE_MAX_TAG_LEN (31) + +/** + * @name ReporterData + * @brief struct of data to report + */ +struct ReporterData { + char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module, data with different tag will be writen + int deviceId; // the index of device + size_t dataLen; // the length of send data + unsigned char *data; // the data content +}; + +/** + * @name MsprofReporterModuleId + * @brief module id of data to report + */ +enum MsprofReporterModuleId { + MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS + MSPROF_MODULE_HCCL, // HCCL + MSPROF_MODULE_ACL, // AclModule + MSPROF_MODULE_FRAMEWORK, // Framework + MSPROF_MODULE_RUNTIME // runtime +}; + +/** + * @name MsprofReporterCallbackType + * @brief reporter callback request type + */ +enum MsprofReporterCallbackType { + MSPROF_REPORTER_REPORT = 0, // report data + MSPROF_REPORTER_INIT, // init reporter + MSPROF_REPORTER_UNINIT, // uninit reporter +}; + +/** + * @name MsprofReporterCallback + * @brief callback to start reporter/stop reporter/report date + * @param moduleId [IN] enum MsprofReporterModuleId + * @param type [IN] enum MsprofReporterCallbackType + * @param data [IN] callback data (nullptr on INTI/UNINIT) + * @param len [IN] callback data size (0 on INIT/UNINIT) + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len); + + +#define MSPROF_OPTIONS_DEF_LEN_MAX (2048) + +/** + * @name MsprofGeOptions + * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS + */ +struct 
MsprofGeOptions { + char jobId[MSPROF_OPTIONS_DEF_LEN_MAX]; + char options[MSPROF_OPTIONS_DEF_LEN_MAX]; +}; + +/** + * @name MsprofCtrlCallbackType + * @brief ctrl callback request type + */ +enum MsprofCtrlCallbackType { + MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env + MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json + MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options + MSPROF_CTRL_FINALIZE // stop profiling +}; + +/** + * @name MsprofCtrlCallback + * @brief callback to start/stop profiling + * @param type [IN] enum MsprofCtrlCallbackType + * @param data [IN] callback data + * @param len [IN] callback data size + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len); + +/** + * @name MsprofSetDeviceCallback + * @brief callback to notify set/reset device + * @param devId [IN] device id + * @param isOpenDevice [IN] true: set device, false: reset device + */ +typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice); + +#ifdef __cplusplus +} +#endif + +#endif // MSPROFILER_PROF_CALLBACK_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index 949011d3..ff91351b 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -26,6 +26,8 @@ #define MSVP_PROF_API __attribute__((visibility("default"))) #endif +#include "prof_callback.h" + /** * @file prof_reporter.h * @defgroup reporter the reporter group @@ -33,20 +35,6 @@ */ namespace Msprof { namespace Engine { -/// the max tag length -#define MSPROF_ENGINE_MAX_TAG_LEN (31) -/** - * @ingroup reporter - * @brief struct ReporterData - * the sturct of the data send to libmsprof - */ -struct ReporterData { - char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen - int deviceId; ///< the physical id of device 
- size_t dataLen; ///< the length of send data - unsigned char *data; ///< the data content -}; - /** * @ingroup reporter * @brief class Reporter