From abe2c8a1ad3bf7ec699d5a91ecd542d6670c9b9b Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Sat, 20 Feb 2021 19:33:55 +0800 Subject: [PATCH 01/44] bugfix for variable->broadcast addr --- ge/graph/build/memory/graph_mem_assigner.cc | 25 +++++++++++++++------ ge/graph/build/memory/graph_mem_assigner.h | 5 +++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 33fda096..f4df8268 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -430,7 +430,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str()); return FAILED; } - GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second), + GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true), "Assign node %s continuous input memory failed.", node->GetName().c_str()) } for (auto pair : memory_offset_) { @@ -441,7 +441,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) { GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); auto iter = memory_offset_.find(memory_type); if (iter == memory_offset_.end()) { @@ -508,12 +508,16 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, std::map out2ins; GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str()); // output is beginning offset, set offset for input; only support this case now - if (out2ins.size() == 1 && out2ins.begin()->second == 0) 
{ + if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { + auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); peer_op_desc->SetOutputOffset(output_list); + GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(), + out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), + output_list_this.at(out2ins.begin()->first), peer_output_offset); } else { - GELOGW("Node %s out %d ref in %d with total ref numbers %zu", node->GetName().c_str(), out2ins.begin()->first, - out2ins.begin()->second, out2ins.size()); + GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(), + out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size()); } // first input is beginning offset mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); @@ -1535,6 +1539,11 @@ ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map &node_2_continuous_type) { for (const auto &in_node : input_continuous_node->GetInDataNodes()) { + if (in_node->GetType() == VARIABLE) { + GELOGI("node %s 's precursor node %s is variable, do not store.", input_continuous_node->GetName().c_str(), + in_node->GetName().c_str()); + return true; + } auto iter = node_2_continuous_type.find(in_node); // In node's topo order in the front, so function can not be exception auto continuous_type = iter->second; @@ -1560,13 +1569,15 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( } ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, - uint32_t continuous_type) { + uint32_t continuous_type, + bool reverse_refresh) { int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; int64_t memory_type = RT_MEMORY_HBM; 
GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed."); - auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, continuous_type); + auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, + continuous_type, reverse_refresh); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); return ret; diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index b56c3716..f4d1366d 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -131,13 +131,14 @@ class GraphMemoryAssigner { std::map &node_2_continuous_type); ge::Status AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, - uint32_t continuous_type); + uint32_t continuous_type, bool reverse_refresh=false); ge::Status FilterAtomicNodesForMemoryAssign(map>> &normal_atomic_nodes_map, map> &connecting_output_atomic_nodes); ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, + bool reverse_refresh = false); ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); From bab5b2c61b78bdadf8f95a72821c130972d175b1 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 22 Feb 2021 10:12:20 +0800 Subject: [PATCH 02/44] Add critical path log --- ge/generator/ge_generator.cc | 20 ++++++++++++-------- ge/single_op/single_op_model.cc | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 0d484fbf..072880fa 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -147,7 
+147,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi return FAILED; } -static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTensorDesc &tensor, int32_t index, +static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, bool attr) { GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); @@ -723,7 +723,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in const ComputeGraphPtr root_graph = ge_root_model->GetRootGraph(); GeModelPtr &ge_model = name_to_ge_model.begin()->second; GE_CHK_STATUS_RET_NOLOG(CheckDynamicSupport(ge_model, root_graph)); - GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); + GELOGI("After build model, The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); bool all_shape = false; (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); @@ -738,6 +738,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in } else { GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); } + GELOGI("Start save GeModel to Model buffer"); GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); return SUCCESS; } @@ -753,10 +754,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in */ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name) { - GELOGI("Start to build single op offline model."); + GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); ModelBufferData model_buff; OpEngineType engine_type = ENGINE_SYS; - return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); + Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, 
engine_type, model_buff, true); + GELOGI("Finish build single offline model"); + return status; } /** @@ -772,8 +775,10 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, OpEngineType engine_type, ModelBufferData &model_buff) { - GELOGI("Start to build single op online"); - return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); + GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size()); + Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); + GELOGI("Finish build single online model"); + return status; } Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector &inputs, @@ -798,8 +803,7 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vectorGetOpDesc(); GE_CHECK_NOTNULL(op_desc); - op_list_[i] = node; + op_list_[op_desc->GetId()] = node; auto op_type = op_desc->GetType(); GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, node->GetName().c_str(), op_type.c_str()); From 9d4b0ab4dc6e30d6edd823ff6d37e3c3c2f2767f Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 22 Feb 2021 13:27:21 +0800 Subject: [PATCH 03/44] Add unit test for generator --- ge/common/ge/plugin_manager.cc | 1 + ge/generator/ge_generator.cc | 8 +- tests/ut/ge/CMakeLists.txt | 49 ++++++------ .../ut/ge/generator/ge_generator_unittest.cc | 76 +++++++++++++++++++ 4 files changed, 108 insertions(+), 26 deletions(-) create mode 100644 tests/ut/ge/generator/ge_generator_unittest.cc diff --git a/ge/common/ge/plugin_manager.cc b/ge/common/ge/plugin_manager.cc index 38de251e..4e588f29 100644 --- a/ge/common/ge/plugin_manager.cc +++ b/ge/common/ge/plugin_manager.cc @@ -53,6 +53,7 @@ string PluginManager::GetPath() { GELOGW("Failed to read the shared library file path!"); return string(); } else { + GE_IF_BOOL_EXEC(dl_info.dli_fname == nullptr, return string()); 
std::string so_path = dl_info.dli_fname; char path[MMPA_MAX_PATH] = {0}; if (so_path.length() >= MMPA_MAX_PATH) { diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 072880fa..32d9e5a1 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -671,6 +671,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline) { + GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); + impl_->is_offline_ = is_offline; if (!is_offline) { (void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); } @@ -709,8 +711,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in GELOGI("ATC parser success in single op build."); GeRootModelPtr ge_root_model = nullptr; - GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); - impl_->is_offline_ = is_offline; GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); map op_attrs = op_desc_tmp->GetAllAttrs(); GE_CHECK_NOTNULL(ge_root_model); @@ -758,7 +758,7 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector + +#define private public +#define protected public +#include "generator/ge_generator.h" +#include "graph/utils/tensor_utils.h" + +using namespace std; + +namespace ge { +class UtestGeGenerator : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +TEST_F(UtestGeGenerator, test_build_single_op_offline) { + GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor_desc, 512); + + shared_ptr op_desc = make_shared("Add", "add"); + EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS); + + GeTensor tensor(tensor_desc); + const 
vector inputs = { tensor, tensor }; + const vector outputs = { tensor }; + + // not Initialize, impl is null. + GeGenerator generator; + EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), PARAM_INVALID); + + // const map &options + generator.Initialize({}); + EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); +} + +TEST_F(UtestGeGenerator, test_build_single_op_online) { + GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor_desc, 512); + + shared_ptr op_desc = make_shared("Add", "add"); + EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc->AddInputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc->AddOutputDesc(tensor_desc), GRAPH_SUCCESS); + + GeTensor tensor(tensor_desc); + const vector inputs = { tensor, tensor }; + const vector outputs = { tensor }; + + // not Initialize, impl is null. + GeGenerator generator; + generator.Initialize({}); + ModelBufferData model_buffer; + EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_SYS, model_buffer), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); +} + +} // namespace ge From a410300f2fe89f9e2d55c8e5082403637dd1b8c3 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Mon, 22 Feb 2021 14:46:47 +0800 Subject: [PATCH 04/44] add control edge between iter active node and fpbp assign_add node --- ge/graph/passes/flow_ctrl_pass.cc | 23 ++++++++++++++++++----- ge/graph/passes/flow_ctrl_pass.h | 3 +++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc index 5a294aa2..9d441ed5 100755 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -80,6 +80,16 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { graph_change = true; } } + + // add edge operation below depends on memcpy node in itertor loop set single stream,or may 
cause block + for (auto &active_node : active_nodes_in_iter_loop_) { + auto ret = GraphUtils::AddEdge(active_node->GetOutControlAnchor(), + assign_add_node_in_fpbp_loop_->GetInControlAnchor()); + if (ret != GRAPH_SUCCESS) { + GELOGW("add control edge between iter_loop_node:%s and fpbp_loop_node:%s fail, may cause block", + active_node->GetName().c_str(), assign_add_node_in_fpbp_loop_->GetName().c_str()); + } + } GELOGI("FlowCtrl pass end, graph is %s.", graph_change ? "changed" : "not changed"); return graph_change ? SUCCESS : NOT_CHANGED; } @@ -279,16 +289,16 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co * loopIncrement */ // Insert AssignAdd node - NodePtr assign_add_node = + assign_add_node_in_fpbp_loop_ = InsertAssignOp(compute_graph, ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node); - if (assign_add_node == nullptr || switch_node == nullptr) { + if (assign_add_node_in_fpbp_loop_ == nullptr || switch_node == nullptr) { GELOGE(PARAM_INVALID, "assign add node or switch node is null"); return FAILED; } string active_name = switch_node->GetName() + "_StreamActive"; // add attr for stream assign model to break branch. 
- GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node, active_name), "set stream label failed"); + GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node_in_fpbp_loop_, active_name), "set stream label failed"); // used for stream assign to find true branch GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); @@ -304,13 +314,15 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); // add ctrl edges - graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), assign_add_node->GetInControlAnchor()); + graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), + assign_add_node_in_fpbp_loop_->GetInControlAnchor()); if (add_ret != GRAPH_SUCCESS) { GELOGE(FAILED, "Add switch_node to assign_add_node ctrl edge failed, add_ret=%u.", add_ret); return FAILED; } - add_ret = GraphUtils::AddEdge(assign_add_node->GetOutControlAnchor(), active_node->GetInControlAnchor()); + add_ret = GraphUtils::AddEdge(assign_add_node_in_fpbp_loop_->GetOutControlAnchor(), + active_node->GetInControlAnchor()); if (add_ret != GRAPH_SUCCESS) { GELOGE(FAILED, "Add assign_add_node to active_node ctrl edge failed, add_ret=%u.", add_ret); return FAILED; @@ -533,6 +545,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); // used for stream assign to find active stream GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { loop_pre_node->GetName() }), "set active label list failed"); + active_nodes_in_iter_loop_.push_back(active_node); return SUCCESS; } } // namespace ge diff --git a/ge/graph/passes/flow_ctrl_pass.h b/ge/graph/passes/flow_ctrl_pass.h index 35270946..74f3cce0 100755 --- a/ge/graph/passes/flow_ctrl_pass.h +++ b/ge/graph/passes/flow_ctrl_pass.h @@ -142,6 +142,9 @@ class FlowCtrlPass : public 
GraphPass { /// false: only one dataSet exist /// bool CheckMultiDataSet(ComputeGraphPtr &compute_graph); + + NodePtr assign_add_node_in_fpbp_loop_ = nullptr; + std::vector active_nodes_in_iter_loop_; }; } // namespace ge From 263f43abe43cd845a0e83e0cee3559701e3f1aa6 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Mon, 22 Feb 2021 16:16:31 +0800 Subject: [PATCH 05/44] modified: ge/graph/passes/no_use_reshape_remove_pass.cc --- ge/graph/passes/no_use_reshape_remove_pass.cc | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ge/graph/passes/no_use_reshape_remove_pass.cc b/ge/graph/passes/no_use_reshape_remove_pass.cc index 66a798a5..44f520f0 100644 --- a/ge/graph/passes/no_use_reshape_remove_pass.cc +++ b/ge/graph/passes/no_use_reshape_remove_pass.cc @@ -83,10 +83,17 @@ Status NoUseReshapeRemovePass::Run(ge::NodePtr &node) { } if (to_be_deleted) { GELOGI("NoUseReshapeRemovePass remove useless node:%s", node->GetName().c_str()); - auto ret = PassUtils::UnlinkNodeWithControlCopy(node, kReshapeShapeIndex); - if (ret != SUCCESS) { - GELOGE(ret, "DimensionAdjustPass unlink node with control copy fail."); - return ret; + // if shape_input has no any input,which means a single const, it can be unlink from reshape + // op(x) const(shape) + // \ / + // reshape + auto shape_input_anchor = node->GetInDataAnchor(kReshapeShapeIndex); + if (shape_input_anchor != nullptr) { + auto shape_input = shape_input_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(shape_input); + if (shape_input->GetInAllNodes().empty()) { + shape_input_anchor->UnlinkAll(); + } } return IsolateAndDeleteNode(node, {kReshapeDataIndex}); } From 1db59ce1bce062c6cd32107d8d21392cb635a187 Mon Sep 17 00:00:00 2001 From: wuweikang Date: Tue, 9 Feb 2021 11:10:11 +0800 Subject: [PATCH 06/44] invoke sub kernel with tiling_key in dynamic shape & all scene --- ge/hybrid/model/hybrid_model_builder.cc | 4 +- .../node_executor/aicore/aicore_op_task.cc | 183 ++++++++++++++--- 
.../node_executor/aicore/aicore_op_task.h | 34 ++++ ge/single_op/single_op_model.cc | 20 +- ge/single_op/single_op_model.h | 2 +- ge/single_op/task/op_task.cc | 30 ++- ge/single_op/task/op_task.h | 8 + ge/single_op/task/tbe_task_builder.cc | 188 ++++++++++++++---- ge/single_op/task/tbe_task_builder.h | 33 ++- tests/depends/runtime/src/runtime_stub.cc | 7 + tests/ut/ge/CMakeLists.txt | 6 + tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 101 ++++++++++ .../ge/single_op/single_op_task_unittest.cc | 117 +++++++++++ third_party/fwkacllib/inc/runtime/kernel.h | 35 ++++ third_party/fwkacllib/inc/runtime/rt_model.h | 13 ++ 15 files changed, 694 insertions(+), 87 deletions(-) create mode 100644 tests/ut/ge/hybrid/ge_hybrid_unittest.cc create mode 100644 tests/ut/ge/single_op/single_op_task_unittest.cc diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index d2862553..7ea9e446 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() { op_index = task_def.kernel_ex().op_index(); } else if (task_type == RT_MODEL_TASK_HCCL) { op_index = task_def.kernel_hccl().op_index(); + } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { + op_index = task_def.kernel_with_handle().context().op_index(); } else { GELOGD("Skip task type: %d", static_cast(task_type)); continue; @@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() { } auto &node = iter->second; - if (task_type == RT_MODEL_TASK_KERNEL) { + if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 6ab62f3f..f3699b6c 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -33,6 +33,20 @@ constexpr 
char const *kAttrOpParamSize = "op_para_size"; constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; } // namespace +TbeHandleHolder::TbeHandleHolder(void *bin_handle) + : bin_handle_(bin_handle) {} + +TbeHandleHolder::~TbeHandleHolder() { + if (bin_handle_ != nullptr) { + GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); + } +} + +bool TbeHandleRegistry::AddHandle(std::unique_ptr &&holder) { + auto ret = registered_handles_.emplace(std::move(holder)); + return ret.second; +} + Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); @@ -69,7 +83,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { if (rt_ret != RT_ERROR_NONE || is_single_op_) { void *bin_handle = nullptr; if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { - GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); + GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); rtDevBinary_t binary; std::string json_string; GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), @@ -96,7 +110,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); } else { - GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); + GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str()); kernel_store.ReferTBEHandle(stub_name_.c_str()); } std::string kernel_name; @@ -108,25 +122,63 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { return SUCCESS; } -Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { - GE_CHK_STATUS_RET(ValidateTaskDef(task_def), - "[%s] Failed to validate task def: [%s]", - 
op_desc.GetName().c_str(), - task_def.DebugString().c_str()); +Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) { + TbeHandleRegistry ®istry = TbeHandleRegistry::GetInstance(); + auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); + if (tbe_kernel == nullptr) { + GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str()); + return INTERNAL_ERROR; + } + void *bin_handle = nullptr; + GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str()); + rtDevBinary_t binary; + std::string json_string; + GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string), + GELOGI("Get original type of session_graph_id.")); + if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; + } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF; + } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; + } else { + GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); + return PARAM_INVALID; + } + binary.version = 0; + binary.data = tbe_kernel->GetBinData(); + binary.length = tbe_kernel->GetBinDataSize(); + GELOGI("TBE: binary.length: %lu", binary.length); + GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle)); + handle_ = bin_handle; + auto holder = std::unique_ptr(new (std::nothrow) TbeHandleHolder(handle_)); + if (holder == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + if (!registry.AddHandle(std::move(holder))) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. 
node name = %s", op_desc.GetName().c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + return SUCCESS; +} + +Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { const domi::KernelDef &kernel_def = task_def.kernel(); const domi::KernelContext &context = kernel_def.context(); stub_name_ = kernel_def.stub_func(); - GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); - GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); args_size_ = kernel_def.args_size(); block_dim_ = kernel_def.block_dim(); - // malloc args memory args_.reset(new(std::nothrow) uint8_t[args_size_]); GE_CHECK_NOTNULL(args_); + if (kernel_def.args().size() < args_size_) { + GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); + return INTERNAL_ERROR; + } errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); if (err != EOK) { GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); @@ -157,19 +209,75 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef block_dim_, arg_base_, args_size_); + return SUCCESS; +} + +Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) { + const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle(); + const domi::KernelContext &context = kernel_with_handle.context(); + + GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc)); + original_kernel_key_ = kernel_with_handle.original_kernel_key() + "_"; + node_info_ = kernel_with_handle.node_info() + "/"; + args_size_ = kernel_with_handle.args_size(); + block_dim_ = kernel_with_handle.block_dim(); + // malloc args memory + args_.reset(new(std::nothrow) uint8_t[args_size_]); + GE_CHECK_NOTNULL(args_); + if (kernel_with_handle.args().size() < args_size_) { + GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); + return INTERNAL_ERROR; + } + errno_t err = memcpy_s(args_.get(), args_size_, 
kernel_with_handle.args().data(), args_size_); + + if (err != EOK) { + GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); + return INTERNAL_ERROR; + } + if (context.args_offset().size() < sizeof(uint16_t)) { + GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size()); + return INTERNAL_ERROR; + } + + const auto *args_offset_buffer = reinterpret_cast(context.args_offset().data()); + uint32_t offset = *args_offset_buffer; + if (offset > args_size_) { + GELOGE(INTERNAL_ERROR, + "[%s] Arg offset out of range. offset = %u, arg size = %u", + GetName().c_str(), + offset, + args_size_); + return INTERNAL_ERROR; + } + + arg_base_ = reinterpret_cast(args_.get() + offset); + max_arg_count_ = (args_size_ - offset) / sizeof(void *); + return SUCCESS; +} + +Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { + GE_CHK_STATUS_RET(ValidateTaskDef(task_def), + "[%s] Failed to validate task def: [%s]", + op_desc.GetName().c_str(), + task_def.DebugString().c_str()); + + if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) { + GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def)); + } else { + GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def)); + } return SUCCESS; } Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { auto task_type = static_cast(task_def.type()); - if (task_type != RT_MODEL_TASK_KERNEL) { + if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) { GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast(task_type)); return INTERNAL_ERROR; } - - const domi::KernelDef &kernel_def = task_def.kernel(); - const domi::KernelContext &context = kernel_def.context(); + const auto &context = task_type == RT_MODEL_TASK_KERNEL ? 
task_def.kernel().context() : + task_def.kernel_with_handle().context(); auto kernel_type = static_cast(context.kernel_type()); if (kernel_type != ccKernelType::TE) { GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast(kernel_type)); @@ -180,10 +288,9 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { } Status AiCoreOpTask::PrepareWithShape(TaskContext &context) { - if (tiling_buffer_ != nullptr) { + if (is_dynamic_) { return UpdateTilingInfo(context); } - return SUCCESS; } @@ -212,8 +319,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { clear_atomic_ = tiling_info.clear_atomic; tiling_data_ = tiling_info.tiling_data.str(); + tiling_key_ = tiling_info.tiling_key; + GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); if (tiling_data_.empty()) { - GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str()); + GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str()); + return SUCCESS; + } + if (tiling_buffer_ == nullptr) { + GELOGE(INTERNAL_ERROR, "tiling_buffer is nullptr while tiling_data is not empty!"); return INTERNAL_ERROR; } @@ -296,16 +409,26 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { } Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { - GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); - GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); - GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); + if (handle_ != nullptr) { + std::string dev_func = original_kernel_key_ + std::to_string(tiling_key_); + std::string kernel_info = node_info_ + std::to_string(tiling_key_); + GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).", dev_func.c_str(), + block_dim_); + GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), args_size_, 
nullptr, + stream, kernel_info.c_str())); + GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).", dev_func.c_str(), + block_dim_); + } else { + GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); + GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); + GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); + } return SUCCESS; } Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { - bool dynamic_supported = false; - (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported); - if (!dynamic_supported) { + (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, is_dynamic_); + if (!is_dynamic_) { GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str()); return SUCCESS; } @@ -314,22 +437,26 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { int64_t max_size = -1; (void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size); GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); - if (max_size <= 0) { + if (max_size < 0) { GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size); return PARAM_INVALID; } auto allocator = NpuMemoryAllocator::GetAllocator(); GE_CHECK_NOTNULL(allocator); - tiling_buffer_ = TensorBuffer::Create(allocator, static_cast(max_size)); - GE_CHECK_NOTNULL(tiling_buffer_); + if (max_size > 0) { + tiling_buffer_ = TensorBuffer::Create(allocator, static_cast(max_size)); + GE_CHECK_NOTNULL(tiling_buffer_); + GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); + } else { + GELOGD("op_param_size is 0, no need to create tiling buffer."); + } - GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); return SUCCESS; } bool AiCoreOpTask::IsDynamicShapeSupported() { 
- return tiling_buffer_ != nullptr; + return is_dynamic_; } const std::string &AiCoreOpTask::GetName() const { diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 69a74ea9..af09c2af 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -28,6 +28,32 @@ namespace ge { namespace hybrid { +class TbeHandleHolder { + public: + TbeHandleHolder(void *bin_handle); + ~TbeHandleHolder(); + + void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; } + void *GetBinHandle() { return bin_handle_; } + + private: + friend class TbeHandleRegistry; + void *bin_handle_ = nullptr; +}; + +class TbeHandleRegistry { + public: + static TbeHandleRegistry &GetInstance() { + static TbeHandleRegistry instance; + return instance; + } + + bool AddHandle(std::unique_ptr &&holder); + + private: + std::set> registered_handles_; +}; + class AiCoreOpTask { public: AiCoreOpTask() = default; @@ -67,6 +93,9 @@ class AiCoreOpTask { Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); Status InitTilingInfo(const OpDesc &op_desc); Status RegisterTbeHandle(const OpDesc &op_desc); + Status RegisterKernelHandle(const OpDesc &op_desc); + Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def); + Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def); std::string stub_name_; void *stub_func_ = nullptr; @@ -76,6 +105,11 @@ class AiCoreOpTask { bool clear_atomic_ = true; bool is_single_op_ = false; std::vector output_indices_to_skip_; + string original_kernel_key_; + string node_info_; + uint32_t tiling_key_ = 0; + void *handle_ = nullptr; + bool is_dynamic_ = false; }; class AtomicAddrCleanOpTask : public AiCoreOpTask { diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 7fcb0b8f..37297fdd 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc 
@@ -261,7 +261,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s if (kernel_type == ccKernelType::TE) { GELOGD("Building TBE task"); TbeOpTask *tbe_task = nullptr; - auto ret = BuildKernelTask(task_def.kernel(), &tbe_task); + auto ret = BuildKernelTask(task_def, &tbe_task); if (ret != SUCCESS) { return ret; } @@ -332,9 +332,11 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) { } } -Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) { +Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) { GE_CHECK_NOTNULL(task); - const auto &context = kernel_def.context(); + auto task_type = static_cast(task_def.type()); + const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : + task_def.kernel_with_handle().context(); auto iter = op_list_.find(context.op_index()); if (iter == op_list_.end()) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index()); @@ -347,7 +349,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa return ACL_ERROR_GE_MEMORY_ALLOCATION; } - auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def); + auto builder = TbeTaskBuilder(model_name_, iter->second, task_def); auto ret = builder.BuildTask(*tbe_task, model_params_); if (ret != SUCCESS) { delete tbe_task; @@ -418,13 +420,15 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { } Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { - const domi::KernelDef &kernel_def = task_def.kernel(); - const auto &context = kernel_def.context(); + auto task_type = static_cast(task_def.type()); + const auto &context = task_type == RT_MODEL_TASK_KERNEL ? 
task_def.kernel().context() : + task_def.kernel_with_handle().context(); + auto kernel_type = static_cast(context.kernel_type()); if (kernel_type == ccKernelType::TE) { GELOGD("Building TBE task"); TbeOpTask *tbe_task = nullptr; - GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); + GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); tbe_task->SetModelArgs(model_name_, model_id_); single_op.op_task_.reset(tbe_task); } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { @@ -453,7 +457,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), task_def.DebugString().c_str()); auto task_type = static_cast(task_def.type()); - if (task_type == RT_MODEL_TASK_KERNEL) { + if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { if (single_op.op_task_ != nullptr) { GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 6637271c..684dab77 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -67,7 +67,7 @@ class SingleOpModel { Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); - Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); + Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index ff200806..4f1c1f03 100755 --- 
a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -93,6 +93,14 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr &&args, size_t arg_size op_desc_ = op_desc; } +void TbeOpTask::SetKernelWithHandleArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, + const OpDescPtr &op_desc, + const domi::KernelDefWithHandle &kernel_def_with_handle) { + SetKernelArgs(std::move(args), arg_size, block_dim, op_desc); + original_kernel_key_ = kernel_def_with_handle.original_kernel_key(); + node_info_ = kernel_def_with_handle.node_info(); +} + void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; } void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { @@ -165,6 +173,10 @@ const std::string &TbeOpTask::GetStubName() const { return stub_name_; } uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } +void TbeOpTask::SetHandle(void *handle) { + this->handle_ = handle; +} + Status TbeOpTask::LaunchKernel(rtStream_t stream) { GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); auto *sm_desc = reinterpret_cast(sm_desc_); @@ -204,8 +216,9 @@ Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const ve } block_dim_ = run_info.block_dim; tiling_data_ = run_info.tiling_data.str(); - GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_, - tiling_data_.size()); + tiling_key_ = run_info.tiling_key; + GELOGD("Done invoking OpParaCalculate successfully. 
block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, + tiling_data_.size(), tiling_key_); GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); return SUCCESS; @@ -329,8 +342,17 @@ Status TbeOpTask::LaunchKernel(const vector &input_desc, } GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); - GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); - GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); + if (handle_ == nullptr) { + GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); + GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); + } else { + std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); + std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); + GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, + stream, kernel_info.c_str())); + GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); + } + return SUCCESS; } diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 78e1f6f0..be7f4aab 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -78,6 +78,8 @@ class TbeOpTask : public OpTask { void SetSmDesc(void *sm_desc); void SetStubFunc(const std::string &name, const void *stub_func); void SetKernelArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); + void SetKernelWithHandleArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, + const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) override; @@ -87,6 +89,7 @@ class TbeOpTask : public OpTask { const std::string &GetStubName() const; void 
EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); uint32_t GetTaskType() const override; + void SetHandle(void *handle); private: friend class SingleOpModel; @@ -107,6 +110,11 @@ class TbeOpTask : public OpTask { std::string tiling_data_; std::vector workspaces_; NodePtr node_; + + uint32_t tiling_key_ = 0; + void* handle_ = nullptr; + std::string original_kernel_key_; + std::string node_info_; }; class AiCpuBaseTask : public OpTask { diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 6eee61d0..606f8087 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -49,6 +49,15 @@ KernelHolder::~KernelHolder() { } } +HandleHolder::HandleHolder(void *bin_handle) + : bin_handle_(bin_handle) {} + +HandleHolder::~HandleHolder() { + if (bin_handle_ != nullptr) { + GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); + } +} + const char *KernelBinRegistry::GetUnique(const string &stub_func) { std::lock_guard lock(mutex_); auto it = unique_stubs_.find(stub_func); @@ -76,10 +85,17 @@ bool KernelBinRegistry::AddKernel(const std::string &stub_name, std::unique_ptr< return ret.second; } -TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def) +bool HandleRegistry::AddHandle(std::unique_ptr &&holder) { + auto ret = registered_handles_.emplace(std::move(holder)); + return ret.second; +} + +TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def) : node_(node), op_desc_(node->GetOpDesc()), - kernel_def_(kernel_def), + task_def_(task_def), + kernel_def_(task_def.kernel()), + kernel_def_with_handle_(task_def.kernel_with_handle()), stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, @@ -89,9 +105,14 @@ Status TbeTaskBuilder::DoRegisterBinary(const 
OpKernelBin &kernel_bin, void **bi binary.data = kernel_bin.GetBinData(); binary.length = kernel_bin.GetBinDataSize(); binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC; - auto ret = rtDevBinaryRegister(&binary, bin_handle); + Status ret = 0; + if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { + ret = rtRegisterAllKernel(&binary, bin_handle); + } else { + ret = rtDevBinaryRegister(&binary, bin_handle); + } if (ret != RT_ERROR_NONE) { - GELOGE(ret, "rtDevBinaryRegister failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), + GELOGE(ret, "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), param.core_type, static_cast(ret)); return ret; } @@ -128,14 +149,15 @@ Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_nam Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle, const SingleOpModelParam ¶m) { - std::string kernel_name; - GetKernelName(op_desc_, kernel_name); - void *handle = nullptr; auto ret = DoRegisterBinary(tbe_kernel, &handle, param); if (ret != SUCCESS) { return ret; } + if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { + *bin_handle = handle; + return SUCCESS; + } ret = DoRegisterMeta(handle); if (ret != SUCCESS) { @@ -143,6 +165,8 @@ Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const return ret; } + std::string kernel_name; + GetKernelName(op_desc_, kernel_name); ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str()); if (ret != SUCCESS) { GE_CHK_RT(rtDevBinaryUnRegister(handle)); @@ -186,13 +210,15 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam void *bin_handle = nullptr; auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param); - if (ret == SUCCESS) { - holder->SetBinHandle(bin_handle); - if (!registry.AddKernel(stub_name_, std::move(holder))) { - // should not 
happen. only one thread can reach here - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); - return ACL_ERROR_GE_INTERNAL_ERROR; - } + if (ret != SUCCESS) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. stub name = %s", stub_name_.c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + holder->SetBinHandle(bin_handle); + if (!registry.AddKernel(stub_name_, std::move(holder))) { + // should not happen. only one thread can reach here + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; } } @@ -200,6 +226,35 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam return SUCCESS; } +Status TbeTaskBuilder::RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m) { + GELOGD("RegisterKernelWithHandle begin."); + HandleRegistry ®istry = HandleRegistry::GetInstance(); + auto tbe_kernel = GetTbeKernel(op_desc_); + if (tbe_kernel == nullptr) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s", + op_desc_->GetName().c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + void *bin_handle = nullptr; + auto ret = DoRegisterKernel(*tbe_kernel, nullptr, &bin_handle, param); + if (ret != SUCCESS) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. node name = %s", op_desc_->GetName().c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + handle_ = bin_handle; + auto holder = std::unique_ptr(new (std::nothrow) HandleHolder(handle_)); + if (holder == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + if (!registry.AddHandle(std::move(holder))) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. 
node name = %s", op_desc_->GetName().c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + + return SUCCESS; +} + Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const { const std::string &sm_desc_str = kernel_def_.sm_desc(); if (sm_desc_str.empty()) { @@ -217,17 +272,17 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m } } - auto rtRet = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); - if (rtRet != RT_ERROR_NONE) { - GELOGE(rtRet, "rtMemAllocManaged failed, ret: %d", static_cast(rtRet)); - return rtRet; + auto rt_ret = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemAllocManaged failed, ret: %d", static_cast(rt_ret)); + return rt_ret; } - rtRet = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); - if (rtRet != RT_ERROR_NONE) { + rt_ret = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { (void)rtMemFreeManaged(*sm_desc); - GELOGE(rtRet, "rtMemcpy, ret: %d", static_cast(rtRet)); - return rtRet; + GELOGE(rt_ret, "rtMemcpy, ret: %d", static_cast(rt_ret)); + return rt_ret; } } @@ -239,10 +294,10 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & auto args = std::unique_ptr(new (std::nothrow) uint8_t[arg_size]); GE_CHECK_NOTNULL(args); - auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); - if (rtRet != RT_ERROR_NONE) { - GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rtRet)); - return RT_ERROR_TO_GE_STATUS(rtRet); + auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rt_ret)); + return 
RT_ERROR_TO_GE_STATUS(rt_ret); } const domi::KernelContext &context = kernel_def_.context(); @@ -258,39 +313,83 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & std::vector tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); - rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); - if (rtRet != RT_ERROR_NONE) { - GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast(rtRet)); - return RT_ERROR_TO_GE_STATUS(rtRet); + rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast(rt_ret)); + return RT_ERROR_TO_GE_STATUS(rt_ret); } } - task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); + + return SUCCESS; +} + +Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, + const OpDescPtr &op_desc) { + size_t arg_size = kernel_def_with_handle_.args_size(); + auto args = std::unique_ptr(new (std::nothrow) uint8_t[arg_size]); + GE_CHECK_NOTNULL(args); + + auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rt_ret)); + return rt_ret; + } + + const domi::KernelContext &context = kernel_def_with_handle_.context(); + const auto *args_offset_tmp = reinterpret_cast(context.args_offset().data()); + uint16_t offset = *args_offset_tmp; + + bool is_dynamic = false; + (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); + if (is_dynamic) { + GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); + } else { + // copy args + std::vector 
tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); + void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); + uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); + rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast(rt_ret)); + return rt_ret; + } + } + task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, + kernel_def_with_handle_); + return SUCCESS; } Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { GELOGD("Build tbe task begin"); - auto ret = SetKernelArgs(task, param, op_desc_); + auto task_type = static_cast(task_def_.type()); + auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) : + SetKernelArgs(task, param, op_desc_); if (ret != SUCCESS) { return ret; } - ret = RegisterKernel(task, param); + ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? 
RegisterKernelWithHandle(task, param) : + RegisterKernel(task, param); + task.SetHandle(handle_); if (ret != SUCCESS) { return ret; } + auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_); GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str()); - void *stub_func = nullptr; - auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); - if (rtRet != SUCCESS) { - GELOGE(rtRet, "rtGetFunctionByName failed."); - return RT_ERROR_TO_GE_STATUS(rtRet); + if (task_type != RT_MODEL_TASK_ALL_KERNEL) { + void *stub_func = nullptr; + auto rt_ret = rtGetFunctionByName(stub_name_.c_str(), &stub_func); + if (rt_ret != SUCCESS) { + GELOGE(rt_ret, "rtGetFunctionByName failed."); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + task.SetStubFunc(stub_name_, stub_func); } - task.SetStubFunc(stub_name_, stub_func); return SUCCESS; } @@ -299,15 +398,16 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { int64_t max_size = -1; (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); - if (max_size <= 0) { + if (max_size < 0) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size); return ACL_ERROR_GE_PARAM_INVALID; } - void *tiling_buffer = nullptr; - GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast(max_size), RT_MEMORY_HBM)); - GE_CHECK_NOTNULL(tiling_buffer); - GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); + if (max_size > 0) { + GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast(max_size), RT_MEMORY_HBM)); + GE_CHECK_NOTNULL(tiling_buffer); + GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); + } task.EnableDynamicSupport(node_, tiling_buffer, static_cast(max_size)); return SUCCESS; diff --git a/ge/single_op/task/tbe_task_builder.h b/ge/single_op/task/tbe_task_builder.h index 5cd5c463..8af9a68d 100755 --- 
a/ge/single_op/task/tbe_task_builder.h +++ b/ge/single_op/task/tbe_task_builder.h @@ -42,6 +42,19 @@ class KernelHolder { std::shared_ptr kernel_bin_; }; +class HandleHolder { + public: + HandleHolder(void *bin_handle); + ~HandleHolder(); + + void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; } + void *GetBinHandle() { return bin_handle_; } + + private: + friend class HandleRegistry; + void *bin_handle_ = nullptr; +}; + class KernelBinRegistry { public: static KernelBinRegistry &GetInstance() { @@ -61,9 +74,22 @@ class KernelBinRegistry { std::mutex mutex_; }; +class HandleRegistry { + public: + static HandleRegistry &GetInstance() { + static HandleRegistry instance; + return instance; + } + + bool AddHandle(std::unique_ptr &&holder); + + private: + std::set> registered_handles_; +}; + class TbeTaskBuilder { public: - TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def); + TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def); ~TbeTaskBuilder() = default; Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); @@ -71,9 +97,11 @@ class TbeTaskBuilder { private: Status InitTilingInfo(TbeOpTask &task); Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); + Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); + Status RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m); Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle, const SingleOpModelParam ¶m); Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; @@ -83,8 +111,11 @@ class TbeTaskBuilder { const NodePtr node_; const OpDescPtr op_desc_; + const 
domi::TaskDef &task_def_; const domi::KernelDef &kernel_def_; + const domi::KernelDefWithHandle &kernel_def_with_handle_; const std::string stub_name_; + void *handle_ = nullptr; }; } // namespace ge diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 1323a76a..3808e5d6 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -131,8 +131,15 @@ rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } +rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } + rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; } +rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, + rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo) { + return RT_ERROR_NONE; +} + rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc, rtStream_t stream) { return RT_ERROR_NONE; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 7c49c0a7..baba874f 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -759,12 +759,17 @@ set(SINGLE_OP_TEST_FILES #"single_op/single_op_model_unittest.cc" "single_op/single_op_manager_unittest.cc" "single_op/stream_resource_unittest.cc" + "single_op/single_op_task_unittest.cc" ) set(PROFILING_MNG_TEST_FILES "profiling/ge_profiling_manager_unittest.cc" ) +set(HYBRID_TEST_FILES + "hybrid/ge_hybrid_unittest.cc" +) + set(OTHERS_TEST_FILES "plugin_manager/ge_util_unittest.cc" ) @@ -1059,6 +1064,7 @@ add_executable(ut_libge_distinct_load_utest ${DISTINCT_GRAPH_LOAD_SRC_FILES} ${SINGLE_OP_TEST_FILES} ${PROFILING_MNG_TEST_FILES} + ${HYBRID_TEST_FILES} ) 
target_compile_options(ut_libge_distinct_load_utest PRIVATE diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc new file mode 100644 index 00000000..61f99950 --- /dev/null +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "runtime/rt.h" + +#define protected public +#define private public +#include "hybrid/model/hybrid_model_builder.h" +#include "hybrid/model/hybrid_model.h" +#include "model/ge_model.h" +#include "model/ge_root_model.h" + +#include "hybrid/node_executor/aicore/aicore_op_task.h" +#include "framework/common/taskdown_common.h" +#include "framework/common/debug/log.h" +#include "graph/ge_context.h" +#include "hybrid/executor/hybrid_execution_context.h" +#include "hybrid/node_executor/aicore/aicore_task_builder.h" +#include "graph/load/model_manager/tbe_handle_store.h" +#include "graph/types.h" + +#undef private +#undef protected + +using namespace std; +using namespace testing; +using namespace ge; + +class UtestGeHybrid : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { + auto op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + op_desc->SetId(0); + + op_desc->SetWorkspace({}); + ; + op_desc->SetWorkspaceBytes({}); + 
op_desc->SetInputOffset({}); + op_desc->SetOutputOffset({}); + + ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC"); + bool support_dynamic = true; + ge::AttrUtils::GetBool(op_desc, "support_dynamicshape", support_dynamic); + return op_desc; +} + +TEST_F(UtestGeHybrid, aicore_op_task_init_success) { + // build aicore task + auto aicore_task = std::unique_ptr(new(std::nothrow)hybrid::AiCoreOpTask()); + domi::TaskDef task_def; + task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); + domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); + kernel_with_handle->set_original_kernel_key(""); + kernel_with_handle->set_node_info(""); + kernel_with_handle->set_block_dim(32); + kernel_with_handle->set_args_size(64); + string args(64, '1'); + kernel_with_handle->set_args(args.data(), 64); + domi::KernelContext *context = kernel_with_handle->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + + OpDescPtr op_desc = CreateOpDesc("Add", "Add"); + std::vector kernelBin; + TBEKernelPtr tbe_kernel = std::make_shared("name/Add", std::move(kernelBin)); + op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); + std::string kernel_name("kernel/Add"); + AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); + ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); + char *handle = ""; + aicore_task->handle_ = handle; + aicore_task->tiling_key_ = 1; + ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); +} \ No newline at end of file diff --git a/tests/ut/ge/single_op/single_op_task_unittest.cc b/tests/ut/ge/single_op/single_op_task_unittest.cc new file mode 100644 index 00000000..a17c9012 --- /dev/null +++ 
b/tests/ut/ge/single_op/single_op_task_unittest.cc @@ -0,0 +1,117 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "graph/load/model_manager/model_utils.h" +#include "graph/utils/graph_utils.h" +#include "runtime/rt.h" + +#define protected public +#define private public +#include "single_op/single_op_model.h" +#include "single_op/task/tbe_task_builder.h" +#include "single_op/task/op_task.h" +#include "single_op/task/tbe_task_builder.h" +#include "external/register/op_tiling_registry.h" +#undef private +#undef protected + +using namespace std; +using namespace testing; +using namespace ge; +using namespace optiling; + +class UtestSingleOpTask : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +TEST_F(UtestSingleOpTask, test_build_kernel_task) { + string model_data_str = "123456789"; + SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); + model.input_offset_list_.push_back(0); + model.input_sizes_.push_back(16); + + model.output_offset_list_.push_back(0); + model.output_sizes_.push_back(16); + + auto graph = make_shared("graph"); + auto op_desc = make_shared("Add", "Add"); + std::vector kernelBin; + TBEKernelPtr tbe_kernel = std::make_shared("name/Add", std::move(kernelBin)); + op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); + std::string kernel_name("kernel/Add"); + 
AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); + + vector shape{16, 16}; + GeShape ge_shape(shape); + GeTensorDesc desc(ge_shape); + op_desc->AddInputDesc(desc); + op_desc->AddOutputDesc(desc); + auto node = graph->AddNode(op_desc); + + std::mutex stream_mu_; + rtStream_t stream_ = nullptr; + StreamResource stream_resource(0); + SingleOp single_op(&stream_resource, &stream_mu_, stream_); + + domi::TaskDef task_def; + task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); + domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); + kernel_with_handle->set_original_kernel_key(""); + kernel_with_handle->set_node_info(""); + kernel_with_handle->set_block_dim(32); + kernel_with_handle->set_args_size(64); + string args(64, '1'); + kernel_with_handle->set_args(args.data(), 64); + domi::KernelContext *context = kernel_with_handle->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + model.op_list_[1] = node; + + TbeOpTask task_tmp; + TbeOpTask *task = &task_tmp; + ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS); + vector input_desc; + vector input_buffers; + vector output_desc; + vector output_buffers; + task->node_ = node; + OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool {return true;}; + OpTilingRegistryInterf("Add", op_tiling_func); + ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key"); + ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json"); + char c = '0'; + char* buffer = &c; + task->tiling_buffer_ = buffer; + task->max_tiling_size_ = 64; + task->tiling_data_ = "tiling_data"; + task->arg_size_ = 64; + uint8_t task_args{0}; + task->args_.reset(&task_args); + + ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); + char 
handle_tmp = '0'; + char *handle = &handle_tmp; + task->SetHandle(handle); + ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); +} \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index dc16ca58..b4500e10 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData); #define RT_FUSION_KERNEL_DUMPFLAG (0x04) #define RT_KERNEL_CUSTOM_AICPU (0x08) +/** + * @ingroup rt_kernel + * @brief kernel mode + */ +#define RT_DEFAULT_KERNEL_MODE (0x00) +#define RT_NORMAL_KERNEL_MODE (0x01) +#define RT_ALL_KERNEL_MODE (0x02) + /** * @ingroup rt_kernel * @brief kernel L1 Fusion Dump bit flags @@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData); */ RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); +/** + * @ingroup rt_kernel + * @brief register device binary + * @param [in] bin device binary description + * @param [out] handle device binary handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); + /** * @ingroup rt_kernel * @brief register fast memeory device binary @@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream); +/** + * @ingroup rt_kernel + * @brief launch kernel with handle to device + * @param [in] handle program + * @param [in] devFunc device function description + * @param [in] blockDim block dimensions + * @param [in] args arguments address for kernel function + * @param [in] argsSize arguments size + * @param [in] smDesc shared memory description + * @param [in] stream associated 
stream + * @param [in] kernelInfo kernel info + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, + rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo); + /** * @ingroup rt_kernel * @brief launch kernel to device diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 482486a8..798f63ae 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -50,6 +50,7 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, RT_MODEL_TASK_STREAM_LABEL_GOTO, RT_MODEL_TASK_MODEL_EXIT, + RT_MODEL_TASK_ALL_KERNEL, } rtModelTaskType_t; typedef enum tagModelStreamType { @@ -127,6 +128,17 @@ typedef struct tagKernelTaskInfo { uint16_t *argsOffset; } rtKernelTaskInfo_t; +typedef struct tagAllKernelTaskInfo { + uint16_t blockDim; + uint16_t argsCount; + uint16_t argsSize; + uint16_t reserved; + const void *dev_func; + void *handle; + uint8_t *smDesc; + uint8_t *args; + uint16_t *argsOffset; +} rtAllKernelTaskInfo_t; typedef struct tagKernelTaskInfoEx { uint32_t flags; uint32_t argsSize; @@ -251,6 +263,7 @@ typedef struct tagTaskInfo { union { rtKernelTaskInfoEx_t kernelTaskEx; rtKernelTaskInfo_t kernelTask; + rtAllKernelTaskInfo_t allkernelTask; rtEventTaskInfo_t eventTask; rtStreamSwitchTaskInfo_t streamSwitchTask; rtStreamActiveTaskInfo_t streamActiveTask; From 6a07a8b9d02947440c41e90dccf0f60f2d749999 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Tue, 23 Feb 2021 13:51:56 +0800 Subject: [PATCH 07/44] Print UT executable file path when failed. 
--- build.sh | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/build.sh b/build.sh index 7b6da560..a7e87cd5 100644 --- a/build.sh +++ b/build.sh @@ -76,8 +76,8 @@ checkopts() ENABLE_GE_ST="on" ;; t) - ENABLE_GE_UT="on" - ;; + ENABLE_GE_UT="on" + ;; c) ENABLE_GE_COV="on" ;; @@ -214,13 +214,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH} cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH} - ${OUTPUT_PATH}/ut_libgraph && - ${OUTPUT_PATH}/ut_libge_multiparts_utest && - ${OUTPUT_PATH}/ut_libge_distinct_load_utest && - ${OUTPUT_PATH}/ut_libge_others_utest && - ${OUTPUT_PATH}/ut_libge_kernel_utest + RUN_TEST_CASE=${OUTPUT_PATH}/ut_libgraph && ${RUN_TEST_CASE} && + RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_multiparts_utest && ${RUN_TEST_CASE} && + RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_distinct_load_utest && ${RUN_TEST_CASE} && + RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_others_utest && ${RUN_TEST_CASE} && + RUN_TEST_CASE=${OUTPUT_PATH}/ut_libge_kernel_utest && ${RUN_TEST_CASE} if [[ "$?" -ne 0 ]]; then echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!" + echo -e "\033[31m${RUN_TEST_CASE}\033[0m" exit 1; fi echo "Generating coverage statistics, please wait..." 
From f8400e4a082de0939593cac1436cc631eaae3678 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Tue, 23 Feb 2021 16:35:22 +0800 Subject: [PATCH 08/44] bugfix for variable->broadcast addr --- .../ge/graph/build/mem_assigner_unittest.cc | 127 +++++++++++++----- 1 file changed, 95 insertions(+), 32 deletions(-) diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index f53a0732..0024185b 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -25,10 +25,12 @@ #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" #include "omg/omg_inner_types.h" +#include "../passes/graph_builder_utils.h" #define protected public #define private public #include "graph/build/memory/binary_block_mem_assigner.h" +#include "graph/build/memory/graph_mem_assigner.h" #include "graph/build/memory/hybrid_mem_assigner.h" #include "graph/build/memory/max_block_mem_assigner.h" #undef protected @@ -41,7 +43,7 @@ using domi::GetContext; class UtestMemoryAssignerTest : public testing::Test { public: - ge::OpDescPtr createOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { + ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { ge::OpDescPtr op_def = make_shared(name, type); auto desc_temp_ptr = make_shared(); auto desc_temp = *desc_temp_ptr; @@ -55,26 +57,46 @@ class UtestMemoryAssignerTest : public testing::Test { op_def->SetWorkspaceBytes(workspace_bytes); return op_def; } - void make_graph(ge::ComputeGraphPtr graph) { - ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); + ge::OpDescPtr CreateRefOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { + ge::OpDescPtr op_def = make_shared(name, type); + auto desc_temp_ptr = make_shared(); + auto desc_temp = *desc_temp_ptr; + + TensorUtils::SetSize(desc_temp, 1024); + 
op_def->AddInputDesc(desc_temp); + + auto desc_output_ptr = make_shared(); + auto desc_output = *desc_output_ptr; + TensorUtils::SetSize(desc_output, 6500); + ge::TensorUtils::SetReuseInput(desc_output, true); + ge::TensorUtils::SetReuseInputIndex(desc_output, 0); + op_def->AddOutputDesc(desc_output); + + std::vector workspace_bytes; + workspace_bytes.push_back(wsByte); + op_def->SetWorkspaceBytes(workspace_bytes); + return op_def; + } + void MakeGraph(ge::ComputeGraphPtr &graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); op_def_a->SetStreamId(0); - ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000); op_def_b->SetStreamId(0); - ge::OpDescPtr op_def_c = createOpWithWsSize("C", 16000); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 16000); op_def_c->SetStreamId(1); - ge::OpDescPtr op_def_d = createOpWithWsSize("D", 24000); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 24000); op_def_d->SetStreamId(2); - ge::OpDescPtr op_def_e = createOpWithWsSize("E", 24000); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 24000); op_def_e->SetStreamId(3); - ge::OpDescPtr op_def_f = createOpWithWsSize("F", 30000); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 30000); op_def_f->SetStreamId(2); - ge::OpDescPtr op_def_g = createOpWithWsSize("G", 32000); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 32000); op_def_g->SetStreamId(3); - ge::OpDescPtr op_def_h = createOpWithWsSize("H", 48000); + ge::OpDescPtr op_def_h = CreateOpWithWsSize("H", 48000); op_def_h->SetStreamId(2); - ge::OpDescPtr op_def_i = createOpWithWsSize("I", 60000); + ge::OpDescPtr op_def_i = CreateOpWithWsSize("I", 60000); op_def_i->SetStreamId(2); - ge::OpDescPtr op_def_j = createOpWithWsSize("J", 256000, NETOUTPUT); + ge::OpDescPtr op_def_j = CreateOpWithWsSize("J", 256000, NETOUTPUT); op_def_j->SetStreamId(3); // add node @@ -108,24 +130,10 @@ class UtestMemoryAssignerTest : public testing::Test { 
graph->TopologicalSorting(); } - void make_reuse_graph(ge::ComputeGraphPtr graph) { - ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); - ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000); - - ge::OpDescPtr op_def_c = make_shared("C", "Some"); - auto desc_input_ptr = make_shared(); - auto desc_input = *desc_input_ptr; - - TensorUtils::SetSize(desc_input, 1024); - op_def_c->AddInputDesc(desc_input); - - auto desc_output_ptr = make_shared(); - auto desc_output = *desc_output_ptr; - TensorUtils::SetSize(desc_output, 6500); - ge::TensorUtils::SetReuseInput(desc_output, true); - ge::TensorUtils::SetReuseInputIndex(desc_output, 0); - op_def_c->AddOutputDesc(desc_output); - + void MakeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000); + ge::OpDescPtr op_def_c = CreateRefOpWithWsSize("C", 120000); ge::OpDescPtr op_def_d = make_shared("D", "CONSTANT"); ge::NodePtr node_a = graph->AddNode(op_def_a); @@ -141,6 +149,47 @@ class UtestMemoryAssignerTest : public testing::Test { graph->TopologicalSorting(); } + ComputeGraphPtr MakeCascadeContinuousMemoryGraph() { + ge::ut::GraphBuilder builder("graph"); + auto data = builder.AddNode("data", "Data", 1, 1); + auto addn1 = builder.AddNode("addn1", "AddN", 1, 1); + auto addn2 = builder.AddNode("addn2", "AddN", 1, 1); + auto addn3 = builder.AddNode("addn3", "AddN", 1, 1); + auto concat1 = builder.AddNode("concat1", "Concat", 2, 1); + auto concat2 = builder.AddNode("concat2", "Concat", 2, 1); + auto netoutput = builder.AddNode("netoutput", "NetOutput", 2, 0); + + ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); + ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true); + + ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); + 
ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true); + + addn1->GetOpDesc()->SetOutputOffset({100}); + addn2->GetOpDesc()->SetOutputOffset({200}); + concat1->GetOpDesc()->SetOutputOffset({100}); + addn3->GetOpDesc()->SetOutputOffset({700}); + concat2->GetOpDesc()->SetOutputOffset({500}); + + ge::AttrUtils::SetListInt(addn1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100}); + ge::AttrUtils::SetListInt(addn2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100}); + ge::AttrUtils::SetListInt(addn3->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100}); + ge::AttrUtils::SetListInt(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {200}); + ge::AttrUtils::SetListInt(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {300}); + + + builder.AddDataEdge(data, 0, addn1, 0); + builder.AddDataEdge(data, 0, addn2, 0); + builder.AddDataEdge(addn1, 0, concat1, 0); + builder.AddDataEdge(addn2, 0, concat1, 1); + builder.AddDataEdge(concat1, 0, concat2, 0); + builder.AddDataEdge(addn3, 0, concat2, 1); + + return builder.GetGraph(); + } + protected: void SetUp() {} @@ -150,7 +199,7 @@ class UtestMemoryAssignerTest : public testing::Test { /* TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { ge::ComputeGraphPtr graph = make_shared(""); - ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); ge::NodePtr node_a = graph->AddNode(op_def_a); MemoryBlock* memory_block = new MemoryBlock(0); memory_block->Init(1, kOutput, node_a, 0, 1); @@ -178,7 +227,7 @@ class MockBlockMemAssigner : public BlockMemAssigner { // when check GetMemoryRanges return fail, Assign return fail TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) { ge::ComputeGraphPtr graph = make_shared(""); - make_graph(graph); + MakeGraph(graph); std::map 
anchor_to_symbol; std::map> symbol_to_anchors; EXPECT_EQ(GraphUtils::GetRefMapping(graph, symbol_to_anchors, anchor_to_symbol), GRAPH_SUCCESS); @@ -186,3 +235,17 @@ TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) { MockBlockMemAssigner mock_assigner(graph, anchor_to_symbol, symbol_to_anchors); EXPECT_EQ(mock_assigner.Assign(), FAILED); } + +TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { + ge::ComputeGraphPtr graph = MakeCascadeContinuousMemoryGraph(); + auto addn1 = graph->FindNode("addn1"); + auto addn2 = graph->FindNode("addn2"); + EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 100); + EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 200); + GraphMemoryAssigner memoryAssigner(graph); + MemoryOffset memory_offset(RT_MEMORY_HBM, 0); + memoryAssigner.memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); + EXPECT_EQ(memoryAssigner.ReAssignContinuousMemory(false), GRAPH_SUCCESS); + EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 500); + EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); +} From e9345e6a99ec02b8990344829cbda7a47feaced2 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 23 Feb 2021 20:00:42 +0800 Subject: [PATCH 09/44] fix out of memory question --- ge/graph/manager/graph_caching_allocator.cc | 12 +-- ge/graph/manager/graph_caching_allocator.h | 8 +- ge/hybrid/executor/worker/execution_engine.cc | 2 + ge/hybrid/node_executor/task_context.cc | 11 ++- ge/hybrid/node_executor/task_context.h | 1 + tests/ut/ge/CMakeLists.txt | 1 + .../graph_caching_allocator_unittest.cc | 76 +++++++++++++++++++ 7 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index bfef4001..dd46e670 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -28,10 +28,9 @@ const size_t 
bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, kBinSizeUnit8 * kMByteSize, kBinSizeUnit32 * kMByteSize, kBinSizeUnit128 * kMByteSize, - kGByteSize, - kBinSizeUnit4 * kGByteSize, - kBinSizeUnit16 * kGByteSize, - kBinSizeUnit26 * kGByteSize}; + kBinSizeUnit256 * kMByteSize, + kBinSizeUnit512 * kMByteSize, + kGByteSize}; static bool BlockComparator(const Block *left, const Block *right) { if (left->size != right->size) { @@ -63,7 +62,10 @@ size_t GetBinIndex(size_t size) { size_t GetAllocationSize(size_t size) { size_t index = GetBinIndex(size); - return bin_ranges[index]; + if (bin_ranges[index] >= size) { + return bin_ranges[index]; + } + return kGByteSize * ((size + kGByteSize - 1) / kGByteSize); } /// diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index e024d5cd..42d0952d 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -36,17 +36,17 @@ namespace ge { constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes constexpr size_t kBinSizeUnit4 = 4; constexpr size_t kBinSizeUnit8 = 8; -constexpr size_t kBinSizeUnit16 = 16; -constexpr size_t kBinSizeUnit26 = 26; constexpr size_t kBinSizeUnit32 = 32; constexpr size_t kBinSizeUnit128 = 128; +constexpr size_t kBinSizeUnit256 = 256; +constexpr size_t kBinSizeUnit512 = 512; -constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold +constexpr double kSplitThreshold = 0.5; // split when malloc size <= small block size * kSplitThreshold constexpr size_t kKByteSize = 1024; constexpr size_t kMByteSize = 1048576; // 1024 * 1024 constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024 -static const uint32_t kNumBins = 8; +static const uint32_t kNumBins = 7; class MemoryAllocator; diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 7f4fa78c..fda65cb2 100755 --- 
a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -323,6 +323,8 @@ Status NodeDoneCallback::OnNodeDone() { node_item.NodeName().c_str()); } + // release workspace + context_->ReleaseWorkspace(); // release inputs for (int i = 0; i < context_->NumInputs(); ++i) { context_->ReleaseInput(i); diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index e3cf5ae1..085970e0 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -36,10 +36,6 @@ TaskContext::TaskContext(GraphExecutionContext *execution_context, TaskContext::~TaskContext() { GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str()); - for (auto ws_addr : workspaces_) { - execution_context_->allocator->Deallocate(ws_addr); - } - // release output for (int i = 0; i < NumOutputs(); ++i) { auto output_tensor = MutableOutput(i); @@ -49,6 +45,13 @@ TaskContext::~TaskContext() { } } +void TaskContext::ReleaseWorkspace() { + GELOGD("[%s] Start ReleaseWorkspace.", node_item_->NodeName().c_str()); + for (auto ws_addr : workspaces_) { + execution_context_->allocator->Deallocate(ws_addr); + } +} + std::unique_ptr TaskContext::Create(NodeState *node_state, GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) { diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index dc4ff058..f29918b4 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -56,6 +56,7 @@ class TaskContext { void ReleaseInputsAndOutputs(); bool NeedCallback(); void ReleaseInput(int index); + void ReleaseWorkspace(); const TensorValue *GetInput(int index) const; const TensorValue *GetOutput(int index) const; TensorValue *MutableOutput(int index); diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 184403ed..0247a694 100755 --- a/tests/ut/ge/CMakeLists.txt +++ 
b/tests/ut/ge/CMakeLists.txt @@ -752,6 +752,7 @@ set(MULTI_PARTS_TEST_FILES "graph/build/mem_assigner_unittest.cc" "graph/preprocess/graph_preprocess_unittest.cc" "graph/manager/hcom_util_unittest.cc" + "graph/manager/graph_caching_allocator_unittest.cc" "session/omg_omg_unittest.cc" ) diff --git a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc new file mode 100644 index 00000000..efadcdf6 --- /dev/null +++ b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc @@ -0,0 +1,76 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" + +#define protected public +#define private public +#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/graph_mem_allocator.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestGraphCachingAllocatorTest : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { GetContext().out_nodes_map.clear(); } +}; + +TEST_F(UtestGraphCachingAllocatorTest, initialize_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestGraphCachingAllocatorTest, malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestGraphCachingAllocatorTest, malloc_statics) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); + EXPECT_NE(nullptr, ptr); + uint8_t *ptr1 = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kKByteSize); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr), SUCCESS); + EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr1), SUCCESS); + 
MemManager::Instance().CachingInstance(RT_MEMORY_HBM).FreeCachedBlocks(); + MemManager::Instance().Finalize(); +} \ No newline at end of file From b9395f2ce925de617f01f2f51d140c41a30dcb1b Mon Sep 17 00:00:00 2001 From: wqtshg Date: Tue, 23 Feb 2021 20:03:22 +0800 Subject: [PATCH 10/44] update submodule --- metadef | 2 +- parser | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadef b/metadef index a2b80cb2..b6de68fd 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit a2b80cb22a62a6757c7dd31e684ca632e0b79268 +Subproject commit b6de68fdf0f131fd5f8aa3a84245ad7779b348f5 diff --git a/parser b/parser index cfabf622..7a631135 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit cfabf622b803d5957563a73652a0ce5086aab99d +Subproject commit 7a6311351f8294eb11033b10e9f7b2b993cc3c2a From 6eb421f8904a2237ffc3609a44681f2d60810ea0 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 23 Feb 2021 21:00:30 +0800 Subject: [PATCH 11/44] bugfix:fix print shape abnormal question --- ge/graph/manager/graph_caching_allocator.cc | 2 ++ .../graph/manager/graph_caching_allocator_unittest.cc | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index dd46e670..ca5a6c7d 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -121,6 +121,7 @@ void CachingAllocator::Finalize(uint32_t device_id) { } uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { + GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); uint8_t *ptr = nullptr; size = GetBlockSize(size); Block *block = FindFreeBlock(size, org_ptr, device_id); @@ -255,6 +256,7 @@ Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, ui } Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { + GELOGI("Try to extend cache. 
size = %zu, device id = %u", size, device_id); auto memory_size = GetAllocationSize(size); const std::string purpose = "Memory for caching."; auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id); diff --git a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc index efadcdf6..f76a4d4e 100644 --- a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc @@ -61,6 +61,17 @@ TEST_F(UtestGraphCachingAllocatorTest, malloc_success) { MemManager::Instance().Finalize(); } +TEST_F(UtestGraphCachingAllocatorTest, extend_malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); + EXPECT_NE(nullptr, ptr); + ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kBinSizeUnit32*kMByteSize); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + TEST_F(UtestGraphCachingAllocatorTest, malloc_statics) { std::vector mem_type; mem_type.push_back(RT_MEMORY_HBM); From dcbaf26680289f3e142c8879a6c8f5f192dcd607 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Tue, 23 Feb 2021 21:00:34 +0800 Subject: [PATCH 12/44] modified: ge/graph/passes/no_use_reshape_remove_pass.cc modified: ge/graph/passes/no_use_reshape_remove_pass.h --- ge/graph/passes/no_use_reshape_remove_pass.cc | 45 +++++++++++++------ ge/graph/passes/no_use_reshape_remove_pass.h | 3 ++ 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/ge/graph/passes/no_use_reshape_remove_pass.cc b/ge/graph/passes/no_use_reshape_remove_pass.cc index 44f520f0..c2b8bdad 100644 --- a/ge/graph/passes/no_use_reshape_remove_pass.cc +++ b/ge/graph/passes/no_use_reshape_remove_pass.cc @@ -82,21 +82,40 @@ Status NoUseReshapeRemovePass::Run(ge::NodePtr &node) { } } 
if (to_be_deleted) { - GELOGI("NoUseReshapeRemovePass remove useless node:%s", node->GetName().c_str()); - // if shape_input has no any input,which means a single const, it can be unlink from reshape - // op(x) const(shape) - // \ / - // reshape - auto shape_input_anchor = node->GetInDataAnchor(kReshapeShapeIndex); - if (shape_input_anchor != nullptr) { - auto shape_input = shape_input_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(shape_input); - if (shape_input->GetInAllNodes().empty()) { - shape_input_anchor->UnlinkAll(); - } - } + auto ret = TryRemoveConstShapeInput(node); + GE_CHK_STATUS_RET_NOLOG(ret); + GELOGI("NoUseReshapeRemovePass remove useless reshape node:%s", node->GetName().c_str()); return IsolateAndDeleteNode(node, {kReshapeDataIndex}); } return SUCCESS; } + +Status NoUseReshapeRemovePass::TryRemoveConstShapeInput(ge::NodePtr &reshape_node) { + auto shape_input_anchor = reshape_node->GetInDataAnchor(kReshapeShapeIndex); + if (shape_input_anchor == nullptr) { + return SUCCESS; + } + auto shape_input = shape_input_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(shape_input); + if (shape_input->GetType() != CONSTANT && shape_input->GetType() != CONSTANTOP) { + return SUCCESS; + } + // op(x) const(shape) + // \ / + // reshape + // const input can unlink but should copy control_dependency + auto ret = PassUtils::UnlinkNodeWithControlCopy(reshape_node, kReshapeShapeIndex); + if (ret != SUCCESS) { + GELOGE(ret, "Unlink node %s with control copy failed.", shape_input->GetName().c_str()); + return ret; + } + + // remove const without any data_output + if (shape_input->GetOutDataNodesSize() == 0) { + auto ret = IsolateAndDeleteNode(shape_input, {}); + GE_CHK_GRAPH_STATUS_RET(ret, "Fail to remove node %s", shape_input->GetName().c_str()); + GELOGI("Remove useless shape input const %s.", shape_input->GetName().c_str()); + } + return SUCCESS; +} } // namespace ge diff --git a/ge/graph/passes/no_use_reshape_remove_pass.h b/ge/graph/passes/no_use_reshape_remove_pass.h 
index c142d8d2..3eb6770b 100755 --- a/ge/graph/passes/no_use_reshape_remove_pass.h +++ b/ge/graph/passes/no_use_reshape_remove_pass.h @@ -32,6 +32,9 @@ class NoUseReshapeRemovePass : public BaseNodePass { /// @author /// Status Run(ge::NodePtr &node) override; + + private: + Status TryRemoveConstShapeInput(NodePtr &reshape_node); }; } // namespace ge From d66ef5f2d0e1c93d3b8f2f078450020d56856508 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 23 Feb 2021 22:07:26 +0800 Subject: [PATCH 13/44] fix out of memory question --- ge/graph/passes/net_output_pass.cc | 2 +- ge/graph/passes/prune_pass.cc | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ge/graph/passes/net_output_pass.cc b/ge/graph/passes/net_output_pass.cc index c6ab062a..c553607f 100644 --- a/ge/graph/passes/net_output_pass.cc +++ b/ge/graph/passes/net_output_pass.cc @@ -458,7 +458,7 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); return GE_GRAPH_PARAM_NULLPTR; } - GELOGI("NetOutputPass Run."); + GELOGI("NetOutputPass Run.graph is [%s]", graph->GetName().c_str()); NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT); // save user targets node SaveAndRemoveTargets(graph); diff --git a/ge/graph/passes/prune_pass.cc b/ge/graph/passes/prune_pass.cc index f5f4cbcb..3c95f3b1 100644 --- a/ge/graph/passes/prune_pass.cc +++ b/ge/graph/passes/prune_pass.cc @@ -27,12 +27,11 @@ namespace ge { Status PrunePass::Run(ge::ComputeGraphPtr graph) { - GELOGD("PrunePass Start"); + GELOGD("PrunePass Start, graph is [%s]", graph->GetName().c_str()); if (graph == nullptr) { GELOGE(GE_GRAPH_ISNULL, "input compute graph is NULL."); return GE_GRAPH_ISNULL; } - std::vector out_nodes; std::unordered_set nodes; for (NodePtr &node_ptr : graph->GetDirectNode()) { @@ -42,7 +41,6 @@ Status PrunePass::Run(ge::ComputeGraphPtr graph) { out_nodes.push_back(node_ptr); } } - if (out_nodes.empty()) { GELOGW("graph [%s] does not contain NETOUTPUT type 
node,no return value. Do nothing!", graph->GetName().c_str()); return ge::SUCCESS; From 98995cbae1ef36888883e5ac198e870b229cf4ff Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 23 Feb 2021 22:14:25 +0800 Subject: [PATCH 14/44] fix out of memory question --- ge/graph/passes/flow_ctrl_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc index 9d441ed5..52a570cb 100755 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -37,7 +37,7 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { return NOT_CHANGED; } - GELOGI("FlowCtrl pass begin"); + GELOGI("FlowCtrl pass begin.graph is [%s]", compute_graph->GetName().c_str()); bool graph_change = false; // 1. Add FP/BP flow ctrl (big cycle) for (auto &node : compute_graph->GetDirectNode()) { From db230d8460935731d0a3acaff3d0ab5f31306fb5 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Wed, 24 Feb 2021 09:58:18 +0800 Subject: [PATCH 15/44] modified: ge/graph/passes/no_use_reshape_remove_pass.cc --- ge/graph/passes/no_use_reshape_remove_pass.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ge/graph/passes/no_use_reshape_remove_pass.cc b/ge/graph/passes/no_use_reshape_remove_pass.cc index c2b8bdad..1da939c6 100644 --- a/ge/graph/passes/no_use_reshape_remove_pass.cc +++ b/ge/graph/passes/no_use_reshape_remove_pass.cc @@ -95,7 +95,8 @@ Status NoUseReshapeRemovePass::TryRemoveConstShapeInput(ge::NodePtr &reshape_nod if (shape_input_anchor == nullptr) { return SUCCESS; } - auto shape_input = shape_input_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(shape_input_anchor->GetPeerOutAnchor()); + auto shape_input = shape_input_anchor->GetPeerOutAnchor()->GetOwnerNode(); GE_CHECK_NOTNULL(shape_input); if (shape_input->GetType() != CONSTANT && shape_input->GetType() != CONSTANTOP) { return SUCCESS; From feb085cc742d78cf91c411ae94dd49189cae026f Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 24 Feb 
2021 11:54:32 +0800 Subject: [PATCH 16/44] Fix bug of ge_executor. --- ge/executor/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 05d627de..3406ab87 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -250,15 +250,14 @@ target_link_options(ge_executor_shared PRIVATE target_link_libraries(ge_executor_shared PRIVATE $ msprofiler + static_mmpa -Wl,--no-as-needed ge_common runtime slog - mmpa graph register error_manager - ascend_hal_stub ascend_protobuf c_sec -Wl,--as-needed From 0f8b045a5f6d32af57b5cc20ba334bed9f4d3d39 Mon Sep 17 00:00:00 2001 From: isaactalx Date: Wed, 24 Feb 2021 16:38:50 +0800 Subject: [PATCH 17/44] static warning clean --- ge/hybrid/executor/hybrid_model_async_executor.cc | 3 ++- ge/hybrid/executor/hybrid_model_pipeline_executor.cc | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 1b5afb83..97fb9d50 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -26,6 +26,7 @@ namespace hybrid { namespace { const int kDataOutputIndex = 0; const size_t kMinimumPiplineStages = 2; +const int kDefaultLoopCount = 10; } HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) : model_(model), run_flag_(false) { @@ -150,7 +151,7 @@ Status HybridModelAsyncExecutor::RunInternal() { GELOGI("HybridModel will execute in pipeline mode"); auto iter_per_run = std::getenv("ITER_NUM"); if (iter_per_run) { - args.num_loops = static_cast(strtol(iter_per_run, nullptr, 10)); + args.num_loops = static_cast(strtol(iter_per_run, nullptr, kDefaultLoopCount)); } ret = pipe_executor_->Execute(args); } else { diff --git a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc index b043ca7f..6c824bf8 
100644 --- a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc +++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc @@ -8,6 +8,7 @@ namespace ge { namespace hybrid { namespace { constexpr int kNumExecutors = 2; +const int kMinLoopCount = 2; const int kIntBase = 10; const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; } @@ -208,7 +209,7 @@ Status HybridModelPipelineExecutor::InitStageExecutors() { Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { int loop_count = args.num_loops; - GE_CHECK_GE(loop_count, 2); + GE_CHECK_GE(loop_count, kMinLoopCount); auto &inputs = args.inputs; auto &input_desc = args.input_desc; From ad66e1fead21d05cfba4e88dd1b65c6ff592826a Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 24 Feb 2021 22:51:52 +0800 Subject: [PATCH 18/44] Fix bug of ge_executor. --- ge/hybrid/executor/node_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index ef348a69..84a52abd 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -30,7 +30,7 @@ class NodeTask; struct GraphExecutionContext; class SubgraphContext; class TaskContext; -class NodeState; +struct NodeState; class ShapeFuture { public: From 1ce576831c72350196866563a07ca9a0f65bd686 Mon Sep 17 00:00:00 2001 From: wxl Date: Thu, 25 Feb 2021 21:05:25 +0800 Subject: [PATCH 19/44] fix slice constant folding bug --- ge/host_kernels/slice_kernel.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index c3274465..6b91db1d 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -56,6 +56,8 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetTensorDesc().GetDataType(); + uint32_t type_size = 0; + (void)TypeUtils::GetDataTypeLength(data_type, type_size); // check data type of begin and size if 
(begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { GELOGW("Data type of begin and size for slice are not DT_INT32."); @@ -69,7 +71,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetData().size() / sizeof(int32_t); + size_t data_size = x_->GetData().size() / type_size; size_t begin_size = begin->GetData().size() / sizeof(int32_t); size_t size_size = size->GetData().size() / sizeof(int32_t); const ge::GeShape &x_shape = x_->GetTensorDesc().GetShape(); From 0b5ef05af58a953aa1d0e0a386b35962d852b615 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E7=A3=8A?= Date: Wed, 24 Feb 2021 16:43:27 +0800 Subject: [PATCH 20/44] fixed sc warning --- ge/common/helper/om_file_helper.cc | 5 ++- ge/executor/ge_executor.cc | 3 +- ge/graph/build/memory/graph_mem_assigner.cc | 4 +- ge/graph/build/task_generator.cc | 2 +- ge/graph/load/model_manager/davinci_model.cc | 41 ++++++++++--------- ge/graph/manager/graph_manager.cc | 5 +-- ge/graph/manager/graph_mem_allocator.h | 2 +- ge/graph/passes/assign_remove_pass.cc | 6 +-- ge/graph/passes/constant_folding_pass.cc | 6 +-- .../passes/hccl_continuous_memcpy_pass.cc | 22 ++++++---- ge/graph/passes/inplace_support_check_pass.cc | 6 +-- ge/graph/passes/reshape_remove_pass.cc | 2 +- .../passes/subgraph_const_migration_pass.cc | 2 +- ge/graph/preprocess/graph_preprocess.cc | 9 ++-- ge/graph/preprocess/graph_preprocess.h | 4 +- ge/graph/preprocess/multi_batch_options.h | 2 +- ge/hybrid/executor/hybrid_execution_context.h | 4 +- ge/hybrid/executor/subgraph_executor.cc | 4 +- ge/hybrid/node_executor/task_context.cc | 4 +- ge/offline/main.cc | 11 +++-- ge/session/omg.cc | 3 +- ge/single_op/task/op_task.cc | 5 ++- inc/framework/generator/generator_api.h | 3 +- inc/framework/memory/memory_api.h | 3 +- 24 files changed, 83 insertions(+), 75 deletions(-) diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc index eb3bbcc2..3702e8f8 100644 --- 
a/ge/common/helper/om_file_helper.cc +++ b/ge/common/helper/om_file_helper.cc @@ -165,7 +165,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint return ACL_ERROR_GE_PARAM_INVALID; } size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); - GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", + GELOGD("ModelPartitionTable num:%u, ModelFileHeader length:%zu, ModelPartitionTable length:%zu", partition_table->num, sizeof(ModelFileHeader), mem_offset); if (model_data_size <= mem_offset) { GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", @@ -207,7 +207,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m "ModelFileHeader length :%zu, ModelPartitionTable length :%zu", index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); if (model_data_size <= cur_offset) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", + GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, + "invalid model data, partition_table->num:%u, model data size %u", partition_table->num, model_data_size); return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; } diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 96bb8527..c4088421 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -454,7 +454,8 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector & if (all_data_dims[i] < 0) { cur_dynamic_dims.push_back(dynamic_dims[i]); } else if (static_cast(all_data_dims[i]) != dynamic_dims[i]) { - GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld", + GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, + "Static dims should be same, index: %zu value: %lu should be %ld", i, dynamic_dims[i], all_data_dims[i]); return 
ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; } diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index f4df8268..f62f6875 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -157,8 +157,8 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { } ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, - int64_t dim_index, int64_t &output_mem_size, - int64_t &batch_dim_num, int64_t &out_size) { + int64_t dim_index, int64_t &output_mem_size, + int64_t &batch_dim_num, int64_t &out_size) { graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); if (graph_status != GRAPH_SUCCESS) { GELOGE(FAILED, "Opdesc GetSize failed!"); diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 3f4cd1bc..f15dc21d 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -1042,7 +1042,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P } GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index, - profiling_point.end_index.size() ); + profiling_point.end_index.size()); bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) { diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index e921aefe..3462baab 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -94,7 +94,7 @@ const int32_t kModelAbortNormal = 0x0704000e; const int32_t kModelAbortNormalNew = 507024; inline bool IsDataOp(const std::string 
&node_type) { - return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; + return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); } inline bool IsTbeTask(const OpDescPtr &op_desc) { @@ -187,12 +187,12 @@ DavinciModel::~DavinciModel() { UnbindTaskSinkStream(); for (size_t i = 0; i < label_list_.size(); ++i) { if (label_list_[i] != nullptr) { - GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index: %zu", i); + GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index:%zu", i); } } for (size_t i = 0; i < stream_list_.size(); ++i) { - GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index: %zu", i); + GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index:%zu", i); } for (size_t i = 0; i < event_list_.size(); ++i) { @@ -337,7 +337,7 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (is_feature_map_mem_has_inited_) { - GELOGE(PARAM_INVALID, "call InitFeatureMapMem more than once."); + GELOGE(PARAM_INVALID, "call InitFeatureMapMem more than once"); return PARAM_INVALID; } is_feature_map_mem_has_inited_ = true; @@ -381,7 +381,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { is_inner_p2p_mem_base_ = true; } - GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); + GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed"); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; @@ -391,7 +391,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { Status DavinciModel::InitVariableMem() { // malloc variable memory base var_mem_base_ = 
VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM); - if (TotalVarMemSize() && var_mem_base_ == nullptr) { + if (TotalVarMemSize() && (var_mem_base_ == nullptr)) { Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize()); if (ret != SUCCESS) { GELOGE(ret, "Malloc variable memory failed."); @@ -500,25 +500,25 @@ Status DavinciModel::DoTaskSink() { } GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_)); - GELOGI("do task_sink. AiCpu deploy type is: %x.", deploy_type_); + GELOGI("do task_sink. AiCpu deploy type is: %x", deploy_type_); GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed."); if (known_node_) { - GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed."); + GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed"); } - GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed."); + GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed"); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed"); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed"); - GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); + GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed"); - GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed."); + GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed"); - GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); + GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed"); GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); @@ -3332,7 +3332,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp /// Status 
DavinciModel::UpdateIoTaskArgs(const std::map &data_info, bool is_input, const vector &blobs, bool is_dynamic, const string &batch_label) { - string input_or_output = "input"; + string input_or_output; is_input ? input_or_output = "input" : input_or_output = "output"; if (blobs.size() != data_info.size()) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", @@ -3342,7 +3342,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & for (const auto &data : data_info) { if (data.first >= blobs.size()) { // check data index. - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", input_or_output.c_str(), data.first, blobs.size()); return ACL_ERROR_GE_PARAM_INVALID; } @@ -4133,10 +4134,10 @@ Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op int64_t data_input_size; (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", - index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, - TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), - TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), - formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); + index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, + TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), + TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), + formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); } } diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index a57f0e61..8cff22ae 
100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -2254,9 +2254,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", new (std::nothrow) LinkGenMaskNodesPass(options_.stream_max_parallel_num))); - GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass", - new (std::nothrow) HcclContinuousMemcpyPass)); + GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass", + new (std::nothrow) HcclContinuousMemcpyPass)); GE_TIMESTAMP_START(after_merge_passes); auto ret = after_merge_passes.Run(compute_graph); diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index d3468e75..d5e8cf8d 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -26,6 +26,7 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" +#include "graph/manager/host_mem_allocator.h" #include "graph/node.h" #include "runtime/mem.h" @@ -139,7 +140,6 @@ class MemoryAllocator { using MemoryAllocatorPtr = std::shared_ptr; class CachingAllocator; class RdmaPoolAllocator; -class HostMemAllocator; class MemManager { public: MemManager(); diff --git a/ge/graph/passes/assign_remove_pass.cc b/ge/graph/passes/assign_remove_pass.cc index e198c2db..4faa04f6 100644 --- a/ge/graph/passes/assign_remove_pass.cc +++ b/ge/graph/passes/assign_remove_pass.cc @@ -24,9 +24,9 @@ namespace { constexpr uint32_t kValidInputNodeOutputNum = 1; constexpr int32_t kAssignRefInputIndex = 0; constexpr int32_t kAssignValueInputIndex = 1; -static const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, - ge::CONSTANT, ge::CONSTANTOP, - ge::VARIABLE, ge::VARIABLEV2 }; +const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, + ge::CONSTANT, ge::CONSTANTOP, + 
ge::VARIABLE, ge::VARIABLEV2 }; } Status AssignRemovePass::Run(NodePtr &node) { diff --git a/ge/graph/passes/constant_folding_pass.cc b/ge/graph/passes/constant_folding_pass.cc index 8a0c6c3c..66e076af 100644 --- a/ge/graph/passes/constant_folding_pass.cc +++ b/ge/graph/passes/constant_folding_pass.cc @@ -50,13 +50,11 @@ Status RunOpKernelWithCheck(NodePtr &node, return FoldingPass::RunOpKernel(node, inputs, outputs); } -const std::map> - &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { +const map> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { return statistic_of_ge_constant_folding_; } -const std::map> - &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { +const map> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { return statistic_of_op_constant_folding_; } diff --git a/ge/graph/passes/hccl_continuous_memcpy_pass.cc b/ge/graph/passes/hccl_continuous_memcpy_pass.cc index cc928479..1931baf0 100644 --- a/ge/graph/passes/hccl_continuous_memcpy_pass.cc +++ b/ge/graph/passes/hccl_continuous_memcpy_pass.cc @@ -140,7 +140,8 @@ bool HcclContinuousMemcpyPass::IsDataNode(const std::string& node_type) { /// @param [in] ge::OutDataAnchorPtr in_node /// @return ge::NodePtr /// -NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { +NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &out_data_anchor) { GE_CHECK_NOTNULL_EXEC(graph, return nullptr); NodePtr pre_node = out_data_anchor->GetOwnerNode(); OpDescPtr pre_op_desc = pre_node->GetOpDesc(); @@ -205,8 +206,9 @@ std::string HcclContinuousMemcpyPass::CheckDuplicateName(const std::string &node /// @param [in] InDataAnchorPtr hccl_in_anchor /// @return status /// -Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, - const InDataAnchorPtr &hccl_in_anchor) { +Status 
HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor) { GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); @@ -235,8 +237,9 @@ Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &gra /// @param [in] InDataAnchorPtr hccl_in_anchor /// @return status /// -Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, - const InDataAnchorPtr &hccl_in_anchor) { +Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &src_out_anchor, + const InDataAnchorPtr &hccl_in_anchor) { GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), hccl_in_anchor->GetOwnerNode()->GetName().c_str()); NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); @@ -274,8 +277,8 @@ Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr /// @return status /// Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, - const OutDataAnchorPtr &var_out_anchor, - const InDataAnchorPtr &hccl_in_anchor) { + const OutDataAnchorPtr &var_out_anchor, + const InDataAnchorPtr &hccl_in_anchor) { if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); return SUCCESS; @@ -354,8 +357,9 @@ Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeG /// @param [in] ge::OutDataAnchorPtr variable node out anchor /// @return ge::NodePtr /// -NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { - GE_CHECK_NOTNULL_EXEC(graph , return nullptr); +NodePtr 
HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &out_data_anchor) { + GE_CHECK_NOTNULL_EXEC(graph, return nullptr); NodePtr pre_node = out_data_anchor->GetOwnerNode(); OpDescPtr pre_op_desc = pre_node->GetOpDesc(); if (pre_op_desc == nullptr) { diff --git a/ge/graph/passes/inplace_support_check_pass.cc b/ge/graph/passes/inplace_support_check_pass.cc index 44ad8361..44a0b3ef 100644 --- a/ge/graph/passes/inplace_support_check_pass.cc +++ b/ge/graph/passes/inplace_support_check_pass.cc @@ -23,9 +23,9 @@ namespace ge { namespace { constexpr uint32_t kInplaceSupportOutputIndex = 0; constexpr uint32_t kInplaceSupportOutputNum = 1; -static const std::set kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, - ge::CONSTANT, ge::CONSTANTOP, - ge::VARIABLE, ge::VARIABLEV2 }; +const std::set kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, + ge::CONSTANT, ge::CONSTANTOP, + ge::VARIABLE, ge::VARIABLEV2 }; } Status InplaceSupportCheckPass::Run(NodePtr &node) { GELOGD("InplaceSupportCheckPass running"); diff --git a/ge/graph/passes/reshape_remove_pass.cc b/ge/graph/passes/reshape_remove_pass.cc index 80573e79..10937cf1 100755 --- a/ge/graph/passes/reshape_remove_pass.cc +++ b/ge/graph/passes/reshape_remove_pass.cc @@ -43,7 +43,7 @@ Status ReshapeRemovePass::Run(NodePtr &node) { GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); int key = kToBeDeleteOp.find(node->GetType()) == kToBeDeleteOp.end() ? 
kOpNoDelete : kToBeDeleteOp[node->GetType()]; - switch(key) { + switch (key) { case kReshapeType: { bool is_shape_unknown = false; if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc index d2effd44..d27cacf7 100644 --- a/ge/graph/passes/subgraph_const_migration_pass.cc +++ b/ge/graph/passes/subgraph_const_migration_pass.cc @@ -385,7 +385,7 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra // Break Move and follow, Link Data and follow. const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex); - const auto in_anchors =out_anchor->GetPeerInDataAnchors(); + const auto in_anchors = out_anchor->GetPeerInDataAnchors(); for (const auto in_anchor : in_anchors) { GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 499e1897..db17e091 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -991,7 +991,6 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, Status GetDynamicInputShapeRange(const std::vector &user_input, const std::map &graph_option, vector>> &range_vec) { // check both mode and shape_range option are all enabled - auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE); bool enable_dynamic_execute_mode = (mode_iter != graph_option.end()) && (mode_iter->second == "dynamic_execute"); if (!enable_dynamic_execute_mode) { @@ -1272,9 +1271,10 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { return SUCCESS; } -Status GraphPrepare::UpdateInput(const std::vector &user_input, const std::map &graph_option) { +Status GraphPrepare::UpdateInput(const 
std::vector &user_input, + const std::map &graph_option) { // Get shape range of input in dynamic_execute mode - vector>> dynamic_shape_range_vec; + vector>> dynamic_shape_range_vec; auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); @@ -2012,7 +2012,8 @@ Status GraphPrepare::ProcessNetOutput() { return SUCCESS; } -Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input,const std::map &graph_option) { +Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input, + const std::map &graph_option) { compute_graph_->SetInputSize(user_input.size()); if (user_input.empty()) { return SUCCESS; diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h index de755418..b81067dd 100755 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -63,8 +63,8 @@ class GraphPrepare { Status CheckRefOp(); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); Status AdjustDataOpOutput(const NodePtr &node); - Status UpdateInput(const std::vector &user_input, const std::map &graph_option); - Status CheckAndUpdateInput(const std::vector &user_input, const std::map &graph_option); + Status UpdateInput(const std::vector &user_input, const std::map &graph_option); + Status CheckAndUpdateInput(const std::vector &user_input, const std::map &graph_option); Status CheckConstOp(); Status VerifyConstOp(const NodePtr &node); Status CheckUserInput(const std::vector &user_input); diff --git a/ge/graph/preprocess/multi_batch_options.h b/ge/graph/preprocess/multi_batch_options.h index 0ddaea0d..ace92ce6 100644 --- a/ge/graph/preprocess/multi_batch_options.h +++ b/ge/graph/preprocess/multi_batch_options.h @@ -105,7 +105,7 @@ GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector &shape, 
con /// @return 0: true/false /// GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector &shape, const string &data_name, - const std::string &input_format); + const std::string &input_format); } // namespace multibatch } // namespace ge diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 5e4e4f9a..4dc010df 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -62,9 +62,9 @@ struct GraphExecutionContext { rtStream_t stream = nullptr; rtContext_t rt_context = nullptr; rtContext_t rt_gen_context = nullptr; - std::unique_ptr callback_manager; + std::unique_ptr callback_manager = nullptr; NpuMemoryAllocator *allocator = nullptr; - mutable std::unique_ptr profiler; + mutable std::unique_ptr profiler = nullptr; DumpProperties dump_properties; bool trace_enabled = false; bool dump_enabled = false; diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 8fbc6741..45db9936 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -275,10 +275,10 @@ Status SubgraphExecutor::PrepareNodes(int group) { Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const { GetContext().SetSessionId(context_->context_id); HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), - "[%s] Failed to InferShape.", node_state.GetName().c_str()); + "[%s] Failed to InferShape.", node_state.GetName().c_str()); GetContext().SetSessionId(context_->session_id); HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state), - "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); + "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 085970e0..f52b48d6 
100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -520,7 +520,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream const NodeItem &node_item = GetNodeItem(); auto op_desc = node_item.GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - const GraphExecutionContext * graph_context = GetExecutionContext(); + const GraphExecutionContext *graph_context = GetExecutionContext(); GE_CHECK_NOTNULL(graph_context); const HybridModel *model = graph_context->model; GE_CHECK_NOTNULL(model); @@ -551,7 +551,7 @@ Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream const NodeItem &node_item = GetNodeItem(); auto op_desc = node_item.GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - const GraphExecutionContext * graph_context = GetExecutionContext(); + const GraphExecutionContext *graph_context = GetExecutionContext(); GE_CHECK_NOTNULL(graph_context); const HybridModel *model = graph_context->model; GE_CHECK_NOTNULL(model); diff --git a/ge/offline/main.cc b/ge/offline/main.cc index 31a9d96f..c7bb46a3 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -62,19 +62,18 @@ using std::shared_ptr; using std::string; using std::vector; +namespace { static bool is_dynamic_input = false; - const char *const kModeSupport = "only support 0(model to framework model), " "1(framework model to json), 3(only pre-check), " "5(pbtxt to json), 6(display model info)"; const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; - -static const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; -static const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; -static const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; - +const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; +const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; 
+const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; // limit available mem size 2G const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 +} // namespace DEFINE_string(model, "", "The model file."); DEFINE_string(output, "", "The output file path&name."); diff --git a/ge/session/omg.cc b/ge/session/omg.cc index a82db26b..fe0a1a1d 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -965,7 +965,8 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js } else { ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "ParseModelContent failed because of invalid om file. Please check --om param."); } if (model.model_data != nullptr) { diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 4f1c1f03..df4161c7 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -385,7 +385,8 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint num_inputs_, num_outputs_, unknown_type_)); - GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!"); + GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, + "Malloc aicpu_ext_handle mem failed!"); Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); if (ret != SUCCESS) { @@ -423,7 +424,7 @@ Status AiCpuBaseTask::SetInputConst() { return SUCCESS; } -Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, +Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, std::vector &output_desc, rtStream_t stream) { GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); diff --git a/inc/framework/generator/generator_api.h 
b/inc/framework/generator/generator_api.h index d44edd0c..56b83a20 100644 --- a/inc/framework/generator/generator_api.h +++ b/inc/framework/generator/generator_api.h @@ -55,7 +55,8 @@ typedef void *OpTensor_t; /// @return 0 for success / others for fail /// GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, - const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, const char *om_file); + const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, + const char *om_file); /// /// @ingroup ge diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h index 34e596a2..a316fd59 100644 --- a/inc/framework/memory/memory_api.h +++ b/inc/framework/memory/memory_api.h @@ -52,7 +52,8 @@ GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_ME /// \param var_info [in] host variable addr infos. /// \param mem_type [in] memory type for rdma pool. /// \return Status result of function -GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); +GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector &var_info, + rtMemType_t mem_type = RT_MEMORY_HBM); /// /// \param tensor_info [in] description for tensor stored shared memory. 
From db9a09e292c1d3841cb9f6e83033dd92ffad0b2a Mon Sep 17 00:00:00 2001 From: wxl Date: Fri, 26 Feb 2021 10:08:31 +0800 Subject: [PATCH 21/44] fix slice constant folding bug --- ge/host_kernels/slice_kernel.cc | 37 +++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index 6b91db1d..0867ec2f 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -16,6 +16,8 @@ #include "host_kernels/slice_kernel.h" +#include + #include "common/ge_inner_error_codes.h" #include "common/op/ge_op_utils.h" #include "common/types.h" @@ -31,6 +33,30 @@ const size_t kSliceInputSize = 3; const size_t kSliceInputIndexX = 0; const size_t kSliceInputIndexBegin = 1; const size_t kSliceInputIndexSize = 2; +const std::set kSupportedDataTypeToLength = { + DT_BOOL, + DT_INT64, + DT_UINT64, + DT_FLOAT, + DT_INT32, + DT_UINT32, + DT_INT8, + DT_UINT8, + DT_INT16, + DT_UINT16, + DT_FLOAT16, + DT_DOUBLE, + DT_DUAL, + DT_DUAL_SUB_INT8, + DT_DUAL_SUB_UINT8, + DT_COMPLEX64, + DT_COMPLEX128, + DT_QINT8, + DT_QINT16, + DT_QINT32, + DT_QUINT8, + DT_QUINT16, +}; } // namespace Status SliceKernel::Compute(const OpDescPtr attr, const std::vector &input, @@ -53,11 +79,18 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetTensorDesc().GetDataType(); + // check supported + if (kSupportedDataTypeToLength.count(data_type) == 0) { + GELOGW("input_x data_type is [%s], does not supported!", TypeUtils::DataTypeToSerialString(data_type).c_str()); + return NOT_CHANGED; + } uint32_t type_size = 0; - (void)TypeUtils::GetDataTypeLength(data_type, type_size); + bool is_success = TypeUtils::GetDataTypeLength(data_type, type_size); + if (!is_success) { + return NOT_CHANGED; + } // check data type of begin and size if (begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { GELOGW("Data type of begin and size for slice are 
not DT_INT32."); From 179b0e21bd521fafa690803600433437b112297a Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Fri, 26 Feb 2021 11:43:17 +0800 Subject: [PATCH 22/44] support error_manager used in train --- CMakeLists.txt | 10 ------- ge/client/ge_api.cc | 30 ++++++++++++++++++++ ge/graph/load/model_manager/davinci_model.cc | 2 ++ ge/graph/load/model_manager/davinci_model.h | 3 ++ ge/graph/manager/graph_manager.cc | 12 +++++--- ge/graph/manager/graph_manager.h | 3 ++ ge/offline/main.cc | 1 + inc/external/ge/ge_api.h | 4 +++ metadef | 2 +- parser | 2 +- 10 files changed, 53 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f88da24e..e67b5074 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,9 +76,7 @@ if (ENABLE_OPEN_SRC) find_module(runtime libruntime.so ${GE_LIB_PATH}) find_module(runtime_compile libruntime_compile.so ${GE_LIB_PATH}) find_module(resource libresource.so ${GE_LIB_PATH}) - find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) - find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH}) #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) elseif(ENABLE_GE_COV OR ENABLE_GE_UT) @@ -86,11 +84,9 @@ if (ENABLE_OPEN_SRC) else() find_module(slog libalog.so ${ASCEND_ATC_DIR}) find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) - find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) if(PLATFORM STREQUAL "train") find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) - find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) if(PRODUCT STREQUAL "flr3") @@ -100,8 +96,6 @@ if (ENABLE_OPEN_SRC) find_module(adump_server libadump_server.a 
${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) - find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) - find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") elseif(PRODUCT STREQUAL "flr1") @@ -114,11 +108,9 @@ if (ENABLE_OPEN_SRC) elseif(PLATFORM STREQUAL "all") find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) - find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) - find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) else() message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") @@ -144,7 +136,6 @@ elseif (ENABLE_D OR ENABLE_ACL) # common libraries find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) - find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) if (ENABLE_D) @@ -164,7 +155,6 @@ elseif(ENABLE_MS_TESTCASES) # common libraries find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) - find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef) diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index d65d7667..05671408 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ 
-32,6 +32,7 @@ #include "graph/common/ge_call_wrapper.h" #include "register/op_registry.h" #include "common/ge/tbe_plugin_manager.h" +#include "common/util/error_manager/error_manager.h" #include "toolchain/plog.h" using domi::OpRegistry; @@ -79,6 +80,8 @@ Status CheckOptionsValid(const std::map &options) { // Initialize GE, prepare for execution, call GELib::Initialize Status GEInitializeImpl(const std::map &options) { GELOGT(TRACE_INIT, "GEInitialize start"); + + ErrorManager::GetInstance().GenWorkStreamIdDefault(); // 0.check init status if (g_ge_initialized) { GELOGW("GEInitialize is called more than once"); @@ -157,6 +160,8 @@ Status GEInitialize(const std::map &options) { // GE finalize, releasing all resources Status GEFinalize() { GELOGT(TRACE_INIT, "GEFinalize start"); + + ErrorManager::GetInstance().GenWorkStreamIdDefault(); // check init status if (!g_ge_initialized) { GELOGW("GEFinalize is called before GEInitialize"); @@ -202,9 +207,19 @@ Status GEFinalize() { return ret; } +std::string GEGetErrorMsg() { + return ErrorManager::GetInstance().GetErrorMessage(); +} + +std::string GEGetWarningMsg() { + return ErrorManager::GetInstance().GetWarningMessage(); +} + // Initialize session,which calls innerSession Session::Session(const std::map &options) { GELOGT(TRACE_INIT, "Session Constructor start"); + + ErrorManager::GetInstance().GenWorkStreamIdDefault(); // check init status sessionId_ = 0; if (!g_ge_initialized) { @@ -235,6 +250,8 @@ Session::Session(const std::map &options) { Session::Session(const std::map &options) { GELOGT(TRACE_INIT, "Session Constructor start"); + + ErrorManager::GetInstance().GenWorkStreamIdDefault(); // check init status sessionId_ = 0; if (!g_ge_initialized) { @@ -311,11 +328,13 @@ Session::~Session() { Status Session::AddGraph(uint32_t graph_id, const Graph &graph) { std::map options; + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); return AddGraph(graph_id, graph, options); } Status 
Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map &options) { GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); @@ -334,6 +353,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map &options) { GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); @@ -360,6 +380,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, } Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) { + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); std::map options; return AddGraphWithCopy(graph_id, graph, options); } @@ -367,6 +388,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) { Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, const std::map &options) { GELOGT(TRACE_INIT, "Start to add graph in Session. 
graph_id: %u, session_id: %lu.", graph_id, sessionId_); + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); @@ -389,6 +411,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, Status Session::RemoveGraph(uint32_t graph_id) { GELOGT(TRACE_INIT, "Session RemoveGraph start"); + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); // call RemoveGraph std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (!instance_ptr || !instance_ptr->InitFlag()) { @@ -457,6 +480,7 @@ void PrintOutputResult(std::vector &outputs) { Status Session::RunGraph(uint32_t graph_id, const std::vector &inputs, std::vector &outputs) { GELOGT(TRACE_INIT, "Session RunGraph start"); + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); std::vector graph_inputs = inputs; // call RunGraph std::shared_ptr instance_ptr = ge::GELib::GetInstance(); @@ -483,10 +507,12 @@ Status Session::RunGraph(uint32_t graph_id, const std::vector &inputs, s } Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) { + ErrorManager::GetInstance().GenWorkStreamIdDefault(); return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback); } Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback) { + ErrorManager::GetInstance().GenWorkStreamIdDefault(); std::string str_key; if (key != nullptr) { str_key = key; @@ -495,6 +521,7 @@ Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFu } Status Session::BuildGraph(uint32_t graph_id, const std::vector &inputs) { + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); std::shared_ptr instance_ptr = 
ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); @@ -511,6 +538,7 @@ Status Session::BuildGraph(uint32_t graph_id, const std::vector Status Session::RunGraphAsync(uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback) { + ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); @@ -529,6 +557,7 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector &var_names, std::vector &var_values) { + ErrorManager::GetInstance().GenWorkStreamIdDefault(); auto instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); @@ -544,6 +573,7 @@ Status Session::GetVariables(const std::vector &var_names, std::vec } Status Session::GetVariables(const std::vector &var_names, std::vector &var_values) { + ErrorManager::GetInstance().GenWorkStreamIdDefault(); auto instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 3462baab..740a86f5 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -2641,6 +2641,7 @@ void *DavinciModel::Run(DavinciModel *model) { bool seq_end_flag = false; uint32_t model_id = model->Id(); uint32_t device_id = model->GetDeviceId(); + GetContext().SetWorkStreamId(model->GetWorkStreamId()); GELOGI("Model Run thread start, model_id:%u.", model_id); rtError_t rt_ret = rtSetDevice(static_cast(device_id)); @@ -2807,6 +2808,7 @@ Status DavinciModel::ModelRunStart() 
{ int64_t maxDumpOpNum = std::strtol(opt.c_str(), nullptr, kDecimal); maxDumpOpNum_ = maxDumpOpNum; + work_stream_id_ = GetContext().WorkStreamId(); CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this); GELOGI("model tread create success, model id:%u.", model_id_); return SUCCESS; diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 5bc3a68e..a0df910b 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -412,6 +412,8 @@ class DavinciModel { /// uint64_t GetSessionId() const { return session_id_; } + uint64_t GetWorkStreamId() const { return work_stream_id_; } + /// /// @ingroup ge /// @brief SetDeviceId @@ -960,6 +962,7 @@ class DavinciModel { vector output_mbuf_list_; // output mbuf created by dequeue task. uint64_t session_id_; + uint64_t work_stream_id_; uint32_t device_id_; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 8cff22ae..9412ae69 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -541,7 +541,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr } std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, compute_graph->GetGraphID(), subgraph, - compute_graph->GetName(), session_id, + compute_graph->GetName(), session_id, GetContext().WorkStreamId(), GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); @@ -557,7 +557,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr } std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, compute_graph->GetGraphID(), subgraph, - compute_graph->GetName(), session_id, + compute_graph->GetName(), session_id, GetContext().WorkStreamId(), GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); @@ -2508,8 +2508,10 @@ Status 
GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager const SubGraphInfoPtr &sub_graph_info_ptr, const std::string &root_graph_name, uint64_t session_id, + uint64_t work_stream_id, const GEThreadLocalContext &ge_context) { if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { + GetContext().SetWorkStreamId(work_stream_id); GetContext().SetSessionId(session_id); GetThreadLocalContext() = ge_context; graph_manager->UpdateLocalOmgContext(root_graph_id); @@ -2643,6 +2645,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { GELOGI("A new loop start."); + GetContext().SetWorkStreamId(args.work_stream_id); GetContext().SetSessionId(args.session_id); GetThreadLocalContext() = args.context; graph_manager->UpdateLocalOmgContext(args.graph_id); @@ -2724,8 +2727,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { ge_root_model = graph_node->GetGeRootModel(); } - graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.input_tensor, - ge_root_model, GetThreadLocalContext(), args.callback })); + graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.work_stream_id, + args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback })); GELOGI("Loop end."); } } @@ -2824,6 +2827,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) { GELOGI("A new loop start."); + GetContext().SetWorkStreamId(args.work_stream_id); GetContext().SetSessionId(args.session_id); GetThreadLocalContext() = args.context; graph_manager->UpdateLocalOmgContext(args.graph_id); diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index 31e8799f..a13aa2ff 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -196,6 +196,7 @@ class GraphManager { GraphId graph_id; std::vector input_tensor; uint64_t session_id; + uint64_t work_stream_id; GEThreadLocalContext context; RunAsyncCallback callback; }; @@ 
-204,6 +205,7 @@ class GraphManager { GraphNodePtr graph_node; GraphId graph_id; uint64_t session_id; + uint64_t work_stream_id; std::vector input_tensor; GeRootModelPtr ge_root_model; GEThreadLocalContext context; @@ -221,6 +223,7 @@ class GraphManager { const SubGraphInfoPtr &sub_graph_info_ptr, const std::string &root_graph_name, uint64_t session_id, + uint64_t work_stream_id, const GEThreadLocalContext &ge_context); Status ParseInputsDims(const std::vector &input_tensor); void ParseInputsDimsForData(const std::vector &input_tensor); diff --git a/ge/offline/main.cc b/ge/offline/main.cc index c7bb46a3..069ec769 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -1325,6 +1325,7 @@ int init(int argc, char* argv[]) { return ret; } + ErrorManager::GetInstance().GenWorkStreamIdDefault(); return 0; } diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h index cd4ca323..c8b5a8ec 100644 --- a/inc/external/ge/ge_api.h +++ b/inc/external/ge/ge_api.h @@ -42,6 +42,10 @@ GE_FUNC_VISIBILITY Status GEInitialize(const std::map &)) diff --git a/metadef b/metadef index b6de68fd..f982caa0 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit b6de68fdf0f131fd5f8aa3a84245ad7779b348f5 +Subproject commit f982caa0981b1fdcc55a8ec27b4f4de9c58d33ba diff --git a/parser b/parser index 7a631135..d2fc9584 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 7a6311351f8294eb11033b10e9f7b2b993cc3c2a +Subproject commit d2fc958450f7bd243eff8432aadeb9fa95fa2f61 From 696c7f4b8fe6528cb39280bfa761ede68dc2a47d Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Fri, 26 Feb 2021 14:09:42 +0800 Subject: [PATCH 23/44] profiling file to json --- ge/common/profiling/profiling_manager.cc | 233 ++++++++------- ge/common/profiling/profiling_manager.h | 11 +- ge/graph/build/task_generator.cc | 2 +- ge/graph/load/model_manager/davinci_model.cc | 274 ++++++------------ ge/graph/load/model_manager/davinci_model.h | 3 - ge/hybrid/executor/worker/execution_engine.cc | 60 +--- 
.../aicore/aicore_node_executor.cc | 5 +- .../aicpu/aicpu_node_executor.cc | 5 +- ge/hybrid/node_executor/task_context.cc | 32 +- ge/hybrid/node_executor/task_context.h | 8 +- ge/single_op/single_op.cc | 30 +- ge/single_op/task/op_task.cc | 33 ++- ge/single_op/task/op_task.h | 8 +- inc/framework/common/ge_types.h | 18 +- tests/ut/ge/CMakeLists.txt | 2 +- .../ge/graph/load/davinci_model_unittest.cc | 7 + .../ge/single_op/single_op_model_unittest.cc | 32 +- 17 files changed, 314 insertions(+), 449 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 86b1b2c5..0cf74b1f 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -20,6 +20,8 @@ #include "framework/common/debug/log.h" #include "framework/common/string_util.h" #include "graph/ge_context.h" +#include "graph/utils/type_utils.h" +#include "graph/types.h" #include "runtime/base.h" #include "graph/load/model_manager/davinci_model.h" @@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point"; #ifdef DAVINCI_SUPPORT_PROFILING const size_t kReportMaxLen = 2048; const int32_t kMaxDeviceNum = 256; +const uint32_t kInteval = 2; const std::string kConfigNumsdev = "devNums"; const std::string kConfigDevIdList = "devIdList"; const std::string kProfStart = "prof_start"; const std::string kProfStop = "prof_stop"; const std::string kProfModelSubscribe = "prof_model_subscribe"; const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; +const std::string kModelName = "model_name"; +const std::string kModelId = "model_id"; +const std::string kOpNmae = "op_name"; +const std::string kOptype = "op_type"; +const std::string kBlockDim = "block_dims"; +const std::string kTaskId = "task_id"; +const std::string kStreamId = "stream_id"; +const std::string kShapeType = "shape_type"; +const std::string kCurIterNum = "cur_iter_num"; +const std::string kTaskType = "task_type"; +const std::string kInput = "input"; 
+const std::string kOutput = "output"; +const std::string kFormat = "format"; +const std::string kDataType = "data_type"; +const std::string kShape = "shape"; +const std::string kIdx = "idx"; + #endif } // namespace @@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf #endif } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( - uint32_t model_id, const std::vector &task_desc_info, const int32_t &device_id) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo( + const TaskDescInfo &task, Json &task_json) { #ifdef DAVINCI_SUPPORT_PROFILING - std::string data; - for (const auto &task : task_desc_info) { - std::string model_name = task.model_name; - std::string op_name = task.op_name; - uint32_t block_dim = task.block_dim; - uint32_t task_id = task.task_id; - uint32_t stream_id = task.stream_id; - std::string shape_type = task.shape_type; - int64_t cur_iter_num = task.cur_iter_num; - uint32_t task_type = task.task_type; - data = model_name.append(" ") - .append(op_name).append(" ") - .append(std::to_string(block_dim)).append(" ") - .append(std::to_string(task_id)).append(" ") - .append(std::to_string(stream_id)).append(" ") - .append(std::to_string(model_id)).append(" ") - .append(shape_type).append(" ") - .append(std::to_string(cur_iter_num)).append(" ") - .append(std::to_string(task_type)).append("\n"); - - ReporterData reporter_data{}; - reporter_data.deviceId = device_id; - reporter_data.data = (unsigned char *)data.c_str(); - reporter_data.dataLen = data.size(); - int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info")); - if (ret != EOK) { - GELOGE(ret, "Report data tag of task_desc_info memcpy error!"); - return; - } - - int32_t cb_ret = CallMsprofReport(reporter_data); - if (cb_ret != 0) { - GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret); - 
return; - } + for (size_t i = 0; i < task.input_format.size(); i++) { + Json tmp_input; + tmp_input[kIdx] = i; + Format format = task.input_format[i]; + tmp_input[kFormat] = TypeUtils::FormatToSerialString(format); + DataType data_type = task.input_data_type[i]; + tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type); + tmp_input[kShape] = task.input_shape[i]; + task_json[kInput] += tmp_input; + } + + for (size_t i = 0; i < task.output_format.size(); i++) { + Json tmp_output; + tmp_output[kIdx] = i; + Format format = task.output_format[i]; + tmp_output[kFormat] = TypeUtils::FormatToSerialString(format); + DataType data_type = task.output_data_type[i]; + tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type); + tmp_output[kShape] = task.output_shape[i]; + task_json[kOutput] += tmp_output; } - - data.clear(); #endif } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( - uint32_t model_id, const std::vector &compute_graph_desc_info, const int32_t &device_id) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( + uint32_t model_id, const std::vector &task_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING - std::string data; - for (const auto &graph : compute_graph_desc_info) { - data.append("model_name:") - .append(graph.model_name) - .append(" op_name:") - .append(graph.op_name) - .append(" op_type:") - .append(graph.op_type); - for (size_t i = 0; i < graph.input_format.size(); ++i) { - data.append(" input_id:") - .append(std::to_string(i)) - .append(" input_format:") - .append(std::to_string(graph.input_format.at(i))) - .append(" input_data_type:") - .append(std::to_string(graph.input_data_type.at(i))) - .append(" input_shape:\""); - size_t input_shape_len = graph.input_shape.at(i).size(); - if (input_shape_len == 0) { - data.append(""); - } else if (input_shape_len == 1) { - 
data.append(std::to_string(graph.input_shape.at(i).at(0))); - } else { - for (size_t j = 0; j < input_shape_len - 1; ++j) { - data.append(std::to_string(graph.input_shape.at(i).at(j))).append(","); - } - data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1))); - } - - data.append("\""); - } - - for (size_t i = 0; i < graph.output_format.size(); ++i) { - data.append(" output_id:") - .append(std::to_string(i)) - .append(" output_format:") - .append(std::to_string(graph.output_format.at(i))) - .append(" output_data_type:") - .append(std::to_string(graph.output_data_type.at(i))) - .append(" output_shape:\""); - size_t output_shape_len = graph.output_shape.at(i).size(); - if (output_shape_len == 0) { - data.append(""); - } else if (output_shape_len == 1) { - data.append(std::to_string(graph.output_shape.at(i).at(0))); - } else { - for (size_t j = 0; j < output_shape_len - 1; ++j) { - data.append(std::to_string(graph.output_shape.at(i).at(j))).append(","); - } - data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1))); - } - data.append("\""); + for (const auto &task : task_desc_info) { + Json task_info; + task_info[kModelName] = task.model_name; + task_info[kModelId] = model_id; + task_info[kOpNmae] = task.op_name; + task_info[kOptype] = task.op_type; + task_info[kBlockDim] = task.block_dim; + task_info[kTaskType] = task.task_type; + task_info[kTaskId] = task.task_id; + task_info[kStreamId] = task.stream_id; + task_info[kCurIterNum] = task.cur_iter_num; + task_info[kShapeType] = task.shape_type; + ProfilingOpInputOutInfo(task, task_info); + + std::string reported_data; + try { + reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch (std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + return ; + } catch (...) 
{ + GELOGE(FAILED, "Failed to convert JSON to string."); + return; } - - data.append(" model_id:").append(std::to_string(model_id)); - data.append(" task_id:").append(std::to_string(graph.task_id)); - data.append(" stream_id:").append(std::to_string(graph.stream_id)); - data.append("\n"); - - GraphDescReport(device_id, data); - data.clear(); + reported_data.append(",") + .append("\n"); + ReportData(device_id, reported_data, "task_desc_info"); } #endif } -void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData( + const int32_t &device_id, const string &data, const string &tag_name) { #ifdef DAVINCI_SUPPORT_PROFILING ReporterData reporter_data{}; int ret = -1; @@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d size_t index = data.size() / kReportMaxLen; if (index >= 1) { reporter_data.deviceId = device_id; - ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); - GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); + ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); + GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); for (size_t i = 0; i < index; ++i) { reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; reporter_data.dataLen = kReportMaxLen; cb_ret = CallMsprofReport(reporter_data); - GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); + GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); + return;); } reporter_data.dataLen = data.size() - kReportMaxLen * index; if (reporter_data.dataLen != 0) { reporter_data.data = (unsigned char *)data.c_str() + 
kReportMaxLen * index; cb_ret = CallMsprofReport(reporter_data); - GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); + GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); + return;); } } else { reporter_data.deviceId = device_id; reporter_data.data = (unsigned char *)data.c_str(); reporter_data.dataLen = data.size(); - ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); - GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); + ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); + GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); cb_ret = CallMsprofReport(reporter_data); - GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); + GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); + return;); } #endif } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( - uint32_t model_id, const std::vector &task_desc_info, - const std::vector &compute_graph_desc_info) { + uint32_t model_id, const std::vector &task_desc_info) { #ifdef DAVINCI_SUPPORT_PROFILING int32_t logic_device_id = 0; rtError_t rt_ret = rtGetDevice(&logic_device_id); @@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr GELOGD("current logic_device_id:%d", logic_device_id); GELOGD("start ProfilingTaskDescInfo."); ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); - GELOGD("start ProfilingGraphDescInfo."); - ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); GELOGD("Report profiling data for GE end."); #endif } @@ -813,6 +784,44 @@ 
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs static_cast(&reporter_data), sizeof(ReporterData)); } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo( + const OpDescPtr &op, TaskDescInfo &task_desc_info) const { + std::vector input_format; + std::vector> input_shape; + std::vector input_data_type; + for (size_t i = 0; i < op->GetAllInputsSize(); ++i) { + GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i); + if (input_tensor_desc == nullptr) { + continue; + } + input_format.emplace_back(input_tensor_desc->GetFormat()); + input_shape.emplace_back(input_tensor_desc->GetShape().GetDims()); + input_data_type.emplace_back(input_tensor_desc->GetDataType()); + } + std::vector output_format; + std::vector> output_shape; + std::vector output_data_type; + for (size_t j = 0; j < op->GetOutputsSize(); ++j) { + GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j); + if (output_tensor_desc == nullptr) { + continue; + } + output_format.emplace_back(output_tensor_desc->GetFormat()); + output_shape.emplace_back(output_tensor_desc->GetShape().GetDims()); + output_data_type.emplace_back(output_tensor_desc->GetDataType()); + } + + std::vector format_default = { FORMAT_NULL }; + std::vector> shape_default = { {0} }; + std::vector data_type_default = { DT_UNDEFINED }; + task_desc_info.input_format = input_format.empty() ? format_default : input_format; + task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape; + task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type; + task_desc_info.output_format = output_format.empty() ? format_default : output_format; + task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape; + task_desc_info.output_data_type = output_data_type.empty() ? 
data_type_default : output_data_type; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( std::string &fp_point, std::string &bp_point) { // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 22fa8f8c..34acee0e 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -54,6 +54,8 @@ namespace { } // namespace namespace ge { +class OpDesc; +using OpDescPtr = std::shared_ptr; struct DeviceSubsInfo { uint64_t module; uint32_t subscribe_count; @@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { bool ProfilingModelExecuteOn() const; // is_execute_profiling_ only used by ge option and env bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } - void ReportProfilingData(uint32_t model_id, const std::vector &task_desc_info, - const std::vector &compute_graph_desc_info); + void ReportProfilingData(uint32_t model_id, const std::vector &task_desc_info); void ProfilingTaskDescInfo(uint32_t model_id, const std::vector &task_desc_info, const int32_t &device_id); - void ProfilingGraphDescInfo(uint32_t model_id, const std::vector &compute_graph_desc_info, - const int32_t &device_id); + void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json); Status PluginInit() const; void PluginUnInit() const; Status CallMsprofReport(ReporterData &reporter_data) const; @@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } void GetFpBpPoint(std::string &fp_point, std::string &bp_point); + void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; + void 
ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); private: Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); Status ParseOptions(const std::string &options); @@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { Status ProfParseDeviceId(const std::map &config_para, vector &device_list); uint64_t GetProfilingModule(); - void GraphDescReport(const int32_t &device_id, const string &data); void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector &device_list); void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index f15dc21d..4eda4020 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi // subgraph of dynamic graph no need to find index, has been found in parent graph if (IsSubGraphOfDynamicGraph(graph)) { - GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); + GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str()); return SUCCESS; } diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 3462baab..a593ea67 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -92,6 +92,32 @@ const uint32_t kEndOfSequence = 0x0704000a; const uint32_t kEndOfSequenceNew = 507005; const int32_t kModelAbortNormal = 0x0704000e; const int32_t kModelAbortNormalNew = 507024; +const uint32_t kInteval = 2; +const char *const kModelName = "model_name"; +const char *const kModeleId = "model_id"; +const char *const kLoadStartTime = "load_start_time"; +const char *const kLoadEndTime = "load_end_time"; +const char 
*const kFusionOpInfo = "fusion_op_info"; +const char *const kFusionOpName = "fusion_op_name"; +const char *const kOriginalOpNum = "origin_op_num"; +const char *const kOriginalOpName = "origin_op_name"; +const char *const kStreamId = "stream_id"; +const char *const kFusionOpMemoryInfo = "memory_info"; +const char *const kInputSize = "input_size"; +const char *const kOutputSize = "output_size"; +const char *const kWeightSize = "weight_size"; +const char *const kWorkSpaceSize = "workspace_size"; +const char *const kTotalSize = "total_size"; +const char *const kTaskCount = "task_count"; +const char *const kTaskId = "task_id"; +const char* const kRequestId = "request_id"; +const char* const kThreadId = "thread_id"; +const char* const kInputBeginTime = "input_begin_time"; +const char* const kInputEndTime = "input_end_time"; +const char* const kInferBeginTime = "infer_begin_time"; +const char* const kInferEndTime = "infer_end_time"; +const char* const kOutputBeginTime = "output_start_time"; +const char* const kOutputEndTime = "output_end_time"; inline bool IsDataOp(const std::string &node_type) { return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); @@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size } Status DavinciModel::ReportProfilingData() { - std::vector compute_graph_desc_info; - Status ret = GetComputeGraphInfo(compute_graph_desc_info); - if (ret != SUCCESS) { - GELOGE(ret, "GetComputeGraphInfo failed."); - return ret; - } - ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); + ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo()); GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); return SUCCESS; @@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() { } Status DavinciModel::SinkModelProfile() { - // profiling plugin must be registered auto &prof_mgr = 
ProfilingManager::Instance(); - ReporterData reporter_data{}; - // report model data tag name - std::string tag_name("model_load_info_" + std::to_string(this->Id())); - GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, - return FAILED, "Sink model tag memcpy error."); - // Model Header std::string name = om_name_.empty() ? name_ : om_name_; - size_t name_len = name.size(); - reporter_data.deviceId = device_id_; - reporter_data.data = (unsigned char *)&name_len; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - reporter_data.data = (unsigned char *)name.c_str(); - reporter_data.dataLen = name.size(); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - uint32_t model_id = this->Id(); - reporter_data.data = (unsigned char *)&model_id; - reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // Load Start/End Time int64_t start_time = this->GetLoadBeginTime(); - reporter_data.data = (unsigned char *)&start_time; - reporter_data.dataLen = sizeof(int64_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - int64_t end_time = this->GetLoadEndTime(); - reporter_data.data = (unsigned char *)&end_time; - reporter_data.dataLen = sizeof(int64_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); + Json model_load_info; + model_load_info[kModelName] = name; + model_load_info[kModeleId] = model_id; + model_load_info[kLoadStartTime] = start_time; + model_load_info[kLoadEndTime] = end_time; + // fusion op info using CIT = 
std::multimap::const_iterator; using Range = std::pair; for (const ProfileInfo &profile : profile_list_) { - // op name after fusion + Json fusion_op_info; string fusion_op_name = profile.fusion_info.op_name; - int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); - reporter_data.data = (unsigned char *)&fusion_op_name_len; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - reporter_data.data = (unsigned char *)fusion_op_name.c_str(); - reporter_data.dataLen = fusion_op_name_len; - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // original op name before fusion uint32_t op_num = profile.fusion_info.original_op_names.size(); - reporter_data.data = (unsigned char *)&op_num; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - + vector original_name; for (uint32_t k = 0; k < op_num; k++) { - std::string op_name = profile.fusion_info.original_op_names[k]; - int32_t op_name_len = op_name.size() == 0 ? 
1 : op_name.size(); - reporter_data.data = (unsigned char *)&op_name_len; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - reporter_data.data = (unsigned char *)op_name.c_str(); - reporter_data.dataLen = op_name_len; - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - } - - // stream id info + original_name.emplace_back(profile.fusion_info.original_op_names[k]); + } uint32_t stream_id = 0; auto iter = profiler_report_op_info_.find(fusion_op_name); if (iter != profiler_report_op_info_.end()) { stream_id = iter->second.second; } - reporter_data.data = (unsigned char *)&stream_id; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // memory info - reporter_data.data = (unsigned char *)&profile.memory_info; - reporter_data.dataLen = sizeof(profile.memory_info); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // task info - reporter_data.data = (unsigned char *)&profile.task_count; - reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - + fusion_op_info[kFusionOpName] = fusion_op_name; + fusion_op_info[kOriginalOpNum] = op_num; + fusion_op_info[kOriginalOpName] = original_name; + fusion_op_info[kStreamId] = stream_id; + fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size; + fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size; + fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size; + fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = 
profile.memory_info.workspace_size; + fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size; + fusion_op_info[kTaskCount] = profile.task_count; + vector task_id; Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); for (CIT idx = task_range.first; idx != task_range.second; ++idx) { - uint32_t task_id = idx->second; - reporter_data.data = (unsigned char *)&task_id; - reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); + task_id.push_back(idx->second); } + fusion_op_info[kTaskId] = task_id; + model_load_info[kFusionOpInfo] += fusion_op_info; } + std::string tag_name("model_load_info_" + std::to_string(this->Id())); + std::string reported_data; + try { + reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch (std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + } catch (...) { + GELOGE(FAILED, "Failed to convert JSON to string."); + } + reported_data.append(",") + .append("\n"); + prof_mgr.ReportData(device_id_, reported_data, tag_name); return SUCCESS; } Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { - // profiling plugin must be registered auto &prof_mgr = ProfilingManager::Instance(); - ReporterData reporter_data{}; + + string name = om_name_.empty() ? 
name_ : om_name_; + Json model_time_info; + model_time_info[kModelName] = name; + model_time_info[kModeleId] = this->Id(); + model_time_info[kRequestId] = current_data.request_id; + model_time_info[kThreadId] = GetDataInputTid(); + model_time_info[kInputBeginTime] = time_info_.processBeginTime; + model_time_info[kInputEndTime] = time_info_.processEndTime; + model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime; + model_time_info[kInferEndTime] = time_info_.inferenceEndTime; + model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime; + model_time_info[kOutputEndTime] = time_info_.dumpEndTime; + // report model data tag name std::string tag_name; tag_name.append("model_time_info_") - .append(std::to_string(this->Id())) - .append("_") - .append(std::to_string(current_data.index)); - - GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, - return FAILED, "Sink model tag memcpy error."); - // device id - reporter_data.deviceId = device_id_; - - // Model Header - string name; - if (!om_name_.empty()) { - name = om_name_; - } else { - name = name_; - } - size_t name_len = name.size(); - reporter_data.data = (unsigned char *)&name_len; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - reporter_data.data = (unsigned char *)name.c_str(); - reporter_data.dataLen = name.size(); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // request id - uint64_t request_id = current_data.request_id; - reporter_data.data = (unsigned char *)&request_id; - reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); - - // thread id - int32_t thread_id = 
GetDataInputTid(); - reporter_data.data = (unsigned char *)&thread_id; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); - - // time info - time_info_.modelId = this->Id(); - reporter_data.data = (unsigned char *)&time_info_; - reporter_data.dataLen = sizeof(struct timeInfo); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); + .append(std::to_string(this->Id())) + .append("_") + .append(std::to_string(current_data.index)); + std::string reported_data; + try { + reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch (std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + } catch (...) { + GELOGE(FAILED, "Failed to convert JSON to string."); + } + reported_data.append(",") + .append("\n"); + prof_mgr.ReportData(device_id_, reported_data, tag_name); return SUCCESS; } @@ -3069,13 +3017,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo task_desc_info.model_name = name_; } task_desc_info.op_name = op->GetName(); + task_desc_info.op_type = op->GetType(); task_desc_info.block_dim = task_def.kernel().block_dim(); task_desc_info.task_id = task->GetTaskID(); task_desc_info.stream_id = task->GetStreamId(); task_desc_info.shape_type = "static"; task_desc_info.cur_iter_num = 0; - // task type task_desc_info.task_type = kTaskTypeInvalid; + auto &prof_mgr = ProfilingManager::Instance(); + prof_mgr.GetOpInputOutputInfo(op, task_desc_info); auto model_task_type = static_cast(task_def.type()); if (model_task_type == RT_MODEL_TASK_KERNEL) { const domi::KernelDef &kernel_def = task_def.kernel(); @@ -3107,7 +3057,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const 
TaskInfo task_desc_info_.emplace_back(task_desc_info); } } - return; } Status DavinciModel::DistributeTask() { @@ -4008,41 +3957,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea main_follow_stream_mapping_[main_stream_id].emplace_back(stream); } -Status DavinciModel::GetComputeGraphInfo(vector &graph_desc_info) { - auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); - for (auto &op_desc : all_op_desc) { - ComputeGraphDescInfo compute_graph_info; - if (!om_name_.empty()) { - compute_graph_info.model_name = om_name_; - } else { - compute_graph_info.model_name = name_; - } - - std::vector format = { FORMAT_NULL }; - std::vector> shape = { {0} }; - std::vector data_type = { DT_UNDEFINED }; - compute_graph_info.op_name = op_desc.op_name; - compute_graph_info.op_type = op_desc.op_type; - compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; - compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; - compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; - compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; - compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; - compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? 
data_type : op_desc.output_data_type; - uint32_t task_id = 0; - uint32_t stream_id = 0; - auto iter = profiler_report_op_info_.find(op_desc.op_name); - if (iter != profiler_report_op_info_.end()) { - task_id = iter->second.first; - stream_id = iter->second.second; - } - compute_graph_info.task_id = task_id; - compute_graph_info.stream_id = stream_id; - graph_desc_info.emplace_back(compute_graph_info); - } - return SUCCESS; -} - void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 5bc3a68e..f0db99e4 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -840,9 +840,6 @@ class DavinciModel { Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); - // get desc info of graph for profiling - Status GetComputeGraphInfo(vector &graph_desc_info); - void SetDataDumperArgs(const ComputeGraphPtr &graph, const map &variable_by_name); Status InitL1DataDumperArgs(); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index fda65cb2..63d9126b 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -70,8 +70,6 @@ class NodeDoneCallback { Status PrepareConstInputs(const NodeItem &node_item); Status DumpDynamicNode(); Status ProfilingReport(); - Status GetGraphDescInfo(const NodePtr node, const HybridModel *model, - std::vector &compute_graph_info); Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, std::vector &task_desc_info); GraphExecutionContext *graph_context_; @@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * } 
GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); + auto &prof_mgr = ProfilingManager::Instance(); task_desc_info = context_->GetProfilingTaskDescInfo(); context_->ClearProfilingTaskDescInfo(); - - return SUCCESS; -} - -Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model, - std::vector &compute_graph_info) { - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(model); - - GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); - compute_graph_info = context_->GetProfilingGraphDescInfo(); - context_->ClearProfilingGraphDescInfo(); - - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - for (auto &tmp_compute_graph_info : compute_graph_info) { - // default - if (op_desc->GetAllInputsSize() == 0) { - tmp_compute_graph_info.input_format = { FORMAT_NULL }; - tmp_compute_graph_info.input_shape = { {0} }; - tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; - } - for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { - GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); - if (input_desc == nullptr) { - continue; - } - tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); - tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); - tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); - } - - if (op_desc->GetOutputsSize() == 0) { - tmp_compute_graph_info.output_format = { FORMAT_NULL }; - tmp_compute_graph_info.output_shape = { {0} }; - tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; - } - for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { - GeTensorDesc output_desc = op_desc->GetOutputDesc(j); - tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); - tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); - tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); - } + for (auto &tmp_task_desc : task_desc_info) 
{ + // save op input and output info + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc); } return SUCCESS; @@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() { return profiling_ret; } - std::vector compute_graph_info; - profiling_ret = GetGraphDescInfo(node, model, compute_graph_info); - if (profiling_ret != RT_ERROR_NONE) { - GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str()); - return profiling_ret; - } - auto &profiling_manager = ProfilingManager::Instance(); - profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info); + profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info); return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 3174df80..5a5355cd 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function uint32_t stream_id = 0; rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "Get task_id and stream_id failed."); - return FAILED; + GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); - (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); } diff --git 
a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 481507ae..1f77bab8 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionmodel; GE_CHECK_NOTNULL(model); - std::string op_name = op_desc->GetName(); std::string dynamic_model_name = model->GetModelName(); TaskDescInfo tmp_task_desc_info; tmp_task_desc_info.model_name = dynamic_model_name; - tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.op_name = op_desc->GetName(); + tmp_task_desc_info.op_type = op_desc->GetType(); tmp_task_desc_info.block_dim = block_dim; tmp_task_desc_info.task_type = task_type; tmp_task_desc_info.task_id = task_id; @@ -546,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const { return node_state_; } -Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { - if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { - const NodeItem &node_item = GetNodeItem(); - auto op_desc = node_item.GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - const GraphExecutionContext *graph_context = GetExecutionContext(); - GE_CHECK_NOTNULL(graph_context); - const HybridModel *model = graph_context->model; - GE_CHECK_NOTNULL(model); - - std::string dynamic_model_name = model->GetModelName(); - auto op_mode = static_cast(domi::ImplyType::INVALID); - if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && - op_mode == static_cast(domi::ImplyType::TVM)) { - ComputeGraphDescInfo tmp_compute_graph_info; - tmp_compute_graph_info.model_name = dynamic_model_name; - tmp_compute_graph_info.op_name = op_desc->GetName(); - tmp_compute_graph_info.op_type = op_desc->GetType(); - tmp_compute_graph_info.task_id = task_id; - tmp_compute_graph_info.stream_id = stream_id; - compute_graph_info.emplace_back(tmp_compute_graph_info); - } - } - return 
SUCCESS; -} - } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index f29918b4..645c1234 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -113,13 +113,10 @@ class TaskContext { void *handle_ = nullptr; const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info; } - Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); + Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, + const std::string &task_type, uint32_t block_dim); void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } - const std::vector& GetProfilingGraphDescInfo() const { return compute_graph_info; } - Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); - void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } - private: TaskContext(GraphExecutionContext *execution_context, NodeState *node_state, @@ -141,7 +138,6 @@ class TaskContext { uint32_t task_id_ = 0; uint32_t stream_id_ = 0; std::vector task_desc_info; - std::vector compute_graph_info; }; } // namespace hybrid } // namespace ge diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 168ca2c5..e7a97372 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { return SUCCESS; } - string model_name; - string op_name; + TaskDescInfo tmp_task_desc_info; uint32_t model_id; - uint32_t block_dim; - if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { + if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); return ACL_ERROR_GE_PARAM_INVALID; } - GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); - std::vector 
task_desc_info; - uint32_t task_id = 0; - uint32_t stream_id = 0; - auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "Get task_id and stream_id failed."); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } + GELOGD("ProfilingReport of op[%s] model[%s] start.", + tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str()); - TaskDescInfo tmp_task_desc_info; - tmp_task_desc_info.model_name = model_name; - tmp_task_desc_info.op_name = op_name; - tmp_task_desc_info.block_dim = block_dim; - tmp_task_desc_info.task_id = task_id; - tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = shape_type; tmp_task_desc_info.cur_iter_num = 0; tmp_task_desc_info.task_type = op_task->GetTaskType(); - GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); - task_desc_info.emplace_back(tmp_task_desc_info); - std::vector compute_graph_info; + std::vector task_desc_info; + task_desc_info.emplace_back(tmp_task_desc_info); auto &profiling_manager = ProfilingManager::Instance(); - profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info); + profiling_manager.ReportProfilingData(model_id, task_desc_info); return SUCCESS; } } // namespace diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index df4161c7..973d7c05 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -23,6 +23,7 @@ #include "aicpu/common/aicpu_task_struct.h" #include "common/dump/dump_manager.h" #include "common/dump/dump_op.h" +#include "common/profiling/profiling_manager.h" #include "common/formats/formats.h" #include "common/math/math_util.h" #include "framework/common/debug/log.h" @@ -108,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { model_id_ = model_id; } -Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, - uint32_t 
&block_dim) { - model_name = model_name_; - model_id = model_id_; - block_dim = block_dim_; +Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) { + uint32_t task_id = 0; + uint32_t stream_id = 0; + auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } GE_CHECK_NOTNULL(op_desc_); - op_name = op_desc_->GetName(); + string op_name = op_desc_->GetName(); + GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); + model_id = model_id_; + task_desc_info.model_name = model_name_; + task_desc_info.block_dim = block_dim_; + task_desc_info.task_id = task_id; + task_desc_info.stream_id = stream_id; + task_desc_info.op_name = op_name; + task_desc_info.op_type = op_desc_->GetType(); + auto &prof_mgr = ProfilingManager::Instance(); + prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info); return SUCCESS; } + Status OpTask::UpdateRunInfo(const vector &input_desc, const vector &output_desc) { return UNSUPPORTED; } @@ -153,7 +168,7 @@ Status OpTask::LaunchKernel(const vector &input_desc, return UNSUPPORTED; } -uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } +const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; } TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { @@ -171,7 +186,7 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } const std::string &TbeOpTask::GetStubName() const { return stub_name_; } -uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } +const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } void TbeOpTask::SetHandle(void *handle) { this->handle_ = handle; @@ -834,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { return DoUpdateArgTable(param, false); } -uint32_t AiCpuBaseTask::GetTaskType() const { return 
kTaskTypeAicpu; } +const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { arg_base = reinterpret_cast(io_addr_host_.data()); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index be7f4aab..8c91bd5f 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -43,7 +43,7 @@ class OpTask { const vector &output_desc); virtual Status UpdateArgTable(const SingleOpModelParam &param); void SetModelArgs(std::string model_name, uint32_t model_id); - Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); + Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); const OpDescPtr &GetOpdesc() const {return op_desc_;} Status OpenDump(rtStream_t stream); virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; @@ -52,7 +52,7 @@ class OpTask { std::vector &output_desc, std::vector &output_buffers, rtStream_t stream); - virtual uint32_t GetTaskType() const; + virtual const std::string &GetTaskType() const; protected: Status DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace); @@ -88,7 +88,7 @@ class TbeOpTask : public OpTask { size_t GetArgSize() const; const std::string &GetStubName() const; void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); - uint32_t GetTaskType() const override; + const std::string &GetTaskType() const override; void SetHandle(void *handle); private: @@ -123,7 +123,7 @@ class AiCpuBaseTask : public OpTask { ~AiCpuBaseTask() override; UnknowShapeOpType GetUnknownType() const { return unknown_type_; } Status UpdateArgTable(const SingleOpModelParam &param) override; - uint32_t GetTaskType() const override; + const std::string &GetTaskType() const override; protected: Status UpdateIoAddr(const std::vector &inputs, const std::vector &outputs); diff --git a/inc/framework/common/ge_types.h 
b/inc/framework/common/ge_types.h index ec5adcba..0d996a67 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; // profiling data -const uint32_t kTaskTypeAicore = 0; -const uint32_t kTaskTypeAicpu = 1; -const uint32_t kTaskTypeInvalid = 0xFFFF; +const std::string kTaskTypeAicore = "AI_CORE"; +const std::string kTaskTypeAicpu = "AI_CPU"; +const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; // Data cache, including data address and length struct DataBuffer { @@ -251,27 +251,19 @@ struct Options { struct TaskDescInfo { std::string model_name; std::string op_name; + std::string op_type; uint32_t block_dim; uint32_t task_id; uint32_t stream_id; std::string shape_type; int64_t cur_iter_num; - uint32_t task_type; -}; - -// Profiling info of graph -struct ComputeGraphDescInfo { - std::string model_name; - std::string op_name; - std::string op_type; + std::string task_type; std::vector input_format; std::vector> input_shape; std::vector input_data_type; std::vector output_format; std::vector> output_shape; std::vector output_data_type; - uint32_t task_id; - uint32_t stream_id; }; struct OpDescInfo { diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 688e393c..b8eb3e22 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -761,7 +761,7 @@ set(GENERATOR_TEST_FILES ) set(SINGLE_OP_TEST_FILES - #"single_op/single_op_model_unittest.cc" + "single_op/single_op_model_unittest.cc" "single_op/single_op_manager_unittest.cc" "single_op/stream_resource_unittest.cc" "single_op/single_op_task_unittest.cc" diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index b8a963e3..fe39adf6 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -890,4 
+890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { model.SinkModelProfile(); } +TEST_F(UtestDavinciModel, Sink_time_profile) { + ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; + DavinciModel model(0, nullptr); + InputData current_data; + model.SinkTimeProfile(current_data); +} + } // namespace ge diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index ab909e11..eaf4564a 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test { void TearDown() {} }; +//rt api stub +rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) { + return RT_ERROR_NONE; +} /* TEST_F(UtestSingleOpModel, test_init_model) { string model_data_str = "123456789"; @@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) { std::mutex stream_mu_; rtStream_t stream_ = nullptr; - SingleOp single_op(&stream_mu_, stream_); - - ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); +// SingleOp single_op(&stream_mu_, stream_); +// +// ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); } /* TEST_F(UtestSingleOpModel, test_build_kernel_task) { @@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) { ASSERT_EQ(op_model.Init(), FAILED); } */ - +/* TEST_F(UtestSingleOpModel, test_parse_arg_table) { string model_data_str = "123456789"; SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); @@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) { ASSERT_EQ(op.arg_table_[1].size(), 1); ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); } +*/ +TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) { + string name = "relu"; + string type = "relu"; + auto op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + op_desc->SetId(0); + TbeOpTask task; + task.op_desc_ = op_desc; + 
task.model_name_ = "resnet_50"; + task.model_id_ = 1; + TaskDescInfo task_desc_info; + uint32_t model_id; + task.GetProfilingArgs(task_desc_info, model_id); + + ASSERT_EQ(task_desc_info.model_name, "resnet_50"); + ASSERT_EQ(model_id, 1); +} + + From ce22b66201cc65127ce183f0ab84d707ca2317e2 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Fri, 26 Feb 2021 14:16:51 +0800 Subject: [PATCH 24/44] fix compile error --- ge/graph/manager/graph_manager.cc | 3 ++- ge/graph/manager/graph_manager.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 9412ae69..92951a89 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -2558,7 +2558,8 @@ Status GraphManager::RunGraphAsync(const GraphId &graph_id, const std::vector &input_tensor); void ParseInputsDimsForData(const std::vector &input_tensor); From f923b94ecf23a38de5f4e16b60687b58f198ac00 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Fri, 26 Feb 2021 15:17:22 +0800 Subject: [PATCH 25/44] fix compile error --- ge/client/ge_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 05671408..5d149920 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -212,7 +212,7 @@ std::string GEGetErrorMsg() { } std::string GEGetWarningMsg() { - return ErrorManager::GetInstance.GetWarningMessage(); + return ErrorManager::GetInstance().GetWarningMessage(); } // Initialize session,which calls innerSession From 70beda36e4bdaacd6348a90df56326a33d077485 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 26 Feb 2021 17:28:44 +0800 Subject: [PATCH 26/44] single_op doesn't execute atomic. 
--- ge/hybrid/node_executor/aicore/aicore_op_task.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index f3699b6c..f7f36a59 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -131,7 +131,7 @@ Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) { } void *bin_handle = nullptr; - GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str()); + GELOGD("Start to register kernel for node[%s].", op_desc.GetName().c_str()); rtDevBinary_t binary; std::string json_string; GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string), @@ -149,7 +149,7 @@ Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) { binary.version = 0; binary.data = tbe_kernel->GetBinData(); binary.length = tbe_kernel->GetBinDataSize(); - GELOGI("TBE: binary.length: %lu", binary.length); + GELOGI("TBE: binary.length: %lu.", binary.length); GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle)); handle_ = bin_handle; auto holder = std::unique_ptr(new (std::nothrow) TbeHandleHolder(handle_)); @@ -351,6 +351,9 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), "Failed calc tiling data of node %s.", node->GetName().c_str()); + if (is_single_op_) { + tiling_info.clear_atomic = false; + } GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); return SUCCESS; } From cb44858dbf5fd892952ced1f83ea55cf526560d5 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 26 Feb 2021 20:36:38 +0800 Subject: [PATCH 27/44] Add single_op model_id. 
--- inc/framework/executor/ge_executor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index c546f63d..59a1f8ab 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -260,6 +260,7 @@ class GE_FUNC_VISIBILITY GeExecutor { static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, SingleOp **single_op); + static ge::Status ExecuteAsync(SingleOp *executor, const std::vector &inputs, std::vector &outputs); From 8ae2d4267596beeb26641038b0af87f3329580e7 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Sat, 27 Feb 2021 11:18:06 +0800 Subject: [PATCH 28/44] modify build.sh add error_manager.so --- build.sh | 8 ++++---- metadef | 2 +- parser | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build.sh b/build.sh index a7e87cd5..3e2dcdec 100644 --- a/build.sh +++ b/build.sh @@ -185,7 +185,7 @@ build_graphengine() # build all the target TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}" fi - + make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install if [ $? 
-ne 0 ] then @@ -250,8 +250,8 @@ generate_package() NNENGINE_PATH="plugin/nnengine/ge_config" OPSKERNEL_PATH="plugin/opskernel" - ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so") - FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so") + ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so" "liberror_manager.so") + FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so" "liberror_manager.so") PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "libhost_cpu_engine.so" "libhost_cpu_opskernel_builder.so" "optimizer_priority.pbtxt") PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so") @@ -270,7 +270,7 @@ generate_package() mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}" mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}" mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}" - + cd "${OUTPUT_PATH}" find ./ -name graphengine_lib.tar -exec rm {} \; diff --git a/metadef b/metadef index f982caa0..4a9bfd77 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit f982caa0981b1fdcc55a8ec27b4f4de9c58d33ba +Subproject commit 4a9bfd772cad72ff281a2e21d59b8d225a26789c diff --git a/parser b/parser index d2fc9584..86162f60 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit d2fc958450f7bd243eff8432aadeb9fa95fa2f61 +Subproject commit 86162f60807c063f7344f902e443fc99657be637 From 8da56f278353b75ae8be13bdd77b5359f5aaedc3 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 27 Feb 2021 11:39:26 +0800 Subject: [PATCH 29/44] Add ut. 
--- .../node_executor/aicore/aicore_op_task.cc | 4 +- tests/ut/ge/CMakeLists.txt | 5 +++ tests/ut/ge/exeutor/ge_exeutor_unittest.cc | 43 +++++++++++++++++++ tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 12 ++++++ 4 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 tests/ut/ge/exeutor/ge_exeutor_unittest.cc diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index f7f36a59..07c2ddb5 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -131,7 +131,7 @@ Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) { } void *bin_handle = nullptr; - GELOGD("Start to register kernel for node[%s].", op_desc.GetName().c_str()); + GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str()); rtDevBinary_t binary; std::string json_string; GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string), @@ -149,7 +149,7 @@ Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) { binary.version = 0; binary.data = tbe_kernel->GetBinData(); binary.length = tbe_kernel->GetBinDataSize(); - GELOGI("TBE: binary.length: %lu.", binary.length); + GELOGI("TBE: binary.length: %lu", binary.length); GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle)); handle_ = bin_handle; auto holder = std::unique_ptr(new (std::nothrow) TbeHandleHolder(handle_)); diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index b8eb3e22..d0aa8b21 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -760,6 +760,10 @@ set(GENERATOR_TEST_FILES "generator/ge_generator_unittest.cc" ) +set(EXECUTOR_TEST_FILES + "exeutor/ge_exeutor_unittest.cc" +) + set(SINGLE_OP_TEST_FILES "single_op/single_op_model_unittest.cc" "single_op/single_op_manager_unittest.cc" @@ -1066,6 +1070,7 @@ target_link_libraries(ut_libge_kernel_utest add_executable(ut_libge_distinct_load_utest ${COMMON_TEST_FILES} 
${GENERATOR_TEST_FILES} + ${EXECUTOR_TEST_FILES} ${DISTINCT_GRAPH_LOAD_TEST_FILES} ${DISTINCT_GRAPH_LOAD_SRC_FILES} ${SINGLE_OP_TEST_FILES} diff --git a/tests/ut/ge/exeutor/ge_exeutor_unittest.cc b/tests/ut/ge/exeutor/ge_exeutor_unittest.cc new file mode 100644 index 00000000..c80b59a5 --- /dev/null +++ b/tests/ut/ge/exeutor/ge_exeutor_unittest.cc @@ -0,0 +1,43 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define private public +#define protected public +#include "generator/ge_executor.h" +#include "graph/utils/tensor_utils.h" + +using namespace std; + +namespace ge { +class UtestGeExecutor : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +TEST_F(UtestGeExecutor, test_single_op_exec) { + GeExecutor exeutor; + ModelData model_data; + string model_name = "1234"; + void *stream = nullptr; + SingleOp *single_op = nullptr; + + exeutor.LoadSingleOp(model_name, model_data, stream, single_op); +} +} // namespace ge diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index 61f99950..c7e0e2fa 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -98,4 +98,16 @@ TEST_F(UtestGeHybrid, aicore_op_task_init_success) { aicore_task->handle_ = handle; aicore_task->tiling_key_ = 1; ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); +} + +TEST_F(UtestGeHybrid, 
task_update_tiling_info) { + auto aicore_task = std::unique_ptr(new(std::nothrow)hybrid::AiCoreOpTask()); + aicore_task->is_single_op_ = true; + auto graph = make_shared("graph"); + OpDescPtr op_desc = CreateOpDesc("Add", "Add"); + ge::AttrUtils::SetStr(op_desc, "compile_info_key", "key"); + ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json"); + auto node = graph->AddNode(op_desc); + optiling::OpRunInfo tiling_info; + ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS) } \ No newline at end of file From fb9a4373c76eb97abe9f5a9b70723f80656ccde7 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 27 Feb 2021 11:43:19 +0800 Subject: [PATCH 30/44] Fix ut. --- tests/ut/ge/CMakeLists.txt | 5 --- tests/ut/ge/exeutor/ge_exeutor_unittest.cc | 43 ---------------------- tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 2 +- 3 files changed, 1 insertion(+), 49 deletions(-) delete mode 100644 tests/ut/ge/exeutor/ge_exeutor_unittest.cc diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index d0aa8b21..b8eb3e22 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -760,10 +760,6 @@ set(GENERATOR_TEST_FILES "generator/ge_generator_unittest.cc" ) -set(EXECUTOR_TEST_FILES - "exeutor/ge_exeutor_unittest.cc" -) - set(SINGLE_OP_TEST_FILES "single_op/single_op_model_unittest.cc" "single_op/single_op_manager_unittest.cc" @@ -1070,7 +1066,6 @@ target_link_libraries(ut_libge_kernel_utest add_executable(ut_libge_distinct_load_utest ${COMMON_TEST_FILES} ${GENERATOR_TEST_FILES} - ${EXECUTOR_TEST_FILES} ${DISTINCT_GRAPH_LOAD_TEST_FILES} ${DISTINCT_GRAPH_LOAD_SRC_FILES} ${SINGLE_OP_TEST_FILES} diff --git a/tests/ut/ge/exeutor/ge_exeutor_unittest.cc b/tests/ut/ge/exeutor/ge_exeutor_unittest.cc deleted file mode 100644 index c80b59a5..00000000 --- a/tests/ut/ge/exeutor/ge_exeutor_unittest.cc +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the 
"License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#define private public -#define protected public -#include "generator/ge_executor.h" -#include "graph/utils/tensor_utils.h" - -using namespace std; - -namespace ge { -class UtestGeExecutor : public testing::Test { - protected: - void SetUp() {} - - void TearDown() {} -}; - -TEST_F(UtestGeExecutor, test_single_op_exec) { - GeExecutor exeutor; - ModelData model_data; - string model_name = "1234"; - void *stream = nullptr; - SingleOp *single_op = nullptr; - - exeutor.LoadSingleOp(model_name, model_data, stream, single_op); -} -} // namespace ge diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index c7e0e2fa..97a36894 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -109,5 +109,5 @@ TEST_F(UtestGeHybrid, task_update_tiling_info) { ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json"); auto node = graph->AddNode(op_desc); optiling::OpRunInfo tiling_info; - ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS) + ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS); } \ No newline at end of file From 74563727dae1563c97896a7b4398689cd8803783 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 27 Feb 2021 14:07:03 +0800 Subject: [PATCH 31/44] delete davinci_model_parser.cc --- ge/CMakeLists.txt | 2 - ge/common/CMakeLists.txt | 2 +- ge/common/helper/model_cache_helper.cc | 15 ++---- ge/common/helper/model_helper.cc | 12 
++--- .../model_parser/{base.cc => model_parser.cc} | 11 ++--- .../model_parser/{base.h => model_parser.h} | 0 ge/executor/CMakeLists.txt | 1 - ge/executor/ge_executor.cc | 9 ---- ge/graph/execute/graph_execute.cc | 4 -- ge/graph/load/graph_loader.cc | 14 ++---- ge/graph/load/graph_loader.h | 4 +- .../model_manager/davinci_model_parser.cc | 23 ---------- .../load/model_manager/davinci_model_parser.h | 46 ------------------- ge/graph/load/model_manager/model_manager.cc | 12 +---- ge/graph/preprocess/graph_preprocess.h | 2 +- ge/session/omg.cc | 2 +- ge/single_op/single_op_model.h | 1 - tests/depends/omg/src/omg_stub.cc | 2 +- tests/ut/ge/CMakeLists.txt | 7 ++- tests/ut/ge/graph/ge_executor_unittest.cc | 1 - tests/ut/ge/graph/graph_load_unittest.cc | 1 - .../ge/graph/load/model_manager_unittest.cc | 1 - ...el_manager_model_manager_aicpu_unittest.cc | 3 +- 23 files changed, 26 insertions(+), 149 deletions(-) rename ge/common/model_parser/{base.cc => model_parser.cc} (96%) rename ge/common/model_parser/{base.h => model_parser.h} (100%) delete mode 100644 ge/graph/load/model_manager/davinci_model_parser.cc delete mode 100755 ge/graph/load/model_manager/davinci_model_parser.h diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index db316ffa..93c88cbf 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -133,7 +133,6 @@ set(TRAIN_SRC_LIST "graph/load/model_manager/data_dumper.cc" "graph/load/model_manager/data_inputer.cc" "graph/load/model_manager/davinci_model.cc" - "graph/load/model_manager/davinci_model_parser.cc" "graph/load/model_manager/model_manager.cc" "graph/load/model_manager/model_utils.cc" "graph/load/model_manager/aipp_utils.cc" @@ -613,7 +612,6 @@ set(INFER_SRC_LIST "graph/load/model_manager/model_manager.cc" "graph/load/model_manager/data_inputer.cc" "graph/load/model_manager/davinci_model.cc" - "graph/load/model_manager/davinci_model_parser.cc" "graph/load/model_manager/model_utils.cc" "graph/load/model_manager/aipp_utils.cc" 
"graph/load/model_manager/tbe_handle_store.cc" diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index c73e21c6..a6f8e57c 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -54,7 +54,7 @@ set(SRC_LIST "util.cc" "properties_manager.cc" "types.cc" - "model_parser/base.cc" + "model_parser/model_parser.cc" "kernel_store.cc" "tbe_kernel_store.cc" "cust_aicpu_kernel_store.cc" diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc index 67d934df..41ad6d59 100755 --- a/ge/common/helper/model_cache_helper.cc +++ b/ge/common/helper/model_cache_helper.cc @@ -14,22 +14,15 @@ * limitations under the License. */ -#include +#include "common/helper/model_cache_helper.h" + #include #include #include -#include "common/ge/ge_util.h" -#include "common/helper/model_cache_helper.h" -#include "common/types.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/ge_types.h" +#include "common/model_parser/model_parser.h" #include "framework/common/helper/model_helper.h" -#include "framework/common/util.h" -#include "graph/detail/attributes_holder.h" #include "graph/detail/model_serialize_imp.h" -#include "graph/load/model_manager/davinci_model_parser.h" -#include "graph/model.h" #include "graph/utils/graph_utils.h" #include "graph/utils/tensor_utils.h" #include "init/gelib.h" @@ -1682,7 +1675,7 @@ Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const { string key_path; int32_t priority = 0; ModelData model_data; - ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data); + ret = ModelParserBase::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data); if (ret != SUCCESS) { GELOGW("LoadOmModelFromCache: Load model from file failed. 
ret = %u", ret); return ret; diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 7de7d8e0..561fcc40 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -16,15 +16,9 @@ #include "framework/common/helper/model_helper.h" -#include "common/ge/ge_util.h" -#include "common/util/error_manager/error_manager.h" -#include "framework/common/debug/log.h" -#include "framework/common/util.h" -#include "framework/common/debug/ge_log.h" +#include "common/model_parser/model_parser.h" #include "framework/omg/version.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/model_manager/davinci_model_parser.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" using std::string; @@ -464,7 +458,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c return ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA; } - Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); + Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); if (status != SUCCESS) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); return ACL_ERROR_GE_PARAM_INVALID; @@ -513,7 +507,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod return INTERNAL_ERROR; } - Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); + Status status = ModelParserBase::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); if (status != SUCCESS) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); return ACL_ERROR_GE_PARAM_INVALID; diff --git a/ge/common/model_parser/base.cc b/ge/common/model_parser/model_parser.cc similarity index 96% rename from ge/common/model_parser/base.cc rename to ge/common/model_parser/model_parser.cc index 22837be6..9c00ab08 100644 --- a/ge/common/model_parser/base.cc +++ 
b/ge/common/model_parser/model_parser.cc @@ -14,16 +14,13 @@ * limitations under the License. */ -#include "common/model_parser/base.h" -#include "common/helper/model_helper.h" -#include +#include "common/model_parser/model_parser.h" + #include -#include #include -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" -#include "framework/common/util.h" +#include "securec.h" +#include "common/helper/model_helper.h" namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelParserBase::ModelParserBase() {} diff --git a/ge/common/model_parser/base.h b/ge/common/model_parser/model_parser.h similarity index 100% rename from ge/common/model_parser/base.h rename to ge/common/model_parser/model_parser.h diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index cf66eabe..31cbad7a 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -33,7 +33,6 @@ set(SRC_LIST "../model/ge_model.cc" "../model/ge_root_model.cc" "../graph/load/model_manager/davinci_model.cc" - "../graph/load/model_manager/davinci_model_parser.cc" "../graph/load/model_manager/model_manager.cc" "../graph/load/model_manager/tbe_handle_store.cc" "../graph/load/model_manager/cpu_queue_schedule.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index c4088421..bdc7ac5d 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -16,7 +16,6 @@ #include "executor/ge_executor.h" #include -#include #include #include #include "common/debug/log.h" @@ -24,19 +23,11 @@ #include "common/helper/model_helper.h" #include "common/profiling/profiling_manager.h" #include "common/dump/dump_manager.h" -#include "common/util.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/util.h" #include "graph/execute/graph_execute.h" #include "graph/load/graph_loader.h" -#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/load/model_manager/model_manager.h" #include 
"graph/manager/graph_mem_allocator.h" -#include "graph/model.h" -#include "graph/utils/graph_utils.h" -#include "mmpa/mmpa_api.h" #include "single_op/single_op_manager.h" -#include "graph/manager/graph_var_manager.h" #include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 79c22a29..1aee756c 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -19,12 +19,8 @@ #include #include -#include "common/ge_inner_error_codes.h" -#include "common/model_parser/base.h" #include "graph/load/model_manager/model_manager.h" #include "omm/csa_interact.h" -#include "runtime/dev.h" -#include "runtime/mem.h" namespace ge { GraphExecutor::GraphExecutor() diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 29afc939..644880ce 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -20,19 +20,13 @@ #include #include "common/helper/model_helper.h" -#include "common/util.h" +#include "common/model_parser/model_parser.h" #include "graph/ge_context.h" -#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "omm/csa_interact.h" -#include "runtime/dev.h" namespace ge { -GraphLoader::GraphLoader() = default; - -GraphLoader::~GraphLoader() = default; - Status GraphLoader::UnloadModel(uint32_t model_id) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); @@ -120,7 +114,6 @@ Status GraphLoader::GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size) { Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string &key_path, int32_t priority, ModelData &model_data) { - Status ret; if (!CheckInputPathValid(path)) { GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); return 
ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; @@ -132,16 +125,15 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string return ACL_ERROR_GE_PARAM_INVALID; } - ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); + Status ret = ModelParserBase::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); if (ret != SUCCESS) { GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret); if (model_data.model_data != nullptr) { delete[] static_cast(model_data.model_data); model_data.model_data = nullptr; } - return ret; } - return SUCCESS; + return ret; } Status GraphLoader::CommandHandle(const Command &command) { diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h index 3632a10a..4704e4e2 100755 --- a/ge/graph/load/graph_loader.h +++ b/ge/graph/load/graph_loader.h @@ -32,9 +32,9 @@ namespace ge { class GraphLoader { public: - GraphLoader(); + GraphLoader() = default; - virtual ~GraphLoader(); + virtual ~GraphLoader() = default; GraphLoader(const GraphLoader &in) = delete; diff --git a/ge/graph/load/model_manager/davinci_model_parser.cc b/ge/graph/load/model_manager/davinci_model_parser.cc deleted file mode 100644 index c6f48b84..00000000 --- a/ge/graph/load/model_manager/davinci_model_parser.cc +++ /dev/null @@ -1,23 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/load/model_manager/davinci_model_parser.h" - -namespace ge { -DavinciModelParser::DavinciModelParser() {} - -DavinciModelParser::~DavinciModelParser() {} -} // namespace ge diff --git a/ge/graph/load/model_manager/davinci_model_parser.h b/ge/graph/load/model_manager/davinci_model_parser.h deleted file mode 100755 index 83eb4cc3..00000000 --- a/ge/graph/load/model_manager/davinci_model_parser.h +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_ -#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_ - -#include -#include - -#include "common/debug/log.h" -#include "common/ge_types.h" -#include "common/model_parser/base.h" -#include "common/types.h" -#include "common/util.h" - -namespace ge { -class DavinciModelParser : public ModelParserBase { - public: - /// - /// @ingroup hiai - /// @brief constructor - /// - DavinciModelParser(); - - /// - /// @ingroup hiai - /// @brief destructor - /// - ~DavinciModelParser(); -}; -} // namespace ge - -#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_PARSER_H_ diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index 512c6e72..402bfa22 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -18,23 +18,15 @@ #include -#include "mmpa/mmpa_api.h" #include "aicpu/aicpu_schedule/aicpu_op_type_list.h" +#include "common/model_parser/model_parser.h" #include "common/dump/dump_manager.h" #include "common/l2_cache_optimize.h" #include "common/profiling/profiling_manager.h" -#include "common/properties_manager.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/util.h" #include "graph/common/ge_call_wrapper.h" -#include "graph/debug/ge_attr_define.h" #include "graph/load/model_manager/davinci_model.h" -#include "graph/load/model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" -#include "graph/common/local_context.h" -#include "graph/utils/attr_utils.h" #include "common/formats/utils/formats_trans_utils.h" -#include "hybrid/hybrid_davinci_model.h" namespace ge { thread_local uint32_t device_count = 0; @@ -1403,7 +1395,7 @@ Status ModelManager::LaunchCustAicpuSo() { Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &mem_size, size_t &weight_size) { uint8_t *model_data = nullptr; uint32_t model_len = 0; - Status ret = 
DavinciModelParser::ParseModelContent(model, model_data, model_len); + Status ret = ModelParserBase::ParseModelContent(model, model_data, model_len); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!"); OmFileLoadHelper om_file_helper; diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h index b81067dd..9dc3e679 100755 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -23,7 +23,7 @@ #include #include "common/debug/log.h" #include "common/debug/memory_dumper.h" -#include "common/model_parser/base.h" +#include "common/model_parser/model_parser.h" #include "common/properties_manager.h" #include "common/string_util.h" #include "common/types.h" diff --git a/ge/session/omg.cc b/ge/session/omg.cc index fe0a1a1d..bd1fd67c 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -23,7 +23,7 @@ #include "common/debug/memory_dumper.h" #include "common/ge/ge_util.h" #include "common/helper/model_helper.h" -#include "common/model_parser/base.h" +#include "common/model_parser/model_parser.h" #include "common/model_saver.h" #include "common/properties_manager.h" #include "common/string_util.h" diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 684dab77..b1a7d3ea 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -24,7 +24,6 @@ #include #include "common/helper/model_helper.h" -#include "graph/load/model_manager/davinci_model_parser.h" #include "single_op/single_op.h" #include "single_op/stream_resource.h" diff --git a/tests/depends/omg/src/omg_stub.cc b/tests/depends/omg/src/omg_stub.cc index 811db2d2..33c6ca72 100644 --- a/tests/depends/omg/src/omg_stub.cc +++ b/tests/depends/omg/src/omg_stub.cc @@ -27,7 +27,7 @@ #include "common/util.h" #include "common/string_util.h" #include "common/properties_manager.h" -#include "common/model_parser/base.h" +#include 
"common/model_parser/model_parser.h" #include "graph/model.h" #include "cce/dnn.h" #include "ge/ge_api_types.h" diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index b8eb3e22..3c8fba71 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -163,7 +163,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" - "${GE_CODE_DIR}/ge/common/model_parser/base.cc" + "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" @@ -393,14 +393,13 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/util/debug.cc" "${GE_CODE_DIR}/ge/common/properties_manager.cc" "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" - "${GE_CODE_DIR}/ge/common/model_parser/base.cc" + "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/util.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model_parser.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" @@ -458,7 +457,7 @@ set(GRAPH_BUILD_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/thread_pool.cc" - "${GE_CODE_DIR}/ge/common/model_parser/base.cc" + "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" "${GE_CODE_DIR}/ge/graph/build/run_context.cc" 
"${GE_CODE_DIR}/ge/graph/common/local_context.cc" ) diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index 3ef8a750..d1b1e761 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -34,7 +34,6 @@ #include "common/types.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/davinci_model.h" -#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/load/model_manager/model_manager.h" #include "graph/load/model_manager/task_info/kernel_task_info.h" #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" diff --git a/tests/ut/ge/graph/graph_load_unittest.cc b/tests/ut/ge/graph/graph_load_unittest.cc index 54972af7..cbcefd03 100644 --- a/tests/ut/ge/graph/graph_load_unittest.cc +++ b/tests/ut/ge/graph/graph_load_unittest.cc @@ -24,7 +24,6 @@ #include "common/helper/model_helper.h" #include "common/op/ge_op_utils.h" #include "common/types.h" -#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/op_desc.h" #include "graph/types.h" #include "graph/utils/attr_utils.h" diff --git a/tests/ut/ge/graph/load/model_manager_unittest.cc b/tests/ut/ge/graph/load/model_manager_unittest.cc index 81d88ecd..0e65954d 100644 --- a/tests/ut/ge/graph/load/model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/model_manager_unittest.cc @@ -25,7 +25,6 @@ #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/davinci_model.h" -#include "graph/load/model_manager/davinci_model_parser.h" using namespace std; using namespace testing; diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc index a68fb307..d22496ec 100644 --- a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc +++ 
b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc @@ -21,7 +21,7 @@ #include "common/debug/log.h" #include "common/l2_cache_optimize.h" -#include "common/model_parser/base.h" +#include "common/model_parser/model_parser.h" #include "common/properties_manager.h" #include "common/types.h" @@ -31,7 +31,6 @@ #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/davinci_model.h" -#include "graph/load/model_manager/davinci_model_parser.h" #include "graph/load/model_manager/model_manager.h" //#include "new_op_test_utils.h" #undef private From 861719fbe8e49f17f34c870ba1cb9c7e613a9b89 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Sat, 27 Feb 2021 15:34:33 +0800 Subject: [PATCH 32/44] zhushi ut build_single_op_online --- tests/ut/ge/generator/ge_generator_unittest.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ut/ge/generator/ge_generator_unittest.cc b/tests/ut/ge/generator/ge_generator_unittest.cc index 509b71c8..09ddf2ec 100644 --- a/tests/ut/ge/generator/ge_generator_unittest.cc +++ b/tests/ut/ge/generator/ge_generator_unittest.cc @@ -53,6 +53,7 @@ TEST_F(UtestGeGenerator, test_build_single_op_offline) { EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); } +/* TEST_F(UtestGeGenerator, test_build_single_op_online) { GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); TensorUtils::SetSize(tensor_desc, 512); @@ -72,5 +73,6 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) { ModelBufferData model_buffer; EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_SYS, model_buffer), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); } +*/ } // namespace ge From 8143392f00cfd491248606d277826c74bdfa1132 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 27 Feb 2021 15:53:36 +0800 Subject: [PATCH 33/44] Add single_op model_id. 
--- ge/executor/ge_executor.cc | 14 ++++++- .../executor/hybrid_model_async_executor.cc | 3 +- ge/single_op/single_op_manager.cc | 9 ++-- ge/single_op/single_op_manager.h | 6 ++- inc/framework/executor/ge_executor.h | 5 +++ tests/ut/ge/CMakeLists.txt | 5 +++ tests/ut/ge/executor/ge_exeutor_unittest.cc | 42 +++++++++++++++++++ 7 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 tests/ut/ge/executor/ge_exeutor_unittest.cc diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index c4088421..fe223b1b 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -931,12 +931,22 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, SingleOp **single_op) { - return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op); + return LoadSingleOp(model_name, modelData, stream, single_op, 0); +} + +Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, + SingleOp **single_op, const uint64_t model_id) { + return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op, model_id); } Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op) { - return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op); + return LoadDynamicSingleOp((model_name, modelData, stream, single_op, 0); +} + +Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, + DynamicSingleOp **single_op, const uint64_t model_id) { + return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op, model_id); } Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector &inputs, diff 
--git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 97fb9d50..967b17bf 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -251,7 +251,8 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy if (k >= shape.GetDimNum()) { break; } - if (shape.GetDim(k) < range[k].first || shape.GetDim(k) > range[k].second) { + // range[k].second can be -1 + if (shape.GetDim(k) < range[k].first || (range[k].second >= 0 && shape.GetDim(k) > range[k].second)) { GELOGE(PARAM_INVALID, "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld]", input_index, k, shape.GetDim(k), range[k].first, range[k].second); return PARAM_INVALID; diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index ccbdbe3f..3cdb7f7d 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -30,8 +30,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManag FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFromModel(const std::string &model_name, const ModelData &model_data, void *stream, - SingleOp **single_op) { - GELOGI("GetOpFromModel in. model name = %s", model_name.c_str()); + SingleOp **single_op, + const uint64_t model_id) { + GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id); if (single_op == nullptr) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "single op is null"); return ACL_ERROR_GE_INTERNAL_ERROR; @@ -99,7 +100,9 @@ StreamResource *SingleOpManager::TryGetResource(uintptr_t resource_id) { Status SingleOpManager::GetDynamicOpFromModel(const string &model_name, const ModelData &model_data, void *stream, - DynamicSingleOp **single_op) { + DynamicSingleOp **single_op, + const uint64_t model_id) { + GELOGI("GetOpFromModel in. 
model name = %s, model id = %lu", model_name.c_str(), model_id); if (!tiling_func_registered_) { RegisterTilingFunc(); } diff --git a/ge/single_op/single_op_manager.h b/ge/single_op/single_op_manager.h index e6d10980..c3fff3f4 100644 --- a/ge/single_op/single_op_manager.h +++ b/ge/single_op/single_op_manager.h @@ -37,12 +37,14 @@ class SingleOpManager { Status GetOpFromModel(const std::string &model_name, const ge::ModelData &model_data, void *stream, - SingleOp **single_op); + SingleOp **single_op, + const uint64_t model_id); Status GetDynamicOpFromModel(const std::string &model_name, const ge::ModelData &model_data, void *stream, - DynamicSingleOp **dynamic_single_op); + DynamicSingleOp **dynamic_single_op, + const uint64_t model_id); StreamResource *GetResource(uintptr_t resource_id, rtStream_t stream); diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 59a1f8ab..ac08e473 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -260,6 +260,8 @@ class GE_FUNC_VISIBILITY GeExecutor { static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, SingleOp **single_op); + static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, + SingleOp **single_op, const uint64_t model_id); static ge::Status ExecuteAsync(SingleOp *executor, const std::vector &inputs, std::vector &outputs); @@ -267,6 +269,9 @@ class GE_FUNC_VISIBILITY GeExecutor { static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op); + static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, + DynamicSingleOp **single_op, const uint64_t model_id); + static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, const std::vector &inputs, std::vector &output_desc, 
std::vector &outputs); diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index b8eb3e22..a2b4a6dd 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -760,6 +760,10 @@ set(GENERATOR_TEST_FILES "generator/ge_generator_unittest.cc" ) +set(EXECUTOR_TEST_FILES + "executor/ge_executor_unittest.cc" +) + set(SINGLE_OP_TEST_FILES "single_op/single_op_model_unittest.cc" "single_op/single_op_manager_unittest.cc" @@ -1066,6 +1070,7 @@ target_link_libraries(ut_libge_kernel_utest add_executable(ut_libge_distinct_load_utest ${COMMON_TEST_FILES} ${GENERATOR_TEST_FILES} + ${EXECUTOR_TEST_FILES} ${DISTINCT_GRAPH_LOAD_TEST_FILES} ${DISTINCT_GRAPH_LOAD_SRC_FILES} ${SINGLE_OP_TEST_FILES} diff --git a/tests/ut/ge/executor/ge_exeutor_unittest.cc b/tests/ut/ge/executor/ge_exeutor_unittest.cc new file mode 100644 index 00000000..a98f9290 --- /dev/null +++ b/tests/ut/ge/executor/ge_exeutor_unittest.cc @@ -0,0 +1,42 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#define private public +#define protected public +#include "executor/ge_executor.h" +#include "graph/utils/tensor_utils.h" + +using namespace std; + +namespace ge { +class UtestGeExecutor : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +TEST_F(UtestGeExecutor, test_single_op_exec) { + GeExecutor exeutor; + ModelData model_data; + string model_name = "1234"; + + EXPECT_EQ(exeutor.LoadSingleOp(model_name, model_data, nullptr, nullptr), ACL_ERROR_GE_INTERNAL_ERROR); + EXPECT_EQ(exeutor.LoadDynamicSingleOp(model_name, model_data, nullptr, nullptr), PARAM_INVALID); +} +} // namespace ge \ No newline at end of file From f3db5fe415e0372c2731a92715ee4238225f1872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Sat, 27 Feb 2021 15:55:47 +0800 Subject: [PATCH 34/44] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!1?= =?UTF-8?q?156=20:=20fix=20slice=20kernel=20compute=20error=20question'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ge/host_kernels/slice_kernel.cc | 39 ++------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index 0867ec2f..c3274465 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -16,8 +16,6 @@ #include "host_kernels/slice_kernel.h" -#include - #include "common/ge_inner_error_codes.h" #include "common/op/ge_op_utils.h" #include "common/types.h" @@ -33,30 +31,6 @@ const size_t kSliceInputSize = 3; const size_t kSliceInputIndexX = 0; const size_t kSliceInputIndexBegin = 1; const size_t kSliceInputIndexSize = 2; -const std::set kSupportedDataTypeToLength = { - DT_BOOL, - DT_INT64, - DT_UINT64, - DT_FLOAT, - DT_INT32, - DT_UINT32, - DT_INT8, - DT_UINT8, - DT_INT16, - DT_UINT16, - DT_FLOAT16, - DT_DOUBLE, - DT_DUAL, - DT_DUAL_SUB_INT8, - DT_DUAL_SUB_UINT8, - DT_COMPLEX64, - DT_COMPLEX128, - 
DT_QINT8, - DT_QINT16, - DT_QINT32, - DT_QUINT8, - DT_QUINT16, -}; } // namespace Status SliceKernel::Compute(const OpDescPtr attr, const std::vector &input, @@ -79,18 +53,9 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetTensorDesc().GetDataType(); - // check supported - if (kSupportedDataTypeToLength.count(data_type) == 0) { - GELOGW("input_x data_type is [%s], does not supported!", TypeUtils::DataTypeToSerialString(data_type).c_str()); - return NOT_CHANGED; - } - uint32_t type_size = 0; - bool is_success = TypeUtils::GetDataTypeLength(data_type, type_size); - if (!is_success) { - return NOT_CHANGED; - } // check data type of begin and size if (begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { GELOGW("Data type of begin and size for slice are not DT_INT32."); @@ -104,7 +69,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetData().size() / type_size; + size_t data_size = x_->GetData().size() / sizeof(int32_t); size_t begin_size = begin->GetData().size() / sizeof(int32_t); size_t size_size = size->GetData().size() / sizeof(int32_t); const ge::GeShape &x_shape = x_->GetTensorDesc().GetShape(); From 5287444a69e2d1188134ca94af64513ac1c9dd48 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 27 Feb 2021 15:59:23 +0800 Subject: [PATCH 35/44] Add single_op model_id. 
--- .../executor/{ge_exeutor_unittest.cc => ge_executor_unittest.cc} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/ut/ge/executor/{ge_exeutor_unittest.cc => ge_executor_unittest.cc} (100%) diff --git a/tests/ut/ge/executor/ge_exeutor_unittest.cc b/tests/ut/ge/executor/ge_executor_unittest.cc similarity index 100% rename from tests/ut/ge/executor/ge_exeutor_unittest.cc rename to tests/ut/ge/executor/ge_executor_unittest.cc From 655c23ef71422c42888e0ff1ebc7e5249dc46a0a Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Sat, 27 Feb 2021 16:24:57 +0800 Subject: [PATCH 36/44] for ut cov --- ge/graph/load/model_manager/davinci_model.cc | 4 ++-- ge/graph/manager/graph_manager.cc | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 740a86f5..1da5af48 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -337,7 +337,7 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (is_feature_map_mem_has_inited_) { - GELOGE(PARAM_INVALID, "call InitFeatureMapMem more than once"); + GELOGE(PARAM_INVALID, "call InitFeatureMapMem more than once."); return PARAM_INVALID; } is_feature_map_mem_has_inited_ = true; @@ -360,7 +360,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. 
size: %zu", data_size); return ACL_ERROR_GE_MEMORY_ALLOCATION; } - GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", + GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu].", runtime_param_.graph_id, mem_base_, data_size); if (!is_inner_weight_base_) { diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 92951a89..6d1dcb04 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -293,7 +293,7 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) { return FAILED; } if ((op_desc->GetType() == DATA) || (op_type == kGetNextName)) { - GELOGI("Need to process multi batch for compute graph."); + GELOGI("Need to process multi batch for compute graph. op_type:%s", op_desc->GetType(),c_str()); GetLocalOmgContext().need_multi_batch = true; break; } @@ -348,7 +348,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, for (auto &subgraph : compute_graph->GetAllSubgraphs()) { (void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); } - GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]"); + GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]."); } GraphNodePtr graph_node = MakeShared(graph_id); @@ -734,8 +734,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, } Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) { - GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, - static_cast(mode), ge::GetContext().DeviceId()); + GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", + session_id, graph_id, static_cast(mode), ge::GetContext().DeviceId()); rtError_t rt_ret = rtCtxCreate(&rt_context, mode, 
ge::GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { @@ -758,7 +758,7 @@ Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { GE_TIMESTAMP_START(RunCustomPass); GraphPtr graph = std::const_pointer_cast(const_graph); - GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.", + GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass"); return SUCCESS; @@ -776,7 +776,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorBuildJsonObject(session_id, compute_graph->GetGraphID()), "BuildJsonObject Failed") - GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s", + GEEVENT("PreRun start, graph node size %zu, session id %lu, graph id %u, graph name %s.", compute_graph->GetDirectNodesSize(), session_id, compute_graph->GetGraphID(), compute_graph->GetName().c_str()); GE_DUMP(compute_graph, "PreRunBegin"); @@ -797,7 +797,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetName().c_str()); + GELOGE(ret, "Run PreRunOptimizeOriginalGraph failed for graph:%s", compute_graph->GetName().c_str()); return ret; } } @@ -869,7 +869,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: // release rts generate context RtContextUtil::GetInstance().DestroyRtContexts(session_id, graph_node->GetGraphId()); if (ret != SUCCESS) { - GELOGE(ret, "PreRun Failed."); + GELOGE(ret, "PreRun Failed. 
graph_id:%u", graph_node->GetGraphId()); return ret; } } @@ -1209,7 +1209,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { - GELOGD("[BuildGraph] start to build graph, graph_id=%u.", graph_id); + GELOGD("[BuildGraph] start to build graph, graph_id:%u.", graph_id); if (inputs.empty()) { GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); } @@ -1241,7 +1241,7 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vectorSetRunFlag(false); if (ret != SUCCESS) { - GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed!"); + GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed! graph_id:%u", graph_id); return GE_GRAPH_PRERUN_FAILED; } From e84eb1eb43c90ab180673d887807d68600ca0c26 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 27 Feb 2021 16:27:59 +0800 Subject: [PATCH 37/44] Add single_op model_id. 
--- ge/executor/ge_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index fe223b1b..b3353af8 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -941,7 +941,7 @@ Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelDa Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op) { - return LoadDynamicSingleOp((model_name, modelData, stream, single_op, 0); + return LoadDynamicSingleOp(model_name, modelData, stream, single_op, 0); } Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, From 535e4355acf097808d465ddf419cfef83bc31eb2 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Sat, 27 Feb 2021 16:37:00 +0800 Subject: [PATCH 38/44] fix compile error --- ge/graph/manager/graph_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 6d1dcb04..4c5d99c2 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -293,7 +293,7 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) { return FAILED; } if ((op_desc->GetType() == DATA) || (op_type == kGetNextName)) { - GELOGI("Need to process multi batch for compute graph. op_type:%s", op_desc->GetType(),c_str()); + GELOGI("Need to process multi batch for compute graph. op_type:%s", op_desc->GetType().c_str()); GetLocalOmgContext().need_multi_batch = true; break; } From 1c10a5ace33dc555aa6d15112042acb3db16a362 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 27 Feb 2021 17:21:27 +0800 Subject: [PATCH 39/44] Add single_op model_id. 
--- ge/executor/ge_executor.cc | 8 ++++---- inc/framework/executor/ge_executor.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index b3353af8..f33d7758 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -931,20 +931,20 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, SingleOp **single_op) { - return LoadSingleOp(model_name, modelData, stream, single_op, 0); + return LoadSingleOpV2(model_name, modelData, stream, single_op, 0); } -Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, +Status GeExecutor::LoadSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, SingleOp **single_op, const uint64_t model_id) { return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op, model_id); } Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op) { - return LoadDynamicSingleOp(model_name, modelData, stream, single_op, 0); + return LoadDynamicSingleOpV2(model_name, modelData, stream, single_op, 0); } -Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, +Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op, const uint64_t model_id) { return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op, model_id); } diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index ac08e473..732e47aa 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -260,7 +260,7 @@ 
class GE_FUNC_VISIBILITY GeExecutor { static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, SingleOp **single_op); - static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, + static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream, SingleOp **single_op, const uint64_t model_id); static ge::Status ExecuteAsync(SingleOp *executor, const std::vector &inputs, @@ -269,7 +269,7 @@ class GE_FUNC_VISIBILITY GeExecutor { static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op); - static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, + static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op, const uint64_t model_id); static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, From c580af353b17f025259d612f86659e89f699be82 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 27 Feb 2021 17:47:40 +0800 Subject: [PATCH 40/44] Add single_op model_id. 
--- inc/framework/executor/ge_executor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 732e47aa..9da630c9 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -261,7 +261,7 @@ class GE_FUNC_VISIBILITY GeExecutor { SingleOp **single_op); static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream, - SingleOp **single_op, const uint64_t model_id); + SingleOp **single_op, const uint64_t model_id); static ge::Status ExecuteAsync(SingleOp *executor, const std::vector &inputs, std::vector &outputs); @@ -270,7 +270,7 @@ class GE_FUNC_VISIBILITY GeExecutor { DynamicSingleOp **single_op); static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, - DynamicSingleOp **single_op, const uint64_t model_id); + DynamicSingleOp **single_op, const uint64_t model_id); static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, const std::vector &inputs, std::vector &output_desc, From 9e6cca1879f1724a352bec154d13bbcc19412298 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 27 Feb 2021 18:07:04 +0800 Subject: [PATCH 41/44] add test case --- tests/depends/mmpa/CMakeLists.txt | 5 + tests/depends/mmpa/src/mmpa_stub.cc | 13 +- tests/depends/omg/CMakeLists.txt | 59 -- tests/depends/omg/src/omg_stub.cc | 878 ---------------------- tests/depends/runtime/src/runtime_stub.cc | 9 +- tests/ut/ge/CMakeLists.txt | 2 +- tests/ut/ge/graph/ge_executor_unittest.cc | 20 + 7 files changed, 42 insertions(+), 944 deletions(-) delete mode 100644 tests/depends/omg/CMakeLists.txt delete mode 100644 tests/depends/omg/src/omg_stub.cc diff --git a/tests/depends/mmpa/CMakeLists.txt b/tests/depends/mmpa/CMakeLists.txt index 567266cf..77a2ce13 100644 --- a/tests/depends/mmpa/CMakeLists.txt +++ b/tests/depends/mmpa/CMakeLists.txt 
@@ -29,6 +29,11 @@ include_directories(${GE_CODE_DIR}/inc/framework) include_directories(${GE_CODE_DIR}/metadef/inc/external) add_library(mmpa_stub SHARED ${SRCS}) + +target_compile_options(mmpa_stub PRIVATE + -g +) + target_link_libraries(mmpa_stub PRIVATE $ -Wl,--no-as-needed diff --git a/tests/depends/mmpa/src/mmpa_stub.cc b/tests/depends/mmpa/src/mmpa_stub.cc index de09c52c..5b6dbd22 100644 --- a/tests/depends/mmpa/src/mmpa_stub.cc +++ b/tests/depends/mmpa/src/mmpa_stub.cc @@ -230,7 +230,16 @@ INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone) INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen) { - return 0; + INT32 ret = EN_OK; + if (path == nullptr || realPath == nullptr || realPathLen < MMPA_MAX_PATH) { + return EN_INVALID_PARAM; + } + + char *ptr = realpath(path, realPath); + if (ptr == nullptr) { + ret = EN_ERROR; + } + return ret; } INT32 mmGetErrorCode() @@ -255,7 +264,7 @@ INT32 mmDlclose(VOID *handle) CHAR *mmDlerror() { - return ""; + return dlerror(); } INT32 mmDladdr(VOID *addr, mmDlInfo *info) diff --git a/tests/depends/omg/CMakeLists.txt b/tests/depends/omg/CMakeLists.txt deleted file mode 100644 index 50ce91b4..00000000 --- a/tests/depends/omg/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -#cmake_minimum_required(VERSION 2.8) - -project(OMG_CCE) - -set(CMAKE_CXX_STANDARD 11) - -include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/cce) -include_directories(${GE_CODE_DIR}/inc) -include_directories(${GE_CODE_DIR}/metadef/inc) -include_directories(${GE_CODE_DIR}/inc/framework) -include_directories(${GE_CODE_DIR}/metadef/inc/graph) -include_directories(${GE_CODE_DIR}/inc/external) -include_directories(${GE_CODE_DIR}/metadef/inc/external) -include_directories(${GE_CODE_DIR}/metadef/inc/external/graph) -include_directories(${GE_CODE_DIR}/ge) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) -set(PROTO_LIST - "${GE_CODE_DIR}/metadef/proto/om.proto" - "${GE_CODE_DIR}/metadef/proto/task.proto" -) - -protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) - -set(SRCS -# "${GE_CODE_DIR}/src/ge/common/util.cc" - "src/omg_stub.cc" -) - -add_library(omg_stub SHARED ${SRCS} ${PROTO_SRCS} ${PROTO_HDRS}) - -target_compile_definitions(omg_stub PRIVATE - google=ascend_private -) - -target_link_libraries(omg_stub PRIVATE - $ - -Wl,--no-as-needed - ascend_protobuf - -Wl,--as-needed - c_sec - json -) diff --git a/tests/depends/omg/src/omg_stub.cc b/tests/depends/omg/src/omg_stub.cc deleted file mode 100644 index 33c6ca72..00000000 --- a/tests/depends/omg/src/omg_stub.cc +++ /dev/null @@ -1,878 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -#include "mmpa/mmpa_api.h" -#include "common/debug/log.h" -#include "common/debug/memory_dumper.h" -#include "common/types.h" -#include "common/util.h" -#include "common/string_util.h" -#include "common/properties_manager.h" -#include "common/model_parser/model_parser.h" -#include "graph/model.h" -#include "cce/dnn.h" -#include "ge/ge_api_types.h" -#include "framework/common/ge_types.h" -#include "graph/utils/op_desc_utils.h" -#include "common/profiling/profiling_manager.h" - -using domi::domiTensorFormat_t; -using namespace cce; -using namespace ge; - -struct PROC_PARAM { - uint8_t *model_name; - - // ISV Ek buffer - uint8_t *model_key; - uint32_t model_key_len; - - // ISV root certificate buffer - uint8_t *root_cert; - uint32_t root_cert_len; - - // ISV private key buffer - uint8_t *pri_key; - uint32_t pri_key_len; - - // Raw AI Module Image buffer - uint8_t *ai_image; - uint32_t ai_image_len; - - // ISV HW key buffer - uint8_t *hw_key; - uint32_t hw_key_len; -}; - -#ifdef __cplusplus -extern "C" { -#endif -using namespace ge; -namespace { -const char FMK_STATUS_FILE_DIR_ENV[] = "FMK_STATUS_FILE_DIR"; -const char JOBSTATE_FILE_NAME[] = "jobstateupdate_framework"; -const char HCOM_DETECT_FILE_NAME[] = "hcom_detection_result"; -const char FILE_SEPARATE[] = "/"; -} // namespace - -#ifdef __cplusplus -} -#endif - -namespace ge { -struct GeModelPartition { - ModelPartitionType type_ = MODEL_DEF; - uint8_t *data_ = nullptr; - size_t size_ = 0; - - GeModelPartition() = default; - - 
GeModelPartition(const GeModelPartition &partition){}; - - GeModelPartition &operator=(const GeModelPartition &partition) = delete; - - ~GeModelPartition() { - if (data_ != nullptr) { - delete[] data_; - data_ = nullptr; - } - } - - Status SetData(uint8_t *data, size_t size) { - size_ = size; - data_ = new (std::nothrow) uint8_t[size](); - errno_t err; - err = memcpy_s(data_, size_, data, size); - if (err) { - GELOGE(ge::FAILED, "[GeModel Partition] Error occur when copy GeModel Partition data."); - return FAILED; - } - return SUCCESS; - } - - Status SetType(ModelPartitionType type) { - type_ = type; - return SUCCESS; - } -}; -struct OmFileContext { - vector partition_datas_; - vector partition_table_; - uint32_t model_data_len_; -}; - -class SubGraphInfo; -using SubGraphInfoPtr = std::shared_ptr; - -using GeModelPartitionPtr = std::shared_ptr; -using ModelPtr = std::shared_ptr; -class GeModel { - public: - explicit GeModel(const ModelPtr &model_ptr); - ~GeModel() = default; - GeModel(const GeModel &other) = delete; - GeModel &operator=(const GeModel &other) = delete; - - ModelPtr GetModelPtr() const; - Status AddPartition(uint8_t *data, size_t size, ModelPartitionType type); - Status GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition); - uint8_t GetPlatformType() const; - void SetPlatformType(const uint8_t platform_type) { platform_type_ = platform_type; } - - private: - std::map partitions_; - ModelPtr model_ = nullptr; - uint8_t platform_type_ = {0}; -}; -using GeModelPtr = std::shared_ptr; - -GeModel::GeModel(const ModelPtr &model_ptr) { this->model_ = model_ptr; } - -ModelPtr GeModel::GetModelPtr() const { return this->model_; } - -uint8_t GeModel::GetPlatformType() const { return platform_type_; } - -Status GeModel::AddPartition(uint8_t *data, size_t size, ModelPartitionType type) { - if (size == 0) { - return FAILED; - } - - if (data == nullptr) { - return FAILED; - } - - auto iter = partitions_.find(type); - if (iter != partitions_.end()) 
{ - return FAILED; - } - - GeModelPartitionPtr partition = nullptr; - GE_MAKE_SHARED(partition = std::make_shared(), return FAILED); - Status ret = partition->SetType(type); - if (ret != SUCCESS) { - return FAILED; - } - ret = partition->SetData(data, size); - if (ret != SUCCESS) { - return FAILED; - } - - partitions_.insert(std::pair(type, partition)); - return SUCCESS; -} - -Status GeModel::GetPartition(ModelPartitionType type, GeModelPartitionPtr &partition) { - auto iter = partitions_.find(type); - if (iter == partitions_.end()) { - return FAILED; - } - - partition = iter->second; - return SUCCESS; -} -class OmFileSaveHelper { - public: - OmFileSaveHelper(); - ~OmFileSaveHelper(); - vector &GetModelPartitions(); - ModelPartitionTable *GetPartitionTable(); - ModelFileHeader model_header_; - ModelFileHeader &GetModelFileHeader() { return model_header_; } - void AddPartition(GeModelPartition &partition); - - private: - OmFileContext context_; -}; - -OmFileSaveHelper::OmFileSaveHelper() {} - -OmFileSaveHelper::~OmFileSaveHelper() {} - -vector &OmFileSaveHelper::GetModelPartitions() { - static std::vector tmp; - return tmp; -} - -ModelPartitionTable *OmFileSaveHelper::GetPartitionTable() { return nullptr; } - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OmFileSaveHelper::AddPartition(GeModelPartition &partition) { - context_.partition_datas_.push_back(partition); - context_.model_data_len_ += partition.size_; -} -class ModelBuilder { - public: - ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector &subgraphs, - const std::map &stream_max_parallel_num, bool hcom_parallel, int mode); - virtual ~ModelBuilder(); - Status BuildModel(ge::Model &model_def); - Status SaveWeightsToModel(ge::Model &model); - Status SaveDataToModel(ge::Model &model, ge::GeModel &ge_model); - Status PreBuildModel(); - Status BuildModelForGetTask(ge::Model &model_def); - ge::Buffer GetWeightBuffer() const; - void SetModelVersion(ge::Model &model_def); - - public: - 
ge::Buffer weight_buffer_; -}; - -ModelBuilder::ModelBuilder(ge::ComputeGraphPtr compute_graph, const std::vector &subgraphs, - const std::map &stream_max_parallel_num, bool hcom_parallel, int mode) { - weight_buffer_ = ge::Buffer(4100000); -} - -ModelBuilder::~ModelBuilder() {} - -Status ModelBuilder::SaveWeightsToModel(ge::Model &model) { return SUCCESS; } - -Status ModelBuilder::BuildModel(ge::Model &model_def) { return SUCCESS; } - -Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { return SUCCESS; } - -Status ModelBuilder::PreBuildModel() { return SUCCESS; } - -Status ModelBuilder::BuildModelForGetTask(ge::Model &model_def) { return SUCCESS; } - -void ModelBuilder::SetModelVersion(ge::Model &model_def) { return; } - -ge::Buffer ModelBuilder::GetWeightBuffer() const { return ge::Buffer(4100000); } - -} // namespace ge - -using ProcParam = struct PROC_PARAM; - -namespace ge { -#include -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_N = 0; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_C = 1; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_H = 2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_W = 3; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_N = 0; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_H = 1; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_W = 2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NHWC_DIM_C = 3; - -const uint32_t MODEL_FILE_MAGIC_NUM = 0x444F4D49; -const uint32_t MODEL_FILE_HEAD_LEN = 256; -const uint32_t MODEL_VERSION = 0x10000000; -const int MAX_FILE_SIZE_LIMIT = INT_MAX; -bool FC_WEIGHT_COMPRESS_FLAG = false; - -bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length) { - length = 10; - *buffer = new (std::nothrow) char[10](); 
- GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(*buffer == nullptr, false, "new an object failed."); - return true; -} -bool ReadProtoFromText(const char *file, google::protobuf::Message *message) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((nullptr == file || nullptr == message), return false, - "incorrect parameter. nullptr == file || nullptr == message"); - string real_path = RealPath(file); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "proto file path '%s' not valid", file); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path.c_str()) == -1, return false, "file size not valid."); - std::ifstream fs(real_path.c_str(), std::ifstream::in); - - if (!fs.is_open()) { - GELOGE(ge::FAILED, "proto file '%s' open fail.", file); - return false; - } - google::protobuf::io::IstreamInputStream input(&fs); - bool ret = google::protobuf::TextFormat::Parse(&input, message); - GE_IF_BOOL_EXEC(ret != true, - GELOGI("call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); - fs.close(); - return ret; -} - -uint64_t GetCurrentTimestap() { return 0; } - -// get length of file -long GetFileLength(const std::string &input_file) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(input_file.empty(), return -1, "input_file path is null."); - string real_path = RealPath(input_file.c_str()); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str()); - unsigned long long file_length = 0; - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, return -1, - "open file failed."); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length <= 0), return -1, "file length <= 0, not valid."); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > MAX_FILE_SIZE_LIMIT, return -1, "file size %llu is out of limit: %d.", - file_length, MAX_FILE_SIZE_LIMIT); - return file_length; -} -string RealPath(const char *path) { - string s = path; - if (s.size() >= PATH_MAX) { - return ""; - } - if (s == "." 
|| s == "1") { - return path; - // for insert_aipp_op unittest - } else if (s.substr(0, 3) == "llt") { - return path; - } else { - return "22"; - } -} - -bool CheckInputPathValid(const string &file_path) { return true; } -bool ReadProtoFromArray(const void *data, int size, Message *proto) { return true; } - -struct ModelPartition { - ModelPartitionType type; - uint8_t *data = 0; - uint32_t size = 0; -}; - -class InsertNewOpUtil { - public: - InsertNewOpUtil(); - ~InsertNewOpUtil(); - Status InsertNewOps(const ComputeGraphPtr &graph); - Status InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path); - Status Parse(const char *conf_path); -}; - -InsertNewOpUtil::InsertNewOpUtil() {} - -Status InsertNewOpUtil::InsertNewOps(const ComputeGraphPtr &graph) { return SUCCESS; } - -Status InsertNewOpUtil::InsertAippOps(ge::ComputeGraphPtr graph, std::string &aipp_config_path) { return SUCCESS; } - -Status InsertNewOpUtil::Parse(const char *conf_path) { return SUCCESS; } - -Status InitOME() { return SUCCESS; } -class GraphOptimizer { - public: - Status Optimize(); - Status OptimizeAfterCal(); - Status AdjustDataOpDesc(); - Status InsertTransOp(); - Status FusionFmkop(); - Status Optimize4Cloud(); - Status Optimize4FlowCtrl(); - Status OptimizeBeforeBuild(); -}; -Status GraphOptimizer::Optimize() { return SUCCESS; } - -Status Init(Options options) { return SUCCESS; } - -Status Shutdown(Options options) { return SUCCESS; } - -class Session { - public: - // singleton - static Session *Instance(); - const uint32_t &DeviceId() const; -}; - -const uint32_t &Session::DeviceId() const { return 0; } - -Session *Session::Instance() { - static Session instance; - return &instance; -} -struct OmgContext { - domiTensorFormat_t format; - - // get input format from cmd - std::unordered_map input_nodes_format_map; - std::vector output_formats; - - // user-designate input dims - std::vector>> user_input_dims; - // global input dims - std::map> input_dims; - - // solve rename 
op e.g: Detectionoutput:SsdDetectiontOutput - std::map op_conf_map; - // save output node of network: key is op name, value = index, index is the output index of op - std::map> out_nodes_map; - // user-designate out nodes (this is used for determing the orders) - std::vector> user_out_nodes; - // save the path of cutsom_aicpu - std::vector aicpu_op_run_paths; - // save ddk - std::string ddk_version; - // save format - domiTensorFormat_t net_format; - - FrameworkType type; - // RunMode run_mode; - bool train_flag = false; - - std::string output_type; - - /// save the name of network - /// eg:faster-rcnn, based on FirstStageProcessor after scope_fusion is faster-rcnn - /// then reorder conv+reshape of FirstStageBoxPredictor/BoxEncodingPredictor - /// need to delete op of reshape - std::string net_name; -}; -} // namespace ge - -namespace domi { -ge::OmgContext &GetContext() { - static ge::OmgContext tmp; - return tmp; -} -} // namespace domi - -namespace ge { -class OpUtils { - public: - static Status InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor); - static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector &dim, - ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt); - static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor); -}; -Status OpUtils::InitTensorDescriptor(const GeTensorDesc &tensor, ccTensorDescriptor_t &cc_tensor) { - ccCreatePoolingMaskDescriptor(&cc_tensor); - return SUCCESS; -} -Status OpUtils::InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector &dim, - ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt) { - Status ret = SUCCESS; - return ret; -} - -class FileSaver { - public: - Status SaveToFile(const string &file_path, ModelFileHeader &model_file_header, - ModelPartitionTable &model_partition_table, const std::vector &partition_datas); - Status SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param, - const 
ModelFileHeader *model_file_header, bool check_sum); -}; - -Status FileSaver::SaveToFile(const string &file_path, ModelFileHeader &model_file_header, - ModelPartitionTable &model_partition_table, - const std::vector &partition_datas) { - return SUCCESS; -} - -Status FileSaver::SaveToFileWithEncrypt(const std::string file_path, const ProcParam proc_param, - const ModelFileHeader *model_file_header, bool check_sum) { - return SUCCESS; -} - -class ModelSaver : public FileSaver {}; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::DestroyTensorDescriptor( - ccTensorDescriptor_t &cc_tensor) { - if (nullptr != cc_tensor) { - ccStatus_t ret = ccDestroyTensorDescriptor(&cc_tensor); - GE_LOGE_IF(CC_STATUS_SUCCESS != ret, "ccDestroyTensorDescriptor failed. ret = %d", ret); - cc_tensor = nullptr; - } -} - -} // namespace ge - -namespace domi { -class OpRegistrationData {}; - -class OpRegistry { - public: - static OpRegistry *Instance(); - std::vector registration_datas; - - ImplyType GetImplyType(const std::string &op_type); - void GetOpTypeByImplyType(std::vector &vec_op_type, const ImplyType &imply_type); -}; - -OpRegistry *OpRegistry::Instance() { - static OpRegistry instance; - return &instance; -} - -void OpRegistry::GetOpTypeByImplyType(std::vector &vec_op_type, const ImplyType &imply_type) { - if (imply_type == ImplyType::AI_CPU) { - vec_op_type.push_back("square"); - } -} - -class OpRegistrationTbe { - public: - static OpRegistrationTbe *Instance(); - - bool Finalize(OpRegistrationData ®_data, bool is_train); -}; - -OpRegistrationTbe *OpRegistrationTbe::Instance() { - static OpRegistrationTbe instance; - return &instance; -} - -bool OpRegistrationTbe::Finalize(OpRegistrationData ®_data, bool is_train) { return true; } -} // namespace domi - -namespace ge { -class GraphPrepare { - private: - Status OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph); -}; - -Status GraphPrepare::OptimizeForPreprocess(ge::ComputeGraphPtr &compute_graph) { return 
SUCCESS; } -} // namespace ge - -namespace ge { - -Status GetOriginalType(const ge::NodePtr &node, string &type) { - type = node->GetType(); - GE_IF_BOOL_EXEC(type != FRAMEWORKOP, return SUCCESS); - ge::AttrUtils::GetStr(node->GetOpDesc(), "original_type", type); - return SUCCESS; -} - -Status SetCycleEvent(const ge::NodePtr &node) { return SUCCESS; } - -Status SetStreamLabel(const ge::NodePtr &node, const std::string &label) { - GE_CHECK_NOTNULL(node); - OpDescPtr tmp_desc = AttrUtils::CloneOpDesc(node->GetOpDesc()); - GE_CHECK_NOTNULL(tmp_desc); - - if (!AttrUtils::SetStr(tmp_desc, "_stream_label", label)) { - GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_STREAM_LABEL failed", node->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - -Status SetActiveLabelList(const ge::NodePtr &node, const std::vector &label) { - GE_CHECK_NOTNULL(node); - OpDescPtr tmp_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(tmp_desc); - // add list of active_label - if (!AttrUtils::SetListStr(tmp_desc, "_active_label", label)) { - GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_ACTIVE_LABEL_LIST failed", node->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - -Status SetSwitchBranchNodeLabel(const ge::NodePtr &node, const std::string &branch_label) { - GE_CHECK_NOTNULL(node); - OpDescPtr tmp_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(tmp_desc); - // add branch_label of switch - if (!AttrUtils::SetStr(tmp_desc, "_switch_branch_node_label", branch_label)) { - GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_BRANCH_NODE_LABEL failed", node->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - -Status SetSwitchTrueBranchFlag(const ge::NodePtr &node, bool value) { - GE_CHECK_NOTNULL(node); - OpDescPtr tmp_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(tmp_desc); - // add switch_true_branch_flag - if (!AttrUtils::SetBool(tmp_desc, "_switch_true_branch_flag", value)) { - GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG failed", node->GetName().c_str()); - 
return FAILED; - } - return SUCCESS; -} - -Status SetOriginalNodeName(const ge::NodePtr &node, const std::string &orig_name) { - GE_CHECK_NOTNULL(node); - OpDescPtr tmp_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(tmp_desc); - // record original_node_name - if (!AttrUtils::SetStr(tmp_desc, "_original_node_name", orig_name)) { - GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_ORIG_NODE_NAME failed", node->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - -Status SetCyclicDependenceFlag(const ge::NodePtr &node) { - GE_CHECK_NOTNULL(node); - OpDescPtr tmp_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(tmp_desc); - // add cyclic_dependence_flag - if (!AttrUtils::SetBool(tmp_desc, "_cyclic_dependence_flag", true)) { - GELOGE(ge::FAILED, "Op :%s set ATTR_NAME_CYCLIC_DEPENDENCE_FLAG failed", node->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - -Status SetNextIteration(const ge::NodePtr &node, const std::string &next) { - GE_CHECK_NOTNULL(node); - OpDescPtr tmp_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(tmp_desc); - - if (!AttrUtils::SetStr(tmp_desc, "_next_iteration_node", next)) { - GELOGE(ge::FAILED, "Op: %s set ATTR_NAME_NEXT_ITERATION failed", node->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} -} // namespace ge - -namespace cce { -bool ccGetFuncState(ccFuncParamType_t type) { return true; } -} // namespace cce - -namespace ge { -Status UnloadModel(uint32_t model_id) { return SUCCESS; } - -Status GetInputOutputDescInfo(uint32_t model_id, vector &input_desc, - vector &output_desc) { - return SUCCESS; -} - -Status DataInput(const InputData *input_data, OutputData *output_data) { return SUCCESS; } -/* -class ModelManager { - public: - static std::shared_ptr GetInstance(); - static void FinalizeForPtr(ModelManager *) {} - Status DataInputTensor(uint32_t model_id, const std::vector &inputs, - std::vector &outputs); - Status DataInput(const InputData &input_data, OutputData &output_data); - Status GetInputOutputDescInfo(const 
uint32_t model_id, std::vector &input_desc, - std::vector &output_desc); - Status GetInputOutputDescInfo(const uint32_t model_id, std::vector &input_desc, - std::vector &output_desc, std::vector &input_formats, - std::vector &output_formats); - Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector &input_desc, - std::vector &output_desc, - std::vector &input_formats, std::vector &output_formats); - Status Stop(uint32_t model_id); - Status Unload(uint32_t model_id); - Status LoadModelOnline(uint32_t &model_id, std::shared_ptr &model, - std::shared_ptr listener); - Status Start(uint32_t model_id); - Status GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size); - Status LoadModelOffline(uint32_t &model_id, const ModelData &model, std::shared_ptr listener = nullptr, - void *dev_ptr = nullptr, size_t mem_size = 0, void *weight_ptr = nullptr, - size_t weight_size = 0); - Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector &input_queue_ids, - const std::vector &output_queue_ids); - - Status HandleCommand(const Command &command); - Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data); - void DestroyAicpuSession(uint64_t session_id); -}; -void ModelManager::DestroyAicpuSession(uint64_t session_id) {} -std::shared_ptr ModelManager::GetInstance() { - static std::shared_ptr instance_ptr = - shared_ptr(new ModelManager(), ModelManager::FinalizeForPtr); - return instance_ptr; -} - -Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector &inputs, - std::vector &outputs) { - return SUCCESS; -} - -Status ModelManager::DataInput(const InputData &input_data, OutputData &output_data) { return SUCCESS; } - -Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector &input_desc, - std::vector &output_desc, - std::vector &input_formats, - std::vector &output_formats) { - return SUCCESS; -} - -Status 
ModelManager::GetInputOutputDescInfo(const uint32_t model_id, std::vector &input_desc, - std::vector &output_desc) { - return SUCCESS; -} - -Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, - std::vector &input_desc, - std::vector &output_desc, - std::vector &input_formats, - std::vector &output_formats) { - return SUCCESS; -} - -Status ModelManager::Stop(uint32_t model_id) { return SUCCESS; } - -Status ModelManager::Unload(uint32_t model_id) { return SUCCESS; } - -Status ModelManager::LoadModelOnline(uint32_t &model_id, std::shared_ptr &model, - std::shared_ptr listener) { - return SUCCESS; -} - -Status ModelManager::Start(uint32_t model_id) { return SUCCESS; } - -Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { return SUCCESS; } - -Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, - void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { - return SUCCESS; -} - -Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, - const std::vector &input_queue_ids, - const std::vector &output_queue_ids) { - return SUCCESS; -} - -Status ModelManager::HandleCommand(const Command &command) { return SUCCESS; } - -Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data) { - return SUCCESS; -} - -*/ - -} // namespace ge - -namespace ge { - -enum JobState { - JOBSTATE_WAITING = 1, - JOBSTATE_RUNNING, - JOBSTATE_KILLING, - JOBSTATE_SUCCEED, - JOBSTATE_FAILED, - JOBSTATE_KILLED, - JOBSTATE_UNKOWN -}; - -enum JobSubState { - JOBSUBSTATE_ENV_INIT = 201, - JOBSUBSTATE_ENV_FIN, - JOBSUBSTATE_RESOUCE_ALLOC, - JOBSUBSTATE_MODEL_COMPILE, - JOBSUBSTATE_GRAPH_PREPARE, - JOBSUBSTATE_GRAPH_SPLIT, - JOBSUBSTATE_GRAPH_OPTIMIZE, - JOBSUBSTATE_GRAPH_BUILD, - JOBSUBSTATE_GRAPH_LOAD, - JOBSUBSTATE_GRAPH_EXEC, - JOBSUBSTATE_GRAPH_UNLOAD, - 
JOBSUBSTATE_OTHER -}; - -enum ErrorModule { - ERROR_MODULE_DRIVER = 0x01, - ERROR_MODULE_RUNTIME = 0x04, - ERROR_MODULE_CCE = 0x06, - ERROR_MODULE_FMK = 0x08, - ERROR_MODULE_HCCL = 0x12 -}; - -class CsaInteract { - public: - CsaInteract &GetInstance(); - void WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state); - void Init(int32_t dev_index, int64_t job_id); - Status WriteJobState(JobState job_state, JobSubState job_sub_state = JOBSUBSTATE_OTHER, - uint32_t module_ret_errcode = SUCCESS, ErrorModule error_module = ERROR_MODULE_FMK); - // device index - int32_t dev_index_; - // job id - int64_t job_id_; - // is initialization complete - bool is_init_; - // current job state - JobState curr_state_; - // job state file - std::string job_state_file_; - // network connectivity detect file - std::string hcom_detect_file_; - // identification of internal errors that occurred during the training - bool is_have_internal_error_; -}; - -CsaInteract &CsaInteract::GetInstance() { - static CsaInteract instance; - return instance; -} - -void CsaInteract::Init(int32_t dev_index, int64_t job_id) { - if (!is_init_) { - dev_index_ = dev_index; - job_id_ = job_id; - string csa_path_prefix; - if (std::getenv(FMK_STATUS_FILE_DIR_ENV) != nullptr) { - csa_path_prefix = std::getenv(FMK_STATUS_FILE_DIR_ENV); - } - if (!csa_path_prefix.empty()) { - std::string job_state_file = csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + JOBSTATE_FILE_NAME; - std::string hcom_detect_file = - csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + HCOM_DETECT_FILE_NAME; - job_state_file_ = RealPath(job_state_file.c_str()); - hcom_detect_file_ = RealPath(hcom_detect_file.c_str()); - } - is_init_ = true; - } -} - -void CsaInteract::WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state) {} - -} // namespace ge - -Status ModelParserBase::LoadFromFile(const char *model_path, const char *key, int32_t 
priority, - ge::ModelData &model_data) { - return SUCCESS; -} - -Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode, - ErrorModule error_module) { - return SUCCESS; -} - -namespace ge { - -static std::map data_type_to_length = { - {DT_BOOL, sizeof(bool)}, {DT_INT64, sizeof(int64_t)}, {DT_UINT64, sizeof(int64_t)}, {DT_FLOAT, sizeof(float)}, - {DT_INT32, sizeof(int32_t)}, {DT_UINT32, sizeof(int32_t)}, {DT_INT8, sizeof(char)}, {DT_UINT8, sizeof(char)}, - {DT_INT16, sizeof(int16_t)}, {DT_UINT16, sizeof(int16_t)}, {DT_FLOAT16, sizeof(int16_t)}, {DT_DOUBLE, sizeof(double)}, -}; - -class TypeUtils { - public: - static bool GetDataTypeLength(ge::DataType data_type, uint32_t &length); - static bool CheckUint64MulOverflow(uint64_t a, uint32_t b); -}; - -bool TypeUtils::GetDataTypeLength(ge::DataType data_type, uint32_t &length) { - auto it = data_type_to_length.find(data_type); - if (it != data_type_to_length.end()) { - length = it->second; - return true; - } else { - return false; - } -} - -bool TypeUtils::CheckUint64MulOverflow(uint64_t a, uint32_t b) { - // Not overflow - if (a == 0) { - return false; - } - if ((ULLONG_MAX / a) >= b) { - return false; - } - return true; -} -} // namespace ge diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 3808e5d6..e6a7d66b 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -27,8 +27,8 @@ rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) { } rtError_t rtCtxGetCurrent(rtContext_t *ctx) { - int x = 1; - *ctx = (void *)x; + uintptr_t x = 1; + *ctx = (rtContext_t *)x; return RT_ERROR_NONE; } @@ -163,7 +163,7 @@ rtError_t rtSetKernelReportCallback(rtKernelReportCallback callback) { rt_kernel_info.module_addr = (void *)100; rt_kernel_info.module_size = 100; - rtStream_t stream; + rtStream_t stream = nullptr; callback(stream, &rt_kernel_info); return RT_ERROR_NONE; 
} @@ -200,7 +200,8 @@ rtError_t rtModelCreate(rtModel_t *model, uint32_t flag) { } rtError_t rtModelDestroy(rtModel_t model) { - delete model; + uint32_t *stub = static_cast(model); + delete stub; return RT_ERROR_NONE; } diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 3c8fba71..91e18796 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -135,6 +135,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/common/types.cc" "${GE_CODE_DIR}/ge/common/fmk_error_codes.cc" "${GE_CODE_DIR}/ge/common/op/ge_op_utils.cc" + "${GE_CODE_DIR}/ge/common/context/ctx.cc" "${GE_CODE_DIR}/ge/graph/manager/util/variable_accelerate_ctrl.cc" "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" "${GE_CODE_DIR}/ge/generator/ge_generator.cc" @@ -779,7 +780,6 @@ set(OTHERS_TEST_FILES ) list(APPEND COMMON_SHARED_LIBRARIES - omg_stub c_sec slog_stub cce_ge_stub diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index d1b1e761..e26aa86e 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -108,6 +108,26 @@ static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { ge::AttrUtils::SetInt(op_desc, ge::ATTR_NAME_STREAM_SWITCH_COND, 0); return op_desc; } + +TEST_F(UtestGeExecutor, load_data_from_file) { + GeExecutor ge_executor; + ge_executor.isInit_ = true; + + string test_smap = "/tmp/" + std::to_string(getpid()) + "_maps"; + string self_smap = "/proc/" + std::to_string(getpid()) + "/maps"; + string copy_smap = "cp " + self_smap + " " + test_smap; + EXPECT_EQ(system(copy_smap.c_str()), 0); + + ModelData model_data; + EXPECT_EQ(ge_executor.LoadDataFromFile(test_smap, model_data), SUCCESS); + + EXPECT_NE(model_data.model_data, nullptr); + delete[] static_cast(model_data.model_data); + model_data.model_data = nullptr; + + ge_executor.isInit_ = false; +} + /* TEST_F(UtestGeExecutor, fail_UnloadModel_model_manager_stop_unload_error) { uint32_t 
model_id = 1; From 608c054f2beb933cee1cf4d19772a3c9bbd264ef Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 27 Feb 2021 18:09:51 +0800 Subject: [PATCH 42/44] Add single_op model_id. --- ge/executor/ge_executor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index f33d7758..d02ae3dc 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -935,7 +935,7 @@ Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelDa } Status GeExecutor::LoadSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, - SingleOp **single_op, const uint64_t model_id) { + SingleOp **single_op, const uint64_t model_id) { return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op, model_id); } @@ -945,7 +945,7 @@ Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge:: } Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, - DynamicSingleOp **single_op, const uint64_t model_id) { + DynamicSingleOp **single_op, const uint64_t model_id) { return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op, model_id); } From e8dd78ec648ba317d7a62c9cfaa5017a8b91f03a Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 27 Feb 2021 18:21:11 +0800 Subject: [PATCH 43/44] remove stub depends/omg --- tests/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index abea9fdc..a56705e0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -19,7 +19,6 @@ add_subdirectory(depends/cce) add_subdirectory(depends/slog) add_subdirectory(depends/mmpa) add_subdirectory(depends/runtime) -add_subdirectory(depends/omg) add_subdirectory(depends/hccl) add_subdirectory(depends/profiler) add_subdirectory(depends/error_manager) From 
a1017c508c78dbba32160cc3e2ff4b0de8858203 Mon Sep 17 00:00:00 2001 From: wxl Date: Mon, 1 Mar 2021 10:32:03 +0800 Subject: [PATCH 44/44] fix slice kernel bug --- ge/host_kernels/slice_kernel.cc | 38 ++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index c3274465..025d7f66 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -16,6 +16,8 @@ #include "host_kernels/slice_kernel.h" +#include + #include "common/ge_inner_error_codes.h" #include "common/op/ge_op_utils.h" #include "common/types.h" @@ -31,6 +33,30 @@ const size_t kSliceInputSize = 3; const size_t kSliceInputIndexX = 0; const size_t kSliceInputIndexBegin = 1; const size_t kSliceInputIndexSize = 2; +const std::set kSupportedDataTypeToLength = { + DT_BOOL, + DT_INT64, + DT_UINT64, + DT_FLOAT, + DT_INT32, + DT_UINT32, + DT_INT8, + DT_UINT8, + DT_INT16, + DT_UINT16, + DT_FLOAT16, + DT_DOUBLE, + DT_DUAL, + DT_DUAL_SUB_INT8, + DT_DUAL_SUB_UINT8, + DT_COMPLEX64, + DT_COMPLEX128, + DT_QINT8, + DT_QINT16, + DT_QINT32, + DT_QUINT8, + DT_QUINT16, +}; } // namespace Status SliceKernel::Compute(const OpDescPtr attr, const std::vector &input, @@ -56,6 +82,16 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetTensorDesc().GetDataType(); + // check supported + if (kSupportedDataTypeToLength.count(data_type) == 0) { + GELOGW("input_x data_type is [%s], does not supported!", TypeUtils::DataTypeToSerialString(data_type).c_str()); + return NOT_CHANGED; + } + uint32_t type_size = 0; + bool is_success = TypeUtils::GetDataTypeLength(data_type, type_size); + if (!is_success) { + return NOT_CHANGED; + } // check data type of begin and size if (begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { GELOGW("Data type of begin and size for slice are not DT_INT32."); @@ -69,7 +105,7 @@ Status SliceKernel::Compute(const 
OpDescPtr attr, const std::vectorGetData().size() / sizeof(int32_t); + size_t data_size = x_->GetData().size() / type_size; size_t begin_size = begin->GetData().size() / sizeof(int32_t); size_t size_size = size->GetData().size() / sizeof(int32_t); const ge::GeShape &x_shape = x_->GetTensorDesc().GetShape();