From 8d2561748bc018a05e3a65719a3735f25b18249a Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Thu, 17 Dec 2020 13:58:41 +0800 Subject: [PATCH 01/33] unify mutex for model mananger --- .../load/new_model_manager/model_manager.cc | 26 +++++++++---------- .../load/new_model_manager/model_manager.h | 3 +-- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index fdc4915f..76c90277 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -91,6 +91,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) { std::vector v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); + std::lock_guard lock(map_mutex_); auto iter = model_aicpu_kernel_.find(model_key); if (iter != model_aicpu_kernel_.end()) { GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id); @@ -178,7 +179,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u } void ModelManager::DestroyAicpuSession(uint64_t session_id) { - std::lock_guard lock(sess_ids_mutex_); + std::lock_guard lock(map_mutex_); auto it = sess_ids_.find(session_id); if (it == sess_ids_.end()) { GELOGI("The session: %lu not created.", session_id); @@ -207,7 +208,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { } ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); auto hybrid_davinci_model = hybrid_model_map_.find(model_id); if (hybrid_davinci_model != hybrid_model_map_.end()) { uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); @@ -227,7 +228,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); @@ -240,7 +241,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ } ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); std::vector v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { @@ -252,7 +253,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i } ModelManager::~ModelManager() { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); model_map_.clear(); model_aicpu_kernel_.clear(); cust_aicpu_so_.clear(); @@ -360,18 +361,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr &davinci_model) { GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id); - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); model_map_[id] = davinci_model; } void ModelManager::InsertModel(uint32_t id, shared_ptr &hybrid_model) { GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id); - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); hybrid_model_map_[id] = hybrid_model; } Status ModelManager::DeleteModel(uint32_t id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); auto it = model_map_.find(id); auto hybrid_model_it = hybrid_model_map_.find(id); @@ -394,14 +395,14 @@ Status ModelManager::DeleteModel(uint32_t id) { } std::shared_ptr ModelManager::GetModel(uint32_t id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); auto it = model_map_.find(id); return (it == model_map_.end()) ? nullptr : it->second; } std::shared_ptr ModelManager::GetHybridModel(uint32_t id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); auto it = hybrid_model_map_.find(id); return (it == hybrid_model_map_.end()) ? nullptr : it->second; @@ -1277,7 +1278,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy } Status ModelManager::CreateAicpuSession(uint64_t session_id) { - std::lock_guard lock(sess_ids_mutex_); + std::lock_guard lock(map_mutex_); auto it = sess_ids_.find(session_id); // never been created by any model if (it == sess_ids_.end()) { @@ -1496,8 +1497,7 @@ void ModelManager::GenModelId(uint32_t *id) { if (id == nullptr) { return; } - - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); *id = ++max_model_id_; } diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index dc3a5219..80cc0a5d 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -355,8 +355,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::map> hybrid_model_map_; std::map> model_aicpu_kernel_; uint32_t max_model_id_; - std::mutex map_mutex_; - std::mutex sess_ids_mutex_; + std::recursive_mutex map_mutex_; std::mutex session_id_create_mutex_; static::std::mutex exeception_infos_mutex_; uint64_t session_id_bias_; From 30ae5dbb1014d0accbda20e9acadbfc538805479 Mon Sep 17 00:00:00 2001 From: baker Date: Thu, 17 Dec 2020 19:32:50 +0800 Subject: [PATCH 02/33] add onnx model parse api --- inc/framework/common/ge_types.h | 3 +++ inc/framework/omg/parser/parser_inner_ctx.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index fb1f0be1..eea9824b 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -37,6 +37,9 @@ enum FrameworkType { MINDSPORE = 1, TENSORFLOW = 3, ANDROID_NN, +#ifndef ONLY_COMPILE_OPEN_SRC + ONNX, +#endif FRAMEWORK_RESERVED, }; diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h index f24e2639..5d91bd46 100644 --- a/inc/framework/omg/parser/parser_inner_ctx.h +++ b/inc/framework/omg/parser/parser_inner_ctx.h @@ -59,7 +59,7 @@ struct ParserContext { bool train_flag = false; domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; domi::FrameworkType type = domi::FRAMEWORK_RESERVED; - RunMode run_mode = ONLY_PRE_CHECK; + RunMode run_mode = GEN_OM_MODEL; // save caffe custom proto path, used by caffe parse std::string custom_proto_path; // save caffe proto path, used by caffe parse From 01a18c3fd51c5a6157a2e83488849b18312d90df Mon Sep 17 00:00:00 2001 From: l00444296 Date: Fri, 18 Dec 2020 19:15:28 +0800 Subject: [PATCH 03/33] Feature: reset shape of dynamic single op --- ge/CMakeLists.txt | 2 + ge/ge_inference.mk | 1 + ge/ge_runner.mk | 1 + ge/generator/ge_generator.cc | 52 +++++- ge/graph/manager/graph_manager.cc | 14 +- .../dynamic_single_op_reset_shape_pass.cc | 159 ++++++++++++++++++ .../dynamic_single_op_reset_shape_pass.h | 36 ++++ tests/ut/ge/CMakeLists.txt | 1 + 8 files changed, 264 insertions(+), 2 deletions(-) create mode 100644 ge/graph/passes/dynamic_single_op_reset_shape_pass.cc create mode 100644 ge/graph/passes/dynamic_single_op_reset_shape_pass.h diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 26a7ee99..3a0f7638 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -144,6 +144,7 @@ set(TRAIN_SRC_LIST "graph/passes/atomic_addr_clean_pass.cc" "graph/passes/mark_same_addr_pass.cc" "graph/passes/mark_graph_unknown_status_pass.cc" + "graph/passes/dynamic_single_op_reset_shape_pass.cc" "graph/passes/mark_agnostic_pass.cc" "graph/partition/dynamic_shape_partition.cc" "graph/partition/stage_partition.cc" @@ -434,6 +435,7 @@ set(INFER_SRC_LIST "graph/passes/atomic_addr_clean_pass.cc" "graph/passes/mark_same_addr_pass.cc" "graph/passes/mark_graph_unknown_status_pass.cc" + "graph/passes/dynamic_single_op_reset_shape_pass.cc" "graph/passes/mark_agnostic_pass.cc" "graph/common/omg_util.cc" "graph/common/bcast.cc" diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 80887e8b..bfb612ea 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -109,6 +109,7 @@ OMG_HOST_SRC_FILES := \ graph/passes/atomic_addr_clean_pass.cc \ graph/passes/mark_same_addr_pass.cc \ graph/passes/mark_graph_unknown_status_pass.cc \ + graph/passes/dynamic_single_op_reset_shape_pass.cc \ graph/passes/mark_agnostic_pass.cc \ graph/common/omg_util.cc \ graph/common/bcast.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index c0f59320..25718e9b 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -113,6 +113,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/atomic_addr_clean_pass.cc \ graph/passes/mark_same_addr_pass.cc \ graph/passes/mark_graph_unknown_status_pass.cc \ + graph/passes/dynamic_single_op_reset_shape_pass.cc \ graph/passes/mark_agnostic_pass.cc \ graph/partition/dynamic_shape_partition.cc \ graph/partition/stage_partition.cc \ diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index aecd87af..acb029e9 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -47,6 +47,8 @@ const char *const kEngineNameDefault = "default"; const char *const kVectorEngine = "VectorEngine"; const char *const kAIcoreEngine = "AIcoreEngine"; const char *const kFileNameSuffix = "online"; +const size_t kDynamicDimSize = 1; +const int64_t kDynamicDimValue = -2; std::map engine_type_map{ {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; @@ -248,6 +250,43 @@ static void GetOpsProtoPath(string &opsproto_path) { opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); } +static Status CheckShapeReset(const OpDescPtr &op_desc, bool &change_shape_flag) { + GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); + change_shape_flag = false; + for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) { + auto input_desc = op_desc->MutableInputDesc(static_cast(i)); + GE_CHECK_NOTNULL(input_desc); + // pass scalar input desc + auto dims = input_desc->GetShape().GetDims(); + if (dims.size() == kDynamicDimSize && dims[0] == kDynamicDimValue) { + change_shape_flag = true; + } + } + return SUCCESS; +} + +static void ResetTensorVecShape(const vector &inputs, vector &inputs_dynamic) { + for (auto input : inputs) { + auto input_desc = input.GetTensorDesc(); + GeShape shape_ori = input_desc.GetShape(); + + std::vector dynamic_shape_dims = {kDynamicDimValue}; + GeShape dynamic_shape(dynamic_shape_dims); + + ge::GeTensor inputTensor; + ge::GeTensorDesc desc(input_desc); + + bool is_const = false; + (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const); + if (!is_const && shape_ori.GetDims().size() > 0) { + desc.SetShape(dynamic_shape); + } + + inputTensor.SetTensorDesc(desc); + inputs_dynamic.push_back(inputTensor); + } +} + class GeGenerator::Impl { public: Impl(OmgContext &omg_context) : omg_context_(omg_context) {} @@ -638,7 +677,18 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in } GeModelPtr &ge_model = name_to_ge_model.begin()->second; GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); - GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); + + bool dynamic_flag = false; + if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) { + vector inputs_dynamic; + vector outputs_dynamic; + ResetTensorVecShape(inputs, inputs_dynamic); + ResetTensorVecShape(outputs, outputs_dynamic); + GE_CHK_STATUS_RET_NOLOG( + impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); + } else { + GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); + } GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); return SUCCESS; } diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 4f5c190d..46a0ec2e 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -56,6 +56,7 @@ #include "graph/passes/iterator_op_pass.h" #include "graph/passes/link_gen_mask_nodes_pass.h" #include "graph/passes/mark_graph_unknown_status_pass.h" +#include "graph/passes/dynamic_single_op_reset_shape_pass.h" #include "graph/passes/merge_pass.h" #include "graph/passes/merge_input_memcpy_pass.h" #include "graph/passes/merge_to_stream_merge_pass.h" @@ -631,11 +632,22 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) { GE_CHECK_NOTNULL(compute_graph); + PassManager pass_for_dynamic_shape_reset_optimize; + GE_CHK_STATUS_RET(pass_for_dynamic_shape_reset_optimize.AddPass( + "SetSubgraph::AfterSetSubgraph::DynamicSingleOpResetShapePass", new (std::nothrow) DynamicSingleOpResetShapePass)) + GE_TIMESTAMP_START(pass_for_dynamic_shape_reset_optimize); + Status ret = pass_for_dynamic_shape_reset_optimize.Run(compute_graph); + GE_TIMESTAMP_END(pass_for_dynamic_shape_reset_optimize, "SetSubgraph::AfterSetSubgraph"); + if (ret != SUCCESS && ret != NOT_CHANGED) { + GELOGE(ret, "Run passes when optimize subgraph failed"); + return ret; + } + auto sub_graph_map = partitioner.GetSubGraphMap(); GELOGD("Directly optimize subgraph with build mode:%s, and step:%s.", options_.build_mode.c_str(), options_.build_step.c_str()); - Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); + ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); if (ret != SUCCESS) { GELOGE(ret, "Multiply optimize subgraph failed"); return ret; diff --git a/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc b/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc new file mode 100644 index 00000000..d50b6df9 --- /dev/null +++ b/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc @@ -0,0 +1,159 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/dynamic_single_op_reset_shape_pass.h" +#include "common/ge_inner_error_codes.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/type_utils.h" +#include "graph/debug/ge_attr_define.h" + +namespace ge { +namespace { +const int64_t kDynamicShapeDim = -2; +const char *const kEngineNameAiCpu = "DNN_VM_AICPU_ASCEND"; +const char *const kEngineNameAiCpuTf = "DNN_VM_AICPU"; +} // namespace +Status DynamicSingleOpResetShapePass::Run(ComputeGraphPtr graph) { + GE_CHECK_NOTNULL(graph); + + std::shared_ptr instance = ge::GELib::GetInstance(); + if (instance == nullptr || !instance->InitFlag()) { + GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Run CompileNodesPass failed."); + return ge::GE_CLI_GE_NOT_INITIALIZED; + } + + // pass if graph has not aicpu node. + bool is_not_aicpu = false; + if (CheckAllAicpuNodes(graph, is_not_aicpu) != SUCCESS) { + GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Check if graph has not aicpu node failed."); + return ge::GE_CLI_GE_NOT_INITIALIZED; + } + if (is_not_aicpu) { + GELOGI("The graph [%s] has not aicpu node, whose aicpu nodes would not be reset dynamic shape", + graph->GetName().c_str()); + return SUCCESS; + } + + for (const auto &node : graph->GetDirectNode()) { + GE_CHECK_NOTNULL(node->GetOpDesc()); + // pass input and output node + if (node->GetType() == DATA || node->GetType() == CONSTANT || node->GetType() == CONSTANTOP || + node->GetType() == NETOUTPUT) { + continue; + } + + // pass node without attr: ATTR_DYNAMIC_SHAPE_SINGLE_AICPU + bool single_aicpu_unknown = false; + if (!AttrUtils::GetBool(node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, single_aicpu_unknown) || + !single_aicpu_unknown) { + continue; + } + + // reset aicpu shape to unknown shape + auto op_desc = node->GetOpDesc(); + if (ResetOpShape(op_desc) != SUCCESS) { + GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Reset node[%s] dynamic shapr failed.", node->GetName().c_str()); + return ge::GE_CLI_GE_NOT_INITIALIZED; + } + GELOGD("Reset dynamic aicpu node [%s] shape success!", node->GetName().c_str()); + } + + GELOGD("Reset dynamic aicpu nodes shape of graph [%s] success!", graph->GetName().c_str()); + return SUCCESS; +} + +Status DynamicSingleOpResetShapePass::CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu) { + is_not_aicpu = false; + for (const auto &node : graph->GetDirectNode()) { + GE_CHECK_NOTNULL(node->GetOpDesc()); + // pass input and output node + if (node->GetType() == DATA || node->GetType() == CONSTANT || node->GetType() == CONSTANTOP || + node->GetType() == NETOUTPUT) { + continue; + } + + // find if there are aicpu nodes. + auto op_desc = node->GetOpDesc(); + string engine_name = op_desc->GetOpEngineName(); + if (engine_name.empty()) { + GELOGE(GRAPH_FAILED, "Get engine failed of node[%s].", node->GetName().c_str()); + return GRAPH_FAILED; + } + if (engine_name != kEngineNameAiCpu && engine_name != kEngineNameAiCpuTf) { + is_not_aicpu = true; + return SUCCESS; + } + } + return SUCCESS; +} + +bool DynamicSingleOpResetShapePass::CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc) { + bool is_const = false; + (void)AttrUtils::GetBool(input_tensor_desc, CONST_ATTR_NAME_INPUT, is_const); + return is_const; +} + +Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) { + GE_CHECK_NOTNULL(op_desc); + std::vector dynamic_shape_dims = {kDynamicShapeDim}; + GeShape dynamic_shape(dynamic_shape_dims); + bool reset_shape_flag = false; + if (ResetInputTensorShape(op_desc, dynamic_shape, reset_shape_flag) == SUCCESS && reset_shape_flag) { + (void)ResetOutputTensorShape(op_desc, dynamic_shape); + } + return SUCCESS; +} + +Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, + bool &reset_shape_flag) { + reset_shape_flag = false; + GE_CHECK_NOTNULL(op_desc); + for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) { + auto input_desc = op_desc->MutableInputDesc(static_cast(i)); + GE_CHECK_NOTNULL(input_desc); + // pass scalar input desc + auto dims_ori = input_desc->GetShape().GetDims(); + if (dims_ori.size() == 0) { + continue; + } + // pass const input + if (CheckIfConstInput(input_desc)) { + continue; + } + reset_shape_flag = true; + input_desc->SetShape(dynamic_shape); + } + return SUCCESS; +} + +Status DynamicSingleOpResetShapePass::ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape) { + GE_CHECK_NOTNULL(op_desc); + for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) { + auto output_desc = op_desc->MutableOutputDesc(static_cast(i)); + GE_CHECK_NOTNULL(output_desc); + // pass scalar input desc + auto output_dims_ori = output_desc->GetShape().GetDims(); + if (output_dims_ori.size() == 0) { + continue; + } + output_desc->SetShape(dynamic_shape); + } + return SUCCESS; +} +} // namespace ge \ No newline at end of file diff --git a/ge/graph/passes/dynamic_single_op_reset_shape_pass.h b/ge/graph/passes/dynamic_single_op_reset_shape_pass.h new file mode 100644 index 00000000..897fcac6 --- /dev/null +++ b/ge/graph/passes/dynamic_single_op_reset_shape_pass.h @@ -0,0 +1,36 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_DYNAMIC_SINGLE_OP_RESET_SHAPE_PASS_H_ +#define GE_GRAPH_PASSES_DYNAMIC_SINGLE_OP_RESET_SHAPE_PASS_H_ +#include "graph/graph.h" +#include "inc/graph_pass.h" +#include "init/gelib.h" + +namespace ge { +class DynamicSingleOpResetShapePass : public GraphPass { + public: + Status Run(ComputeGraphPtr graph) override; + + private: + Status ResetOpShape(OpDescPtr &op_desc); + Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, bool &reset_shape_flag); + Status ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape); + Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu); + bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc); +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_DYNAMIC_SINGLE_OP_RESET_SHAPE_PASS_H_ diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index fb065529..6fad46bf 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -182,6 +182,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/atomic_addr_clean_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/mark_same_addr_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/mark_graph_unknown_status_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/mark_agnostic_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/dimension_compute_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/dimension_adjust_pass.cc" From 877569858f88218bc8ed61d1084145c387c2b5aa Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 18 Dec 2020 15:38:33 +0800 Subject: [PATCH 04/33] parse bp fp point --- ge/common/profiling/profiling_manager.cc | 37 ++++++++++++++++-------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 214f58f4..f6b1c0ce 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -809,28 +809,41 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP return; } // ProfApi mode and training trace is set - try { - char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 }; + // Parse options first + char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 }; + bool is_profiling_valid = false; + std::string profiling_options; + if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS && + !profiling_options.empty()) { + is_profiling_valid = true; + } else { INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX); if (ret != EN_OK) { GELOGI("PROFILING_OPTIONS env is not exist."); return; } GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options); - Json prof_options = Json::parse(env_profiling_options); + profiling_options = env_profiling_options; + is_profiling_valid = true; + } + if (is_profiling_valid) { + try { + Json prof_options = Json::parse(profiling_options); - fp_point_ = prof_options[kFpPoint]; - bp_point_ = prof_options[kBpPoint]; + fp_point_ = prof_options[kFpPoint]; + bp_point_ = prof_options[kBpPoint]; - fp_point = fp_point_; - bp_point = bp_point_; - if (!fp_point_.empty() && !bp_point_.empty()) { - GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); + fp_point = fp_point_; + bp_point = bp_point_; + if (!fp_point_.empty() && !bp_point_.empty()) { + GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); + } + } catch (...) { + GELOGW("Json prof options is invalid."); + return; } - } catch (...) { - GELOGE(FAILED, "Json prof options is invalid."); - return; } + return; } From b3005174f071a46d52a4091335ce4fd0947351d9 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 18 Dec 2020 19:59:42 +0800 Subject: [PATCH 05/33] update profiling format --- ge/common/profiling/ge_profiling.cc | 4 +-- ge/common/profiling/profiling_manager.cc | 3 +- ge/common/profiling/profiling_manager.h | 33 ++++++++++--------- inc/framework/common/profiling/ge_profiling.h | 2 +- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index 640f77a1..2717f215 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -112,7 +112,6 @@ ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { GELOGW("Msprof ctrl callback is exist, just ignore it."); } else { - GELOGI("GE register Msprof ctrl callback."); ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); } return ge::SUCCESS; @@ -124,7 +123,6 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { return ge::PARAM_INVALID; } // Pass MsprofSetDeviceCallback to runtime - GELOGI("GE pass setdevice callback to runtime."); ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast(func)); if (rt_ret != ge::SUCCESS) { GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); @@ -158,7 +156,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le if (type != kProfCommandhandleFinalize) { GE_CHECK_NOTNULL(data); } - ProfCommandHandleData *prof_config_param = (ProfCommandHandleData *)data; + ProfCommandHandleData *prof_config_param = reinterpret_cast(data); auto iter = kProfCommandTypeMap.find(type); if (iter == kProfCommandTypeMap.end()) { GELOGW("The prof comand type is invalid."); diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index f6b1c0ce..01369a2c 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -805,7 +805,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP if (!fp_point_.empty() && !bp_point_.empty()) { fp_point = fp_point_; bp_point = bp_point_; - GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str()); + GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), + fp_point.c_str()); return; } // ProfApi mode and training trace is set diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 5fa4fac4..718c8cc9 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -36,21 +36,21 @@ using Json = nlohmann::json; namespace { const std::string GE_PROFILING_MODULE = "Framework"; // DataTypeConfig MASK - #define PROF_ACL_API_MASK 0x0001 - #define PROF_TASK_TIME_MASK 0x0002 - #define PROF_AICORE_METRICS_MASK 0x0004 - #define PROF_AICPU_TRACE_MASK 0x0008 - #define PROF_MODEL_EXECUTE_MASK 0x0010 - #define PROF_RUNTIME_API_MASK 0x0020 - #define PROF_RUNTIME_TRACE_MASK 0x0040 - #define PROF_SCHEDULE_TIMELINE_MASK 0x0080 - #define PROF_SCHEDULE_TRACE_MASK 0x0100 - #define PROF_AIVECTORCORE_METRICS_MASK 0x0200 - #define PROF_SUBTASK_TIME_MASK 0x0400 - #define PROF_TRAINING_TRACE_MASK 0x0800 - #define PROF_HCCL_TRACE_MASK 0x1000 - #define PROF_DATA_PROCESS_MASK 0x2000 - #define PROF_MODEL_LOAD_MASK 0x8000000000000000 + const int PROF_ACL_API_MASK = 0x0001; + const int PROF_TASK_TIME_MASK = 0x0002; + const int PROF_AICORE_METRICS_MASK = 0x0004; + const int PROF_AICPU_TRACE_MASK = 0x0008; + const int PROF_MODEL_EXECUTE_MASK = 0x0010; + const int PROF_RUNTIME_API_MASK = 0x0020; + const int PROF_RUNTIME_TRACE_MASK = 0x0040; + const int PROF_SCHEDULE_TIMELINE_MASK = 0x0080; + const int PROF_SCHEDULE_TRACE_MASK = 0x0100; + const int PROF_AIVECTORCORE_METRICS_MASK = 0x0200; + const int PROF_SUBTASK_TIME_MASK = 0x0400; + const int PROF_TRAINING_TRACE_MASK = 0x0800; + const int PROF_HCCL_TRACE_MASK = 0x1000; + const int PROF_DATA_PROCESS_MASK = 0x2000; + const int PROF_MODEL_LOAD_MASK = 0x8000000000000000; } // namespace namespace ge { @@ -80,7 +80,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { bool ProfilingTrainingTraceOn() const { return is_training_trace_; } bool ProfilingModelLoadOn() const { return is_load_profiling_; } bool ProfilingModelExecuteOn() const; - bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env + // is_execute_profiling_ only used by ge option and env + bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } void ReportProfilingData(uint32_t model_id, const std::vector &task_desc_info, const std::vector &compute_graph_desc_info); void ProfilingTaskDescInfo(uint32_t model_id, const std::vector &task_desc_info, diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index e56411c9..1e8b29d7 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -20,7 +20,7 @@ #include "ge/ge_api_error_codes.h" #include "toolchain/prof_callback.h" -#define MAX_DEV_NUM (64) +const int MAX_DEV_NUM = 64; enum ProfCommandHandleType { kProfCommandhandleInit = 0, kProfCommandhandleStart, From e177e5c001925b420d294f37559bf0a64f2ae56b Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 18 Dec 2020 20:18:30 +0800 Subject: [PATCH 06/33] optimize log print --- ge/common/profiling/ge_profiling.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index 2717f215..43ed6434 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -181,7 +181,8 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le if (type != kProfCommandhandleFinalize) { command.module_index = prof_config_param->profSwitch; } - GELOGI("GE commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index); + GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(), + command.module_index); if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); } From 535077a0671999ba7bef8e067f1aa2f2785cb581 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 18 Dec 2020 20:22:22 +0800 Subject: [PATCH 07/33] formate ge_profiling.h --- inc/framework/common/profiling/ge_profiling.h | 1 + 1 file changed, 1 insertion(+) diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index 1e8b29d7..f60f60f2 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -21,6 +21,7 @@ #include "toolchain/prof_callback.h" const int MAX_DEV_NUM = 64; + enum ProfCommandHandleType { kProfCommandhandleInit = 0, kProfCommandhandleStart, From 1488a559be71a1b3f1af3f87cd0c07edd84072f4 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Fri, 18 Dec 2020 20:25:43 +0800 Subject: [PATCH 08/33] update format --- ge/common/profiling/profiling_manager.h | 30 +++++++++---------- inc/framework/common/profiling/ge_profiling.h | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 718c8cc9..22fa8f8c 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -36,21 +36,21 @@ using Json = nlohmann::json; namespace { const std::string GE_PROFILING_MODULE = "Framework"; // DataTypeConfig MASK - const int PROF_ACL_API_MASK = 0x0001; - const int PROF_TASK_TIME_MASK = 0x0002; - const int PROF_AICORE_METRICS_MASK = 0x0004; - const int PROF_AICPU_TRACE_MASK = 0x0008; - const int PROF_MODEL_EXECUTE_MASK = 0x0010; - const int PROF_RUNTIME_API_MASK = 0x0020; - const int PROF_RUNTIME_TRACE_MASK = 0x0040; - const int PROF_SCHEDULE_TIMELINE_MASK = 0x0080; - const int PROF_SCHEDULE_TRACE_MASK = 0x0100; - const int PROF_AIVECTORCORE_METRICS_MASK = 0x0200; - const int PROF_SUBTASK_TIME_MASK = 0x0400; - const int PROF_TRAINING_TRACE_MASK = 0x0800; - const int PROF_HCCL_TRACE_MASK = 0x1000; - const int PROF_DATA_PROCESS_MASK = 0x2000; - const int PROF_MODEL_LOAD_MASK = 0x8000000000000000; + const uint64_t PROF_ACL_API_MASK = 0x0001; + const uint64_t PROF_TASK_TIME_MASK = 0x0002; + const uint64_t PROF_AICORE_METRICS_MASK = 0x0004; + const uint64_t PROF_AICPU_TRACE_MASK = 0x0008; + const uint64_t PROF_MODEL_EXECUTE_MASK = 0x0010; + const uint64_t PROF_RUNTIME_API_MASK = 0x0020; + const uint64_t PROF_RUNTIME_TRACE_MASK = 0x0040; + const uint64_t PROF_SCHEDULE_TIMELINE_MASK = 0x0080; + const uint64_t PROF_SCHEDULE_TRACE_MASK = 0x0100; + const uint64_t PROF_AIVECTORCORE_METRICS_MASK = 0x0200; + const uint64_t PROF_SUBTASK_TIME_MASK = 0x0400; + const uint64_t PROF_TRAINING_TRACE_MASK = 0x0800; + const uint64_t PROF_HCCL_TRACE_MASK = 0x1000; + const uint64_t PROF_DATA_PROCESS_MASK = 0x2000; + const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000; } // namespace namespace ge { diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index f60f60f2..83699754 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -33,7 +33,7 @@ enum ProfCommandHandleType { struct ProfCommandHandleData { uint64_t profSwitch; - uint32_t devNums; // length of device id list + uint32_t devNums; // length of device id list uint32_t devIdList[MAX_DEV_NUM]; uint32_t modelId; }; From a8cfa7531427d217c97e88888587cc5b0ccb5ef9 Mon Sep 17 00:00:00 2001 From: dajunli Date: Sat, 19 Dec 2020 11:12:21 +0800 Subject: [PATCH 09/33] change hccl inference --- ge/graph/manager/memory_api.cc | 2 +- .../node_executor/hccl/hccl_node_executor.cc | 50 +++++++++---------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc index 45e4bb65..0798eb51 100644 --- a/ge/graph/manager/memory_api.cc +++ b/ge/graph/manager/memory_api.cc @@ -63,7 +63,7 @@ Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t }); auto hcom_remote_mem_register = - (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "hcom_remote_access_mem_register"); + (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "HcomRegRemoteAccessMem"); if (hcom_remote_mem_register == nullptr) { GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function."); return FAILED; diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 704cab77..eebe2a81 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -42,10 +42,10 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do GELOGE(FAILED, "hccl handle is nullptr! "); return FAILED; } - auto EnqueueHcomOpertion = (HcclResult(*)(HcomOpertion, std::function))dlsym( - context.handle_, "EnqueueHcomOpertion"); - if (EnqueueHcomOpertion == nullptr) { - GELOGE(FAILED, "Failed to invoke EnqueueHcomOpertion hcom unknown node function."); + auto HcomExecEnqueueOperation = (HcclResult(*)(HcomOperation, std::function))dlsym( + context.handle_, "HcomExecEnqueueOperation"); + if (HcomExecEnqueueOperation == nullptr) { + GELOGE(FAILED, "Failed to invoke HcomExecEnqueueOperation hcom unknown node function."); if (dlclose(context.handle_) != 0) { GELOGW("Failed to close handle %s", dlerror()); } @@ -70,7 +70,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do const OpDescPtr op_desc = node_item.GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - HcomOpertion op_info; + HcomOperation op_info; op_info.hcclType = op_desc->GetType(); op_info.inputPtr = inputs.empty() ? nullptr : inputs[0]; op_info.outputPtr = outputs.empty() ? nullptr : outputs[0]; @@ -96,7 +96,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do op_info.root = root_id; auto callback = [this, op_desc](HcclResult status) { if (status != HCCL_SUCCESS) { - GELOGE(HCCL_E_INTERNAL, "node %s call EnqueueHcomOpertion failed, ret: 0x%X", op_desc->GetName().c_str(), status); + GELOGE(HCCL_E_INTERNAL, "node %s call HcomExecEnqueueOperation failed, ret: 0x%X", op_desc->GetName().c_str(), status); } std::lock_guard lock(this->hccl_mutex_); this->cond_.notify_all(); @@ -110,9 +110,9 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do context.GetNodeName(), op_info.hcclType.c_str(), count, op_info.dataType, op_info.opType, op_info.root); op_info.count = count; - HcclResult hccl_ret = EnqueueHcomOpertion(op_info, callback); + HcclResult hccl_ret = HcomExecEnqueueOperation(op_info, callback); if (hccl_ret != HCCL_SUCCESS) { - GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); + GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); return HCCL_E_INTERNAL; } @@ -213,11 +213,11 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector done_callback) { GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); - auto EnqueueRemoteAccess = + auto HcomExecEnqueueRemoteAccess = (HcclResult(*)(const string &, const vector &, - std::function))dlsym(context.handle_, "EnqueueRemoteAccess"); - if (EnqueueRemoteAccess == nullptr) { - GELOGE(FAILED, "Failed to invoke EnqueueRemoteAccess hcom unknown node function."); + std::function))dlsym(context.handle_, "HcomExecEnqueueRemoteAccess"); + if (HcomExecEnqueueRemoteAccess == nullptr) { + GELOGE(FAILED, "Failed to invoke HcomExecEnqueueRemoteAccess hcom unknown node function."); if (dlclose(context.handle_) != 0) { GELOGW("Failed to close handle %s", dlerror()); } @@ -228,15 +228,15 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function do auto callback = [this](HcclResult status) { if (status != HCCL_SUCCESS) { - GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", status); + GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", status); } std::lock_guard lock(this->hccl_mutex_); this->cond_.notify_all(); GELOGI("rdma callback success."); }; - HcclResult hccl_ret = EnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); + HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); if (hccl_ret != HCCL_SUCCESS) { - GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); + GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); return HCCL_E_INTERNAL; } @@ -307,32 +307,32 @@ Status HcclNodeExecutor::Initialize() { GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror()); return FAILED; } - auto HcomExcutorInitialize = (HcclResult(*)())dlsym(handle_, "HcomExcutorInitialize"); - if (HcomExcutorInitialize == nullptr) { - GELOGE(FAILED, "Failed to invoke HcomExcutorInitialize hcom unknown node function."); + auto HcomExecInitialize = (HcclResult(*)())dlsym(handle_, "HcomExecInitialize"); + if (HcomExecInitialize == nullptr) { + GELOGE(FAILED, "Failed to invoke HcomExecInitialize hcom unknown node function."); return FAILED; } - HcclResult hccl_ret = HcomExcutorInitialize(); + HcclResult hccl_ret = HcomExecInitialize(); if (hccl_ret == HCCL_E_PTR) { GELOGI("Hccl comm is null, hcom executor initialize is not required."); } else if (hccl_ret == HCCL_SUCCESS) { GELOGI("Hcom executor initialize success."); } else { - GELOGE(FAILED, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); + GELOGE(FAILED, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); return FAILED; } return SUCCESS; } Status HcclNodeExecutor::Finalize() { - auto HcomExcutorFinalize = (HcclResult(*)())dlsym(handle_, "HcomExcutorFinalize"); - if (HcomExcutorFinalize == nullptr) { - GELOGE(FAILED, "Failed to invoke HcomExcutorFinalize hcom unknown node function."); + auto HcomExecFinalize = (HcclResult(*)())dlsym(handle_, "HcomExecFinalize"); + if (HcomExecFinalize == nullptr) { + GELOGE(FAILED, "Failed to invoke HcomExecFinalize hcom unknown node function."); return FAILED; } - HcclResult hccl_ret = HcomExcutorFinalize(); + HcclResult hccl_ret = HcomExecFinalize(); if (hccl_ret != HCCL_SUCCESS) { - GELOGE(FAILED, "Call HcomExcutorFinalize failed, ret: 0x%X", hccl_ret); + GELOGE(FAILED, "Call HcomExecFinalize failed, ret: 0x%X", hccl_ret); return FAILED; } // dlclose file handle From 1ea5354d6700704729359bb62e964fbc0ee6b230 Mon Sep 17 00:00:00 2001 From: weiyang Date: Fri, 18 Dec 2020 18:17:39 +0800 Subject: [PATCH 10/33] fix --- ge/graph/load/graph_loader.cc | 22 +++++----- .../load/new_model_manager/davinci_model.cc | 20 +++++----- .../load/new_model_manager/model_manager.cc | 40 +++++++++---------- 3 files changed, 39 insertions(+), 43 deletions(-) diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 44556422..4f32a45a 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string ModelData &model_data) { Status ret; if (!CheckInputPathValid(path)) { - GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); - return GE_EXEC_MODEL_PATH_INVALID; + GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); + return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; } GELOGI("Load model begin, model path is: %s", path.c_str()); if (!key_path.empty() && !CheckInputPathValid(key_path)) { - GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); - return GE_EXEC_MODEL_KEY_PATH_INVALID; + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); + return ACL_ERROR_GE_PARAM_INVALID; } ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); @@ -186,13 +186,13 @@ Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr Status ret = model_manager->LoadModelOffline(model_id, model_data, listener); if (ret != SUCCESS) { GE_CHK_RT(rtDeviceReset(0)); - GELOGE(ret, "LoadModel: Load failed."); - return ret; + GELOGE(ACL_ERROR_GE_LOAD_MODEL, "LoadModel: Load failed."); + return ACL_ERROR_GE_LOAD_MODEL; } ret = model_manager->Start(model_id); if (ret != SUCCESS) { if (model_manager->Unload(model_id) != SUCCESS) { - GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start."); + GELOGE(ACL_ERROR_GE_UNLOAD_MODEL, "LoadModel: Unload failed while trying to unload after a failed start."); } GELOGE(ret, "LoadModel: Start failed."); return ret; @@ -233,8 +233,8 @@ Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model Status ret = model_manager->LoadModelOffline( model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); if (ret != SUCCESS) { - GELOGE(ret, "Load model failed, model_id:%u.", model_id); - return ret; + GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id); + return ACL_ERROR_GE_LOAD_MODEL; } GELOGI("Load model success, model_id:%u.", model_id); return SUCCESS; @@ -259,8 +259,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); if (ret != SUCCESS) { - GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id); - return ret; + GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id); + return ACL_ERROR_GE_LOAD_MODEL; } GELOGI("Load model with queue success, model_id:%u.", model_id); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 720c3c28..21d90ffd 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh if (weight_ptr == nullptr) { weights_mem_base_ = MallocWeightsMem(weights_size); if (weights_mem_base_ == nullptr) { - GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); - return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size); + return ACL_ERROR_GE_MEMORY_ALLOCATION; } is_inner_weight_base_ = true; } @@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (is_feature_map_mem_has_inited_) { - GELOGE(FAILED, "call InitFeatureMapMem more than once ."); - return FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once ."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; } is_feature_map_mem_has_inited_ = true; @@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { - GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); - return FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); + return ACL_ERROR_GE_MEMORY_ALLOCATION; } mem_base_ = static_cast(dev_ptr); @@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (TotalMemSize() && mem_base_ == nullptr) { mem_base_ = MallocFeatureMapMem(data_size); if (mem_base_ == nullptr) { - GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size); - return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size); + return ACL_ERROR_GE_MEMORY_ALLOCATION; } GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, mem_base_, data_size); @@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (p2p_data_size != 0) { p2p_mem_base_ = MallocP2PMem(p2p_data_size); if (p2p_mem_base_ == nullptr) { - GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); - return GE_EXEC_ALLOC_P2P_MEM_FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size); + return ACL_ERROR_GE_MEMORY_ALLOCATION; } GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, p2p_mem_base_, p2p_data_size); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 803cc15f..3e1ef995 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -215,8 +215,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { auto it = model_map_.find(model_id); if (it == model_map_.end()) { - GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); - return GE_EXEC_MODEL_ID_INVALID; + GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); + return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; } uint64_t session_id = it->second->GetSessionId(); DestroyAicpuSession(session_id); @@ -384,8 +384,8 @@ Status ModelManager::DeleteModel(uint32_t id) { } else if (hybrid_model_it != hybrid_model_map_.end()) { (void)hybrid_model_map_.erase(hybrid_model_it); } else { - GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); - return GE_EXEC_MODEL_ID_INVALID; + GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); + return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; } return SUCCESS; @@ -902,7 +902,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); davinci_model->SetModelDescVersion(new_model_desc); @@ -970,8 +970,9 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, } Status ModelManager::GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type) { - std::shared_ptr davinci_model = GetModel(model_id); - GE_CHECK_NOTNULL(davinci_model); + auto davinci_model = GetModel(model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, + "GetCurShape Failed, Invalid Model ID %u!", model_id); davinci_model->GetCurShape(batch_info, dynamic_type); return SUCCESS; } @@ -984,7 +985,8 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynami } std::shared_ptr davinci_model = GetModel(model_id); - GE_CHECK_NOTNULL(davinci_model); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, + "GetModelAttr Failed, Invalid Model ID %u!", model_id); davinci_model->GetModelAttr(dynamic_output_shape_info); return SUCCESS; } @@ -994,9 +996,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector &inputFormats, std::vector &outputFormats) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", - model_id); - + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, + "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); } @@ -1011,18 +1012,14 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetAIPPInfo failed, invalid model_id is %u.", - model_id); - + "GetAIPPInfo failed, invalid model_id is %u.", model_id); return davinci_model->GetAIPPInfo(index, aipp_info); } Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetAIPPInfo failed, invalid model_id is %u.", - model_id); - + "GetAIPPInfo failed, invalid model_id is %u.", model_id); return davinci_model->GetAippType(index, type, aipp_index); } @@ -1059,8 +1056,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model if (model_helper.GetModelType()) { bool is_shape_unknown = false; GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), - "CheckIsUnknownShape failed, model id:%u", - model_id); + "CheckIsUnknownShape failed, model id:%u", model_id); if (is_shape_unknown || GetContext().GetHostExecFlag()) { return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener); } @@ -1078,8 +1074,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed"); return ACL_ERROR_GE_MEMORY_ALLOCATION; } catch (...) { - GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise"); - return INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise"); + return ACL_ERROR_GE_MEMORY_ALLOCATION; } ret = davinci_model->Assign(ge_model); if (ret != SUCCESS) { @@ -1091,7 +1087,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model int32_t device_id = 0; rtError_t rt_ret = rtGetDevice(&device_id); if (rt_ret != RT_ERROR_NONE || device_id < 0) { - GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); + GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); return RT_ERROR_TO_GE_STATUS(rt_ret); } davinci_model->SetDeviceId(device_id); From 42bf687a43482dd1554c2afdab72895cbe16ca35 Mon Sep 17 00:00:00 2001 From: wjm Date: Sat, 19 Dec 2020 14:43:29 +0800 Subject: [PATCH 11/33] mult batch --- ge/graph/build/stream_graph_optimizer.cc | 36 ++++++++++++++----- ge/graph/build/stream_graph_optimizer.h | 2 +- ge/graph/build/task_generator.cc | 2 +- .../load/new_model_manager/zero_copy_task.cc | 4 --- 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc index 2933d413..f86f846e 100644 --- a/ge/graph/build/stream_graph_optimizer.cc +++ b/ge/graph/build/stream_graph_optimizer.cc @@ -48,26 +48,42 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap } } -bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) { +bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph) { if (comp_graph == nullptr) { return false; } std::set stream_set; + std::set label_set; for (const ge::NodePtr &cur_node : comp_graph->GetDirectNode()) { GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, continue); int64_t stream_id = cur_node->GetOpDesc()->GetStreamId(); if (stream_id == kInvalidStream) { continue; } - GELOGD("Node %s in subgraph %s stream id is: %ld, node num: %zu", cur_node->GetName().c_str(), + stream_set.insert(stream_id); + + std::string batch_label; + if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { + label_set.insert(batch_label); + } else { + GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(), + cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id); + continue; + } + + GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(), comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize()); stream_set.insert(stream_id); } - if (stream_set.size() > 1) { - GELOGI("Nodes of graph: %s have different stream id, node num: %zu, different stream num: %zu.", + if (stream_set.size() > 1 || label_set.size() > 1) { + GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.", comp_graph->GetName().c_str(), comp_graph->GetDirectNodesSize(), stream_set.size()); return false; } + + if (!label_set.empty()) { + (void)AttrUtils::SetStr(comp_graph, ATTR_NAME_BATCH_LABEL, *label_set.begin()); + } return true; } @@ -99,8 +115,8 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com continue; } - if (!IsSameStreamId(subgraph)) { - GELOGI("There are more than one stream in subgraph %s", subgraph->GetName().c_str()); + if (!IsSameStreamIdOrBatchLabel(subgraph)) { + GELOGI("There are more than one stream or batch_label in subgraph %s", subgraph->GetName().c_str()); continue; } OpDescPtr op_desc = nodes.at(0)->GetOpDesc(); @@ -112,9 +128,11 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com return FAILED; } run_context.stream = run_context.graphStreamList[stream_id]; - GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu.", - subgraph->GetName().c_str(), engine_name.c_str(), stream_id, - static_cast(reinterpret_cast(run_context.stream))); + std::string batch_label; + (void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label); + GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, " + "batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id, + static_cast(reinterpret_cast(run_context.stream)), batch_label.c_str()); for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) { GE_CHECK_NOTNULL(*iter); Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context); diff --git a/ge/graph/build/stream_graph_optimizer.h b/ge/graph/build/stream_graph_optimizer.h index b0eea135..d69fa7ba 100644 --- a/ge/graph/build/stream_graph_optimizer.h +++ b/ge/graph/build/stream_graph_optimizer.h @@ -41,7 +41,7 @@ class StreamGraphOptimizer { private: void RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map); - bool IsSameStreamId(const ComputeGraphPtr &comp_graph); + bool IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph); }; } // namespace ge #endif // GE_GRAPH_BUILD_OPTIMIZE_STREAM_GRAPH_H_ diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index b506f945..2089ad31 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -567,7 +567,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector &ops, bool is_ continue; } string op_type = op_desc->GetType(); - if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) { + if (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0) { continuous_op_lists.emplace_back(vector()); } else { continuous_op_lists.back().emplace_back(op_desc); diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index 2609cb4b..98dccb3c 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -124,10 +124,6 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const ma auto &cur_pair = *iter; uint8_t *args_info = args_info_.data(); for (auto offset : cur_pair.second) { - if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast(args_addr_ + offset))) { - continue; - } - auto dst_addr = static_cast(buffer_addr); GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); From ab8dc6235a41347ff121100190160d1e536e2b22 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 19 Dec 2020 16:36:37 +0800 Subject: [PATCH 12/33] Delete useless batch label for zero copy. --- .../load/new_model_manager/davinci_model.cc | 198 +----------------- .../load/new_model_manager/davinci_model.h | 46 ---- .../new_model_manager/zero_copy_offset.cc | 10 +- .../load/new_model_manager/zero_copy_offset.h | 2 +- .../load/new_model_manager/zero_copy_task.cc | 49 +---- .../load/new_model_manager/zero_copy_task.h | 8 +- 6 files changed, 15 insertions(+), 298 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 720c3c28..a9d8fc3c 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -970,7 +970,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma uint32_t parent_index = 0; // Ignore subgraph Data Node. if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); - return InitInputBatchLabel(node); + return SUCCESS; } data_op_list_.push_back(op_desc); @@ -1011,10 +1011,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma } data_op_index++; - if (InitInputZeroCopyNodes(node) != SUCCESS) { - GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!"); - return PARAM_INVALID; - } return SUCCESS; } @@ -1036,39 +1032,6 @@ void DavinciModel::AdjustDataOpList(const map &data_by_inde } } -/// -/// @ingroup ge -/// @brief input zero copy node Initialize. -/// @param [in] NodePtr: Data Op. -/// @return Status -/// -Status DavinciModel::InitInputZeroCopyNodes(const NodePtr &node) { - auto out_data_anchor = node->GetOutDataAnchor(kDataIndex); - if (out_data_anchor == nullptr) { - GELOGE(FAILED, "Out data anchor is nullptr"); - return FAILED; - } - for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - auto node = peer_in_data_anchor->GetOwnerNode(); - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(FAILED, "Op desc is nullptr"); - return FAILED; - } - string batch_label; - (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (batch_label.empty()) { - batch_label = kDefaultBatchLable; - } - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_.emplace(pair(op_desc->GetId(), batch_label)); - GELOGD("Init input zero copy nodes success, op name:%s, op id: %ld, batch label: %s.", op_desc->GetName().c_str(), - op_desc->GetId(), batch_label.c_str()); - } - } - return SUCCESS; -} - bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { bool getnext_sink_dynamic = false; if (ge::AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) { @@ -1094,7 +1057,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { if (owner_graph->GetParentGraph() != nullptr) { GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); op_list_.erase(op_desc->GetId()); - return InitOutputBatchLabel(node); + return SUCCESS; } output_op_list_.push_back(op_desc); @@ -1146,8 +1109,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { } } - GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS, - GELOGE(PARAM_INVALID, "Output zero copy nodes init failed!"); return PARAM_INVALID;); GetAllGearsInfo(node); if (is_getnext_sink_dynamic_) { GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, @@ -1343,121 +1304,6 @@ void DavinciModel::ParseDynamicOutShape(const std::vector &str_info } } -/// -/// @ingroup ge -/// @brief output zero copy node Initialize. -/// @param [in] NodePtr: netoutput Op. -/// @return Status -/// -Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) { - set nodes_need_record; - for (auto &in_data_anchor : node->GetAllInDataAnchors()) { - auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - if (peer_out_data_anchor == nullptr) { - continue; - } - auto peer_node = peer_out_data_anchor->GetOwnerNode(); - nodes_need_record.emplace(peer_node); - - // Merge node output multiplexed input, upstream nodes need to be considered in multiple batch scenarios - if (peer_node->GetType() == MERGE) { - for (const auto &merge_peer_in_data_anchor : peer_node->GetAllInDataAnchors()) { - auto merge_peer_out_data_anchor = merge_peer_in_data_anchor->GetPeerOutAnchor(); - if (merge_peer_out_data_anchor == nullptr) { - continue; - } - auto merge_peer_node = merge_peer_out_data_anchor->GetOwnerNode(); - nodes_need_record.emplace(merge_peer_node); - } - } else { - for (const auto &other_in_data_anchor : peer_out_data_anchor->GetPeerInDataAnchors()) { - auto other_in_node = other_in_data_anchor->GetOwnerNode(); - if (other_in_node->GetType() != NETOUTPUT) { - nodes_need_record.emplace(other_in_node); - } - } - } - } - - for (const auto &node_need_record : nodes_need_record) { - auto op_desc = node_need_record->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - string batch_label; - (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (batch_label.empty()) { - batch_label = kDefaultBatchLable; - } - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_.emplace(pair(op_desc->GetId(), batch_label)); - GELOGD("Init Output zero copy nodes success, op name:%s, op id: %ld, batch label: %s.", - op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); - } - } - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief input zero copy node Initialize. -/// @param [in] NodePtr: Data Op. -/// @return Status -/// -Status DavinciModel::InitInputBatchLabel(const NodePtr &node) { - string batch_label; - if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { - return SUCCESS; // Not Multi-batch. - } - - const auto &out_data_anchor = node->GetOutDataAnchor(kDataIndex); - GE_CHECK_NOTNULL(out_data_anchor); - - for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - const auto &node = peer_in_data_anchor->GetOwnerNode(); - const auto &op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; - GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", op_desc->GetName().c_str(), - op_desc->GetId(), batch_label.c_str()); - } - } - - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief output zero copy node Initialize for Case. -/// @param [in] NodePtr: netoutput Op. -/// @return Status -/// -Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) { - string batch_label; - if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { - return SUCCESS; // Not Multi-batch. - } - - for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { - const auto &peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - if (peer_out_data_anchor == nullptr) { - continue; - } - - const auto &peer_node = peer_out_data_anchor->GetOwnerNode(); - const auto &op_desc = peer_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; - GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s", - op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); - } - } - - return SUCCESS; -} - /// @ingroup ge /// @brief LabelSet Op Initialize. /// @param [in] op_desc: LabelSet Op descriptor. @@ -3264,27 +3110,20 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector(args) + offset + i * kAddrLen; - SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); - } + input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); } for (auto &output_outside_addrs : new_output_outside_addrs_) { ZeroCopyOffset &output_outside = output_outside_addrs.second; - bool ret = output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); - if (ret) { - void *args_val = static_cast(args) + offset + i * kAddrLen; - SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); - } + output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); } } - auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId()); - if (it == zero_copy_op_id_batch_label_.end()) { + + string batch_label; + if (!AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label) || batch_label.empty()) { zero_copy_task.SetBatchLabel(kDefaultBatchLable); } else { - zero_copy_task.SetBatchLabel(it->second); + zero_copy_task.SetBatchLabel(batch_label); } std::lock_guard lock(outside_addrs_mutex_); @@ -3294,27 +3133,6 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vectorGetId()); - if (it == zero_copy_op_id_batch_label_.end()) { - return; - } - - const string &batch_label = it->second; - auto iter = zero_copy_batch_label_addrs_.find(batch_label); - if (iter != zero_copy_batch_label_addrs_.end()) { - iter->second.insert(addr); - GELOGD("[ZCPY] Set zero copy batch label and addrs success, batch label: %s, op name:%s.", batch_label.c_str(), - op_desc->GetName().c_str()); - } else { - set addrs = {addr}; - zero_copy_batch_label_addrs_.emplace(pair>(batch_label, addrs)); - GELOGD("[ZCPY] New added zero copy batch label and addrs success, batch label: %s, op name:%s.", - batch_label.c_str(), op_desc->GetName().c_str()); - } -} - /// /// @ingroup ge /// @brief Copy Check input size and model op size. diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 27bd4de5..4c507662 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -537,15 +537,6 @@ class DavinciModel { struct timeInfo time_info_; int32_t dataInputTid; - /// - /// @ingroup ge - /// @brief Save Batch label Info. - /// @param [in] const OpDescPtr &op_desc - /// @param [in] uintptr_t addr: address value in args block. - /// @return None. - /// - void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr); - /// /// @ingroup ge /// @brief Copy Check input size and model op size. @@ -657,14 +648,6 @@ class DavinciModel { /// void AdjustDataOpList(const map &data_by_index); - /// - /// @ingroup ge - /// @brief input zero copy node Initialize. - /// @param [in] NodePtr: Data Op. - /// @return Status - /// - Status InitInputZeroCopyNodes(const NodePtr &node); - /// /// @ingroup ge /// @brief NetOutput Op Initialize. @@ -673,30 +656,6 @@ class DavinciModel { /// Status InitNetOutput(const NodePtr &node); - /// - /// @ingroup ge - /// @brief output zero copy node Initialize. - /// @param [in] NodePtr: Data Op. - /// @return Status - /// - Status InitOutputZeroCopyNodes(const NodePtr &node); - - /// - /// @ingroup ge - /// @brief input zero copy node Initialize for Case. - /// @param [in] NodePtr: Data Op. - /// @return Status - /// - Status InitInputBatchLabel(const NodePtr &node); - - /// - /// @ingroup ge - /// @brief output zero copy node Initialize for Case. - /// @param [in] NodePtr: netoutput Op. - /// @return Status - /// - Status InitOutputBatchLabel(const NodePtr &node); - /// /// @ingroup ge /// @brief Constant Op Init. @@ -922,11 +881,6 @@ class DavinciModel { std::vector zero_copy_tasks_; // Task used Data or NetOutput addr. std::set copy_only_addrs_; // Address need copy to original place. - // {op_id, batch_label} - std::map zero_copy_op_id_batch_label_; - // {batch_label, addrs} - std::map> zero_copy_batch_label_addrs_; - std::vector task_list_; // rt_moodel_handle rtModel_t rt_model_handle_; diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/new_model_manager/zero_copy_offset.cc index 9cd3f30b..f27d862d 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc @@ -183,22 +183,18 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo addr_count_ = out_count; } -bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { +void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { const auto addr_val = reinterpret_cast(outside_addr); - bool set_batch_label_flag = false; for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { - auto &addrs_mapping_list = GetOutsideAddrs(); - auto args_addrs = addrs_mapping_list[out_count].find(outside_addr); - if (args_addrs != addrs_mapping_list[out_count].end()) { + auto args_addrs = outside_addrs_[out_count].find(outside_addr); + if (args_addrs != outside_addrs_[out_count].end()) { GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); void *args_val = static_cast(args) + offset; args_addrs->second.push_back(args_val); GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, args, offset); - set_batch_label_flag = true; } } - return set_batch_label_flag; } } // namespace ge diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/new_model_manager/zero_copy_offset.h index fa80f28b..8ead742d 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/new_model_manager/zero_copy_offset.h @@ -51,7 +51,7 @@ class ZeroCopyOffset { const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag); void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, std::vector &tensor_addrs); - bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); + void SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); // basic_addr of l2-fusion void *GetBasicAddr() const { return basic_addr_; } diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index 2609cb4b..7c6b3963 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -66,68 +66,23 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) { const uint8_t *data = static_cast(info); args_info_.assign(data, data + size); - GELOGI("[ZCPY] %s set info from virtual_addr: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, + GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, args_addr_, args_size_, size); } -/** - * @ingroup ge - * @brief Check is dynamic batch node. - * @param [in] addr: virtual address value from Op. - * @param [in] data: data buffer from user. - * @param [in] batch_addrs: dynamic batch addr info. - * @param [in] batch_label: batch label. - * @return: true / false - */ -bool ZeroCopyTask::CheckDynamicBatch(const map> &batch_addrs, const string &batch_label, - uintptr_t addr) { - // Used for dynamic batch / resolution scene - set dynamic_input_addrs; - auto dynamic_input_iter = batch_addrs.find(batch_label); - if (dynamic_input_iter != batch_addrs.end()) { - dynamic_input_addrs = dynamic_input_iter->second; - } - - set fix_input_addrs; - auto fix_input_iter = batch_addrs.find(kDefaultBatchLable); - if (fix_input_iter != batch_addrs.end()) { - fix_input_addrs = fix_input_iter->second; - } - - if (fix_input_addrs.empty()) { - if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end()) { - return false; - } - } else { - if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end() && - fix_input_addrs.find(addr) == fix_input_addrs.end()) { - return false; - } - } - - return true; -} - /** * @ingroup ge * @brief Set user data addr to Task param. * @param [in] addr: virtual address value from Op. * @param [in] buffer_addr: real_data_buffer_addr from user. - * @param [in] batch_addrs: dynamic batch addr info. - * @param [in] batch_label: batch label. * @return: void */ -Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map> &batch_addrs, - const string &batch_label) { +Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) { auto iter = task_addr_offset_.find(addr); if (iter != task_addr_offset_.end()) { auto &cur_pair = *iter; uint8_t *args_info = args_info_.data(); for (auto offset : cur_pair.second) { - if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast(args_addr_ + offset))) { - continue; - } - auto dst_addr = static_cast(buffer_addr); GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/new_model_manager/zero_copy_task.h index d0bb2b6d..efabc814 100644 --- a/ge/graph/load/new_model_manager/zero_copy_task.h +++ b/ge/graph/load/new_model_manager/zero_copy_task.h @@ -67,12 +67,9 @@ class ZeroCopyTask { * @brief Set user data addr to Task param. * @param [in] addr: virtual address value from Op. * @param [in] buffer_addr: data buffer_addr from user. - * @param [in] batch_addrs: dynamic batch addr info. - * @param [in] batch_label: batch label. * @return: 0 SUCCESS / others FAILED */ - ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map> &batch_addrs, - const string &batch_label); + ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr); /** * @ingroup ge @@ -91,9 +88,6 @@ class ZeroCopyTask { return batch_label_; } - protected: - bool CheckDynamicBatch(const map> &batch_addrs, const string &batch_label, uintptr_t addr); - private: const string name_; From cb0f57a44ce68dd3db43c03bf6792eb693bc1b44 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 19 Dec 2020 16:36:37 +0800 Subject: [PATCH 13/33] Delete useless batch label for zero copy. --- .../load/new_model_manager/davinci_model.cc | 207 +----------------- .../load/new_model_manager/davinci_model.h | 46 ---- .../new_model_manager/zero_copy_offset.cc | 10 +- .../load/new_model_manager/zero_copy_offset.h | 2 +- .../load/new_model_manager/zero_copy_task.cc | 49 +---- .../load/new_model_manager/zero_copy_task.h | 8 +- 6 files changed, 18 insertions(+), 304 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 720c3c28..e1ebf216 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -970,7 +970,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma uint32_t parent_index = 0; // Ignore subgraph Data Node. if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); - return InitInputBatchLabel(node); + return SUCCESS; } data_op_list_.push_back(op_desc); @@ -1011,10 +1011,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma } data_op_index++; - if (InitInputZeroCopyNodes(node) != SUCCESS) { - GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!"); - return PARAM_INVALID; - } return SUCCESS; } @@ -1036,39 +1032,6 @@ void DavinciModel::AdjustDataOpList(const map &data_by_inde } } -/// -/// @ingroup ge -/// @brief input zero copy node Initialize. -/// @param [in] NodePtr: Data Op. -/// @return Status -/// -Status DavinciModel::InitInputZeroCopyNodes(const NodePtr &node) { - auto out_data_anchor = node->GetOutDataAnchor(kDataIndex); - if (out_data_anchor == nullptr) { - GELOGE(FAILED, "Out data anchor is nullptr"); - return FAILED; - } - for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - auto node = peer_in_data_anchor->GetOwnerNode(); - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(FAILED, "Op desc is nullptr"); - return FAILED; - } - string batch_label; - (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (batch_label.empty()) { - batch_label = kDefaultBatchLable; - } - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_.emplace(pair(op_desc->GetId(), batch_label)); - GELOGD("Init input zero copy nodes success, op name:%s, op id: %ld, batch label: %s.", op_desc->GetName().c_str(), - op_desc->GetId(), batch_label.c_str()); - } - } - return SUCCESS; -} - bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { bool getnext_sink_dynamic = false; if (ge::AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) { @@ -1094,7 +1057,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { if (owner_graph->GetParentGraph() != nullptr) { GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); op_list_.erase(op_desc->GetId()); - return InitOutputBatchLabel(node); + return SUCCESS; } output_op_list_.push_back(op_desc); @@ -1146,8 +1109,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { } } - GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS, - GELOGE(PARAM_INVALID, "Output zero copy nodes init failed!"); return PARAM_INVALID;); GetAllGearsInfo(node); if (is_getnext_sink_dynamic_) { GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, @@ -1343,121 +1304,6 @@ void DavinciModel::ParseDynamicOutShape(const std::vector &str_info } } -/// -/// @ingroup ge -/// @brief output zero copy node Initialize. -/// @param [in] NodePtr: netoutput Op. -/// @return Status -/// -Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) { - set nodes_need_record; - for (auto &in_data_anchor : node->GetAllInDataAnchors()) { - auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - if (peer_out_data_anchor == nullptr) { - continue; - } - auto peer_node = peer_out_data_anchor->GetOwnerNode(); - nodes_need_record.emplace(peer_node); - - // Merge node output multiplexed input, upstream nodes need to be considered in multiple batch scenarios - if (peer_node->GetType() == MERGE) { - for (const auto &merge_peer_in_data_anchor : peer_node->GetAllInDataAnchors()) { - auto merge_peer_out_data_anchor = merge_peer_in_data_anchor->GetPeerOutAnchor(); - if (merge_peer_out_data_anchor == nullptr) { - continue; - } - auto merge_peer_node = merge_peer_out_data_anchor->GetOwnerNode(); - nodes_need_record.emplace(merge_peer_node); - } - } else { - for (const auto &other_in_data_anchor : peer_out_data_anchor->GetPeerInDataAnchors()) { - auto other_in_node = other_in_data_anchor->GetOwnerNode(); - if (other_in_node->GetType() != NETOUTPUT) { - nodes_need_record.emplace(other_in_node); - } - } - } - } - - for (const auto &node_need_record : nodes_need_record) { - auto op_desc = node_need_record->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - string batch_label; - (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (batch_label.empty()) { - batch_label = kDefaultBatchLable; - } - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_.emplace(pair(op_desc->GetId(), batch_label)); - GELOGD("Init Output zero copy nodes success, op name:%s, op id: %ld, batch label: %s.", - op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); - } - } - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief input zero copy node Initialize. -/// @param [in] NodePtr: Data Op. -/// @return Status -/// -Status DavinciModel::InitInputBatchLabel(const NodePtr &node) { - string batch_label; - if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { - return SUCCESS; // Not Multi-batch. - } - - const auto &out_data_anchor = node->GetOutDataAnchor(kDataIndex); - GE_CHECK_NOTNULL(out_data_anchor); - - for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - const auto &node = peer_in_data_anchor->GetOwnerNode(); - const auto &op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; - GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", op_desc->GetName().c_str(), - op_desc->GetId(), batch_label.c_str()); - } - } - - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief output zero copy node Initialize for Case. -/// @param [in] NodePtr: netoutput Op. -/// @return Status -/// -Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) { - string batch_label; - if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { - return SUCCESS; // Not Multi-batch. - } - - for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { - const auto &peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - if (peer_out_data_anchor == nullptr) { - continue; - } - - const auto &peer_node = peer_out_data_anchor->GetOwnerNode(); - const auto &op_desc = peer_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; - GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s", - op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); - } - } - - return SUCCESS; -} - /// @ingroup ge /// @brief LabelSet Op Initialize. /// @param [in] op_desc: LabelSet Op descriptor. @@ -3264,27 +3110,20 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector(args) + offset + i * kAddrLen; - SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); - } + input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); } for (auto &output_outside_addrs : new_output_outside_addrs_) { ZeroCopyOffset &output_outside = output_outside_addrs.second; - bool ret = output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); - if (ret) { - void *args_val = static_cast(args) + offset + i * kAddrLen; - SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); - } + output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); } } - auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId()); - if (it == zero_copy_op_id_batch_label_.end()) { + + string batch_label; + if (!AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label) || batch_label.empty()) { zero_copy_task.SetBatchLabel(kDefaultBatchLable); } else { - zero_copy_task.SetBatchLabel(it->second); + zero_copy_task.SetBatchLabel(batch_label); } std::lock_guard lock(outside_addrs_mutex_); @@ -3294,27 +3133,6 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vectorGetId()); - if (it == zero_copy_op_id_batch_label_.end()) { - return; - } - - const string &batch_label = it->second; - auto iter = zero_copy_batch_label_addrs_.find(batch_label); - if (iter != zero_copy_batch_label_addrs_.end()) { - iter->second.insert(addr); - GELOGD("[ZCPY] Set zero copy batch label and addrs success, batch label: %s, op name:%s.", batch_label.c_str(), - op_desc->GetName().c_str()); - } else { - set addrs = {addr}; - zero_copy_batch_label_addrs_.emplace(pair>(batch_label, addrs)); - GELOGD("[ZCPY] New added zero copy batch label and addrs success, batch label: %s, op name:%s.", - batch_label.c_str(), op_desc->GetName().c_str()); - } -} - /// /// @ingroup ge /// @brief Copy Check input size and model op size. @@ -3448,15 +3266,15 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & void *addr = data.second.GetDataInfo().at(count).second; void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data) + data.second.GetRelativeOffset().at(count)); - GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", input_or_output.c_str(), - data.first, addr, size, buffer_addr); + GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p, batch_label: %s", + input_or_output.c_str(), data.first, addr, size, buffer_addr, batch_label.c_str()); // For input data, just copy for rts task. for (ZeroCopyTask &task : zero_copy_tasks_) { if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) { continue; } uintptr_t addr_val = reinterpret_cast(addr); - if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) { + if (task.UpdateTaskParam(addr_val, buffer_addr) != SUCCESS) { return FAILED; } } @@ -3818,9 +3636,6 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); is_dynamic_ = input_data.is_dynamic_batch; - if (!is_dynamic_) { - zero_copy_batch_label_addrs_.clear(); - } GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START)); Status ret = CopyModelData(input_data, output_data, is_dynamic_); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 27bd4de5..4c507662 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -537,15 +537,6 @@ class DavinciModel { struct timeInfo time_info_; int32_t dataInputTid; - /// - /// @ingroup ge - /// @brief Save Batch label Info. - /// @param [in] const OpDescPtr &op_desc - /// @param [in] uintptr_t addr: address value in args block. - /// @return None. - /// - void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr); - /// /// @ingroup ge /// @brief Copy Check input size and model op size. @@ -657,14 +648,6 @@ class DavinciModel { /// void AdjustDataOpList(const map &data_by_index); - /// - /// @ingroup ge - /// @brief input zero copy node Initialize. - /// @param [in] NodePtr: Data Op. - /// @return Status - /// - Status InitInputZeroCopyNodes(const NodePtr &node); - /// /// @ingroup ge /// @brief NetOutput Op Initialize. @@ -673,30 +656,6 @@ class DavinciModel { /// Status InitNetOutput(const NodePtr &node); - /// - /// @ingroup ge - /// @brief output zero copy node Initialize. - /// @param [in] NodePtr: Data Op. - /// @return Status - /// - Status InitOutputZeroCopyNodes(const NodePtr &node); - - /// - /// @ingroup ge - /// @brief input zero copy node Initialize for Case. - /// @param [in] NodePtr: Data Op. - /// @return Status - /// - Status InitInputBatchLabel(const NodePtr &node); - - /// - /// @ingroup ge - /// @brief output zero copy node Initialize for Case. - /// @param [in] NodePtr: netoutput Op. - /// @return Status - /// - Status InitOutputBatchLabel(const NodePtr &node); - /// /// @ingroup ge /// @brief Constant Op Init. @@ -922,11 +881,6 @@ class DavinciModel { std::vector zero_copy_tasks_; // Task used Data or NetOutput addr. std::set copy_only_addrs_; // Address need copy to original place. - // {op_id, batch_label} - std::map zero_copy_op_id_batch_label_; - // {batch_label, addrs} - std::map> zero_copy_batch_label_addrs_; - std::vector task_list_; // rt_moodel_handle rtModel_t rt_model_handle_; diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/new_model_manager/zero_copy_offset.cc index 9cd3f30b..f27d862d 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc @@ -183,22 +183,18 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo addr_count_ = out_count; } -bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { +void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { const auto addr_val = reinterpret_cast(outside_addr); - bool set_batch_label_flag = false; for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { - auto &addrs_mapping_list = GetOutsideAddrs(); - auto args_addrs = addrs_mapping_list[out_count].find(outside_addr); - if (args_addrs != addrs_mapping_list[out_count].end()) { + auto args_addrs = outside_addrs_[out_count].find(outside_addr); + if (args_addrs != outside_addrs_[out_count].end()) { GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); void *args_val = static_cast(args) + offset; args_addrs->second.push_back(args_val); GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, args, offset); - set_batch_label_flag = true; } } - return set_batch_label_flag; } } // namespace ge diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/new_model_manager/zero_copy_offset.h index fa80f28b..8ead742d 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/new_model_manager/zero_copy_offset.h @@ -51,7 +51,7 @@ class ZeroCopyOffset { const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag); void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, std::vector &tensor_addrs); - bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); + void SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); // basic_addr of l2-fusion void *GetBasicAddr() const { return basic_addr_; } diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index 2609cb4b..7c6b3963 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -66,68 +66,23 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) { const uint8_t *data = static_cast(info); args_info_.assign(data, data + size); - GELOGI("[ZCPY] %s set info from virtual_addr: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, + GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, args_addr_, args_size_, size); } -/** - * @ingroup ge - * @brief Check is dynamic batch node. - * @param [in] addr: virtual address value from Op. - * @param [in] data: data buffer from user. - * @param [in] batch_addrs: dynamic batch addr info. - * @param [in] batch_label: batch label. - * @return: true / false - */ -bool ZeroCopyTask::CheckDynamicBatch(const map> &batch_addrs, const string &batch_label, - uintptr_t addr) { - // Used for dynamic batch / resolution scene - set dynamic_input_addrs; - auto dynamic_input_iter = batch_addrs.find(batch_label); - if (dynamic_input_iter != batch_addrs.end()) { - dynamic_input_addrs = dynamic_input_iter->second; - } - - set fix_input_addrs; - auto fix_input_iter = batch_addrs.find(kDefaultBatchLable); - if (fix_input_iter != batch_addrs.end()) { - fix_input_addrs = fix_input_iter->second; - } - - if (fix_input_addrs.empty()) { - if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end()) { - return false; - } - } else { - if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end() && - fix_input_addrs.find(addr) == fix_input_addrs.end()) { - return false; - } - } - - return true; -} - /** * @ingroup ge * @brief Set user data addr to Task param. * @param [in] addr: virtual address value from Op. * @param [in] buffer_addr: real_data_buffer_addr from user. - * @param [in] batch_addrs: dynamic batch addr info. - * @param [in] batch_label: batch label. * @return: void */ -Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map> &batch_addrs, - const string &batch_label) { +Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) { auto iter = task_addr_offset_.find(addr); if (iter != task_addr_offset_.end()) { auto &cur_pair = *iter; uint8_t *args_info = args_info_.data(); for (auto offset : cur_pair.second) { - if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast(args_addr_ + offset))) { - continue; - } - auto dst_addr = static_cast(buffer_addr); GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/new_model_manager/zero_copy_task.h index d0bb2b6d..efabc814 100644 --- a/ge/graph/load/new_model_manager/zero_copy_task.h +++ b/ge/graph/load/new_model_manager/zero_copy_task.h @@ -67,12 +67,9 @@ class ZeroCopyTask { * @brief Set user data addr to Task param. * @param [in] addr: virtual address value from Op. * @param [in] buffer_addr: data buffer_addr from user. - * @param [in] batch_addrs: dynamic batch addr info. - * @param [in] batch_label: batch label. * @return: 0 SUCCESS / others FAILED */ - ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map> &batch_addrs, - const string &batch_label); + ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr); /** * @ingroup ge @@ -91,9 +88,6 @@ class ZeroCopyTask { return batch_label_; } - protected: - bool CheckDynamicBatch(const map> &batch_addrs, const string &batch_label, uintptr_t addr); - private: const string name_; From 14fac2c688cb987be3b06d879a1c067206d341b9 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 18:24:31 +0800 Subject: [PATCH 14/33] Slog adjust --- ge/offline/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 21221042..39f3d5fe 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -58,6 +58,7 @@ target_include_directories(atc PRIVATE target_link_libraries(atc PRIVATE $ ascend_protobuf + slog ge_common register c_sec @@ -68,7 +69,6 @@ target_link_libraries(atc PRIVATE gflags json runtime_compile - slog static_mmpa -lrt -ldl @@ -118,6 +118,7 @@ target_include_directories(atc_atc.bin PRIVATE target_link_libraries(atc_atc.bin PRIVATE $ ascend_protobuf + slog ge_common register c_sec @@ -128,7 +129,6 @@ target_link_libraries(atc_atc.bin PRIVATE gflags json runtime_compile - slog static_mmpa -lrt -ldl @@ -183,6 +183,7 @@ target_include_directories(fwk_atc.bin PRIVATE target_link_libraries(fwk_atc.bin PRIVATE $ ascend_protobuf + slog ge_common register c_sec @@ -193,7 +194,6 @@ target_link_libraries(fwk_atc.bin PRIVATE gflags json runtime - slog static_mmpa -lrt -ldl From 311f429357c8322c04d653954ab8085dae339227 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 20:01:47 +0800 Subject: [PATCH 15/33] update cmakelist add start-group --- ge/offline/CMakeLists.txt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 39f3d5fe..c4103809 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -58,13 +58,15 @@ target_include_directories(atc PRIVATE target_link_libraries(atc PRIVATE $ ascend_protobuf - slog ge_common register c_sec graph error_manager + -Wl,--start-group + slog ge_compiler + -Wl,--end-group parser_common gflags json @@ -118,13 +120,15 @@ target_include_directories(atc_atc.bin PRIVATE target_link_libraries(atc_atc.bin PRIVATE $ ascend_protobuf - slog ge_common register c_sec graph error_manager + -Wl,--start-group + slog ge_compiler + -Wl,--end-group parser_common gflags json @@ -183,13 +187,15 @@ target_include_directories(fwk_atc.bin PRIVATE target_link_libraries(fwk_atc.bin PRIVATE $ ascend_protobuf - slog ge_common register c_sec graph error_manager + -Wl,--start-group + slog ge_runner + -Wl,--end-group parser_common gflags json From 7a914446502341fb8605fb9d4d4792a473c0dbba Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 21:41:40 +0800 Subject: [PATCH 16/33] slogd LOG_CPP --- ge/graph/build/memory/CMakeLists.txt | 1 + ge/offline/CMakeLists.txt | 12 +++--------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt index bdd869a9..e988b4ce 100644 --- a/ge/graph/build/memory/CMakeLists.txt +++ b/ge/graph/build/memory/CMakeLists.txt @@ -18,6 +18,7 @@ target_compile_options(ge_memory PRIVATE target_compile_definitions(ge_memory PRIVATE google=ascend_private + LOG_CPP ) target_link_libraries(ge_memory PRIVATE diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index c4103809..21221042 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -63,14 +63,12 @@ target_link_libraries(atc PRIVATE c_sec graph error_manager - -Wl,--start-group - slog ge_compiler - -Wl,--end-group parser_common gflags json runtime_compile + slog static_mmpa -lrt -ldl @@ -125,14 +123,12 @@ target_link_libraries(atc_atc.bin PRIVATE c_sec graph error_manager - -Wl,--start-group - slog ge_compiler - -Wl,--end-group parser_common gflags json runtime_compile + slog static_mmpa -lrt -ldl @@ -192,14 +188,12 @@ target_link_libraries(fwk_atc.bin PRIVATE c_sec graph error_manager - -Wl,--start-group - slog ge_runner - -Wl,--end-group parser_common gflags json runtime + slog static_mmpa -lrt -ldl From b0c1eabedb08d69ba8ad19a2d1627d4795ff13f6 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sun, 13 Dec 2020 15:42:31 +0800 Subject: [PATCH 17/33] update dlog change --- ge/common/CMakeLists.txt | 1 + ge/executor/CMakeLists.txt | 1 + ge/ge_local_engine/CMakeLists.txt | 1 + ge/host_cpu_engine/CMakeLists.txt | 1 + 4 files changed, 4 insertions(+) diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index d196995c..05838df8 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -130,6 +130,7 @@ target_compile_definitions(ge_common_static PRIVATE google=ascend_private $,OS_TYPE=WIN,OS_TYPE=0> $<$:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> + LOG_CPP ) target_compile_options(ge_common_static PRIVATE diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index cc5c1710..d59afd03 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -173,6 +173,7 @@ target_compile_definitions(ge_executor PRIVATE google=ascend_private $,OS_TYPE=WIN,OS_TYPE=0> $<$:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> + LOG_CPP ) target_include_directories(ge_executor PRIVATE diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt index 615a968f..8f5c9777 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -203,6 +203,7 @@ target_compile_options(ge_local_opskernel_builder_static PRIVATE target_compile_definitions(ge_local_opskernel_builder_static PRIVATE google=ascend_private + LOG_CPP ) target_include_directories(ge_local_opskernel_builder_static PRIVATE diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt index 97b5a0f5..d5ed7674 100644 --- a/ge/host_cpu_engine/CMakeLists.txt +++ b/ge/host_cpu_engine/CMakeLists.txt @@ -193,6 +193,7 @@ target_compile_options(host_cpu_opskernel_builder_static PRIVATE target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE google=ascend_private + LOG_CPP ) target_include_directories(host_cpu_opskernel_builder_static PRIVATE From 19bf1dff1580564284c7918b18e7474b9b5acb1b Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Mon, 14 Dec 2020 17:50:20 +0800 Subject: [PATCH 18/33] update atc atc.bin fwk_atc.bin --- ge/offline/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 21221042..af259ecb 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -27,6 +27,7 @@ target_compile_definitions(atc PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 COMPILE_OMG_PACKAGE google=ascend_private + LOG_CPP ) target_include_directories(atc PRIVATE @@ -87,6 +88,7 @@ target_compile_definitions(atc_atc.bin PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 COMPILE_OMG_PACKAGE google=ascend_private + LOG_CPP ) target_include_directories(atc_atc.bin PRIVATE @@ -152,6 +154,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 COMPILE_OMG_PACKAGE google=ascend_private + LOG_CPP ) target_include_directories(fwk_atc.bin PRIVATE From 3b7ac7d954d6f8615264ed7170f29aca3ea21453 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 19 Dec 2020 17:39:25 +0800 Subject: [PATCH 19/33] reset slog.h change --- third_party/fwkacllib/inc/toolchain/slog.h | 25 ---------------------- 1 file changed, 25 deletions(-) diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index 5faca0ae..bce58f32 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -18,9 +18,7 @@ #define D_SYSLOG_H_ #ifdef __cplusplus -#ifndef LOG_CPP extern "C" { -#endif #endif // __cplusplus #ifndef LINUX @@ -107,7 +105,6 @@ extern "C" { #define SECURITY_LOG_MASK (0x00100000) #define RUN_LOG_MASK (0x01000000) #define OPERATION_LOG_MASK (0x10000000) -#define RESERVERD_LENGTH 52 typedef struct tagDCODE { const char *cName; @@ -119,18 +116,6 @@ typedef struct tagKV { char *value; } KeyValue; -typedef enum { - APPLICATION = 0, - SYSTEM -} ProcessType; - -typedef struct { - ProcessType type; - unsigned int pid; - unsigned int deviceId; - char reserved[RESERVERD_LENGTH]; -} LogAttr; - /** * @ingroup slog * @@ -243,14 +228,6 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); */ DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); -/** - * @ingroup slog - * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION - * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) - * @return: 0: SUCCEED, others: FAILED - */ -DLL_EXPORT int DlogSetAttr(LogAttr logAttr); - /** * @ingroup slog * @brief dlog_error: print error log @@ -390,8 +367,6 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); #ifdef __cplusplus -#ifndef LOG_CPP } -#endif // LOG_CPP #endif // __cplusplus #endif // D_SYSLOG_H_ From a658c30e40b42b07969659ceff472a899c0a5c35 Mon Sep 17 00:00:00 2001 From: wjm Date: Sat, 19 Dec 2020 18:17:07 +0800 Subject: [PATCH 20/33] mult batch --- ge/graph/build/stream_graph_optimizer.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc index f86f846e..05049818 100644 --- a/ge/graph/build/stream_graph_optimizer.cc +++ b/ge/graph/build/stream_graph_optimizer.cc @@ -73,7 +73,6 @@ bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &com GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(), comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize()); - stream_set.insert(stream_id); } if (stream_set.size() > 1 || label_set.size() > 1) { GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.", From add33b1b7228b486c1c0fe1e16a3d95f0e345a88 Mon Sep 17 00:00:00 2001 From: wjm Date: Mon, 21 Dec 2020 10:12:14 +0800 Subject: [PATCH 21/33] mult batch --- ge/graph/preprocess/multi_batch_copy_graph.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index 9ab74d70..a90f145e 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -1407,11 +1407,13 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { } Status ProcessMultiBatch(ComputeGraphPtr &graph) { - const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE"); - if (multi_batch_with_case != nullptr) { - PassManager pass_manager; - GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); - return pass_manager.Run(graph); + if (GetLocalOmgContext().dynamic_node_type.empty()) { + const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); + if (multi_batch_with_switchn == nullptr) { + PassManager pass_manager; + GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); + return pass_manager.Run(graph); + } } if (!GetLocalOmgContext().need_multi_batch) { GELOGI("No need to process_multi for no_train graph."); From 72acae3300d4f11966cf210a07d07a372a7a5960 Mon Sep 17 00:00:00 2001 From: y00500818 Date: Mon, 21 Dec 2020 10:32:28 +0800 Subject: [PATCH 22/33] bugfix for load plugin. --- ge/common/ge/tbe_plugin_manager.cc | 13 ++++++++++--- parser | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc index 44199c32..0cc7d553 100755 --- a/ge/common/ge/tbe_plugin_manager.cc +++ b/ge/common/ge/tbe_plugin_manager.cc @@ -181,12 +181,19 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { void TBEPluginManager::LoadCustomOpLib() { LoadPluginSo(options_); + std::string fmk_type = std::to_string(domi::TENSORFLOW); + auto it = options_.find(ge::FRAMEWORK_TYPE); + if (it != options_.end()) { + fmk_type = it->second; + } std::vector registration_datas = domi::OpRegistry::Instance()->registrationDatas; GELOGI("The size of registration_datas is: %zu", registration_datas.size()); for (OpRegistrationData reg_data : registration_datas) { - GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(), - TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str()); - domi::OpRegistry::Instance()->Register(reg_data); + if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) { + GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(), + TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str()); + (void)domi::OpRegistry::Instance()->Register(reg_data); + } } } diff --git a/parser b/parser index dc250b93..866d6a2b 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit dc250b93ec6b1f08938cbe4a20091fcf68635d7d +Subproject commit 866d6a2b45ab00a08fbb9b07b5ef04722730c946 From d30ea8306d8d1144f2e160e4b44bad49a8f4ad8b Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Mon, 21 Dec 2020 11:21:44 +0800 Subject: [PATCH 23/33] fix format warnings --- ge/common/profiling/profiling_manager.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 01369a2c..994b3eac 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -38,10 +38,8 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; } // namespace namespace ge { -ProfilingManager::ProfilingManager() : is_load_profiling_(false), - is_execute_profiling_(false), - is_training_trace_(false), - subscribe_count_(0) { +ProfilingManager::ProfilingManager() + : is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) { prof_cb_.msprofCtrlCallback = nullptr; prof_cb_.msprofReporterCallback = nullptr; } @@ -102,8 +100,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt return INTERNAL_ERROR; } is_execute_profiling_ = true; - GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), - prof_conf.options, options.profiling_options.c_str()); + GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options, + options.profiling_options.c_str()); } else { (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); From a7a16c40538e941c0edf1624458bcb4fabb657ad Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 21 Dec 2020 12:54:08 +0800 Subject: [PATCH 24/33] Split SinkModelProfile --- .../load/new_model_manager/davinci_model.cc | 230 ++++++++---------- .../load/new_model_manager/davinci_model.h | 26 +- 2 files changed, 130 insertions(+), 126 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index e566edf3..f3ba3d3d 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -710,6 +710,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size } // collect profiling for ge + GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed"); auto &profiling_manager = ProfilingManager::Instance(); if (profiling_manager.ProfilingModelLoadOn()) { Status p_ret = ReportProfilingData(); @@ -2086,12 +2087,62 @@ Status DavinciModel::SyncVarData() { return ret; } -inline int64_t SumSize(const vector &size_list) { - int64_t sum_size = 0; - for (const int64_t &size : size_list) { - sum_size += size; +Status DavinciModel::InitModelProfile() { + for (const auto &task : task_list_) { + GE_CHECK_NOTNULL(task); + const auto &fusion_op_info = task->GetFusionOpInfo(); + // when type is RT_MODEL_TASK_KERNEL, ctx is not null + if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { + continue; + } + + GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size()); + op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID())); + } + + std::set task_id_set; + using CIT = std::multimap::const_iterator; + using Range = std::pair; + for (const auto &task : task_list_) { + GE_CHECK_NOTNULL(task); + const auto &fusion_op_info = task->GetFusionOpInfo(); + if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { + continue; + } + + uint32_t task_id = task->GetTaskID(); + uint32_t op_num = fusion_op_info->original_op_names.size(); + uint32_t task_count = 0; + if (task_id_set.count(task->GetTaskID()) != 0) { + continue; + } + + const auto &op_desc = GetOpByIndex(fusion_op_info->op_index); + GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index is out of range, index: %u", fusion_op_info->op_index); + + ProfileInfo profile; + profile.fusion_info = *fusion_op_info; + Range range = op_id_map_.equal_range(fusion_op_info->op_index); + for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { + profile.task_count++; + task_id_set.insert(range_idx->second); + } + + // memory info + TaskMemInfo &mem_info = profile.memory_info; + const auto input_size = ModelUtils::GetInputSize(op_desc); + const auto output_size = ModelUtils::GetOutputSize(op_desc); + const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc); + const auto weight_size = ModelUtils::GetWeightSize(op_desc); + mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0); + mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0); + mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0); + mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0); + mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size; + + profile_list_.emplace_back(profile); } - return sum_size; + return SUCCESS; } Status DavinciModel::SinkModelProfile() { @@ -2099,18 +2150,12 @@ Status DavinciModel::SinkModelProfile() { auto &prof_mgr = ProfilingManager::Instance(); ReporterData reporter_data{}; // report model data tag name - std::string tag_name; - tag_name.append("model_load_info_").append(std::to_string(this->Id())); + std::string tag_name("model_load_info_" + std::to_string(this->Id())); GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, return FAILED, "Sink model tag memcpy error."); // Model Header - string name; - if (!om_name_.empty()) { - name = om_name_; - } else { - name = name_; - } + std::string name = om_name_.empty() ? name_ : om_name_; size_t name_len = name.size(); reporter_data.deviceId = device_id_; reporter_data.data = (unsigned char *)&name_len; @@ -2142,126 +2187,67 @@ Status DavinciModel::SinkModelProfile() { GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); - int32_t task_num = task_list_.size(); - std::multimap op_id_map; - std::set task_id_set; - for (int32_t i = 0; i < task_num; i++) { - auto task = task_list_[i]; - GE_CHECK_NOTNULL(task); - auto fusion_op_info = task->GetFusionOpInfo(); - // when type is RT_MODEL_TASK_KERNEL, ctx is not null - if (fusion_op_info != nullptr) { - uint32_t op_num = fusion_op_info->original_op_names.size(); - uint32_t task_id = task->GetTaskID(); - if (op_num > 0) { - GELOGI("task.id = %u, opNum = %u", task_id, op_num); - op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); - } - } - } - - struct memoryInfo { - int64_t input_size; - int64_t output_size; - int64_t weight_size; - int64_t workspace_size; - int64_t total_size; - - memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {} - }; - using CIT = std::multimap::const_iterator; using Range = std::pair; - for (int32_t i = 0; i < task_num; i++) { - auto task = task_list_[i]; - GE_CHECK_NOTNULL(task); - auto fusion_op_info = task->GetFusionOpInfo(); - if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) { - uint32_t task_id = task->GetTaskID(); - uint32_t op_num = fusion_op_info->original_op_names.size(); - uint32_t task_count = 0; - if (task_id_set.count(task_id) != 0) { - continue; - } - - uint32_t op_id = fusion_op_info->op_index; - Range range = op_id_map.equal_range(op_id); - for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { - task_count++; - uint32_t task_id = range_idx->second; - task_id_set.insert(task_id); - } - - // op name after fusion - string fusion_op_name = fusion_op_info->op_name; - int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); - reporter_data.data = (unsigned char *)&fusion_op_name_len; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - reporter_data.data = (unsigned char *)fusion_op_name.c_str(); - reporter_data.dataLen = fusion_op_name_len; - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // original op name before fusion - reporter_data.data = (unsigned char *)&op_num; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - for (uint32_t k = 0; k < op_num; k++) { - std::string op_name = fusion_op_info->original_op_names[k]; - int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); - reporter_data.data = (unsigned char *)&op_name_len; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - reporter_data.data = (unsigned char *)op_name.c_str(); - reporter_data.dataLen = op_name_len; - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - } - - // stream id info - uint32_t streamId = task->GetStreamId(); - reporter_data.data = (unsigned char *)&streamId; + for (const ProfileInfo &profile : profile_list_) { + // op name after fusion + string fusion_op_name = profile.fusion_info.op_name; + int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); + reporter_data.data = (unsigned char *)&fusion_op_name_len; + reporter_data.dataLen = sizeof(int32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + reporter_data.data = (unsigned char *)fusion_op_name.c_str(); + reporter_data.dataLen = fusion_op_name_len; + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + // original op name before fusion + uint32_t op_num = profile.fusion_info.original_op_names.size(); + reporter_data.data = (unsigned char *)&op_num; + reporter_data.dataLen = sizeof(uint32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + for (uint32_t k = 0; k < op_num; k++) { + std::string op_name = profile.fusion_info.original_op_names[k]; + int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); + reporter_data.data = (unsigned char *)&op_name_len; reporter_data.dataLen = sizeof(int32_t); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); - - // memory info - struct memoryInfo memory_info; - uint32_t op_index = fusion_op_info->op_index; - auto iter = op_list_.find(op_index); - GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index); - auto op_desc = iter->second; - memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc)); - memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc)); - memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc)); - memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc)); - memory_info.total_size = - memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; - reporter_data.data = (unsigned char *)&memory_info; - reporter_data.dataLen = sizeof(struct memoryInfo); + reporter_data.data = (unsigned char *)op_name.c_str(); + reporter_data.dataLen = op_name_len; GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); + } - // task info - reporter_data.data = (unsigned char *)&task_count; + // stream id info + uint32_t streamId = profile.fusion_info.stream_id; + reporter_data.data = (unsigned char *)&streamId; + reporter_data.dataLen = sizeof(int32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + reporter_data.data = (unsigned char *)&profile.memory_info; + reporter_data.dataLen = sizeof(profile.memory_info); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + // task info + reporter_data.data = (unsigned char *)&profile.task_count; + reporter_data.dataLen = sizeof(uint32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); + for (CIT idx = task_range.first; idx != task_range.second; ++idx) { + uint32_t task_id = idx->second; + reporter_data.data = (unsigned char *)&task_id; reporter_data.dataLen = sizeof(uint32_t); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); - - Range task_range = op_id_map.equal_range(op_id); - for (CIT idx = task_range.first; idx != task_range.second; ++idx) { - uint32_t task_id = idx->second; - reporter_data.data = (unsigned char *)&task_id; - reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - } } } return SUCCESS; diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 4c507662..4f976968 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -76,6 +76,20 @@ struct timeInfo { int64_t dumpEndTime; }; +struct TaskMemInfo { + int64_t input_size{0}; + int64_t output_size{0}; + int64_t weight_size{0}; + int64_t workspace_size{0}; + int64_t total_size{0}; +}; + +struct ProfileInfo { + FusionOpInfo fusion_info; + TaskMemInfo memory_info; + uint32_t task_count{0}; +}; + enum ExecuteMode { INITIALIZATION, SYNCHRONIZATION, @@ -436,10 +450,6 @@ class DavinciModel { int64_t GetLoadEndTime() { return load_end_time_; } - Status SinkModelProfile(); - - Status SinkTimeProfile(const InputData ¤t_data); - Status ReportProfilingData(); void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { @@ -804,6 +814,11 @@ class DavinciModel { void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); + Status InitModelProfile(); + Status SinkModelProfile(); + + Status SinkTimeProfile(const InputData ¤t_data); + Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, std::vector &outputs); @@ -980,6 +995,9 @@ class DavinciModel { // key: input_index: input is merge node; value: each gear info and each output shape std::map, vector>> merge_nodes_gear_and_real_out_shape_info_; std::vector> all_gears_info_; + + std::multimap op_id_map_; + std::vecotr profile_list_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ From 841393e5c76d1ba87bc58445d2c901c8a5f71bc4 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 21 Dec 2020 13:15:23 +0800 Subject: [PATCH 25/33] Split SinkModelProfile --- .../load/new_model_manager/davinci_model.cc | 17 +++++++++-------- ge/graph/load/new_model_manager/davinci_model.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index f3ba3d3d..eae6de13 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2090,7 +2090,7 @@ Status DavinciModel::SyncVarData() { Status DavinciModel::InitModelProfile() { for (const auto &task : task_list_) { GE_CHECK_NOTNULL(task); - const auto &fusion_op_info = task->GetFusionOpInfo(); + const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); // when type is RT_MODEL_TASK_KERNEL, ctx is not null if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { continue; @@ -2105,20 +2105,17 @@ Status DavinciModel::InitModelProfile() { using Range = std::pair; for (const auto &task : task_list_) { GE_CHECK_NOTNULL(task); - const auto &fusion_op_info = task->GetFusionOpInfo(); + const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { continue; } - uint32_t task_id = task->GetTaskID(); - uint32_t op_num = fusion_op_info->original_op_names.size(); - uint32_t task_count = 0; - if (task_id_set.count(task->GetTaskID()) != 0) { + if (task_id_set.count(task->GetTaskID()) > 0) { continue; } const auto &op_desc = GetOpByIndex(fusion_op_info->op_index); - GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index is out of range, index: %u", fusion_op_info->op_index); + GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index); ProfileInfo profile; profile.fusion_info = *fusion_op_info; @@ -2142,6 +2139,8 @@ Status DavinciModel::InitModelProfile() { profile_list_.emplace_back(profile); } + + GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size()); return SUCCESS; } @@ -2206,7 +2205,7 @@ Status DavinciModel::SinkModelProfile() { // original op name before fusion uint32_t op_num = profile.fusion_info.original_op_names.size(); reporter_data.data = (unsigned char *)&op_num; - reporter_data.dataLen = sizeof(uint32_t); + reporter_data.dataLen = sizeof(int32_t); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); @@ -2230,6 +2229,7 @@ Status DavinciModel::SinkModelProfile() { GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); + // memory info reporter_data.data = (unsigned char *)&profile.memory_info; reporter_data.dataLen = sizeof(profile.memory_info); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, @@ -2250,6 +2250,7 @@ Status DavinciModel::SinkModelProfile() { "Reporter data fail, model id:%u.", this->Id()); } } + return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 4f976968..ede979d0 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -997,7 +997,7 @@ class DavinciModel { std::vector> all_gears_info_; std::multimap op_id_map_; - std::vecotr profile_list_; + std::vector profile_list_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ From 41e6087563bb28f569b7c8b4e806a927e98434cd Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 21 Dec 2020 13:17:30 +0800 Subject: [PATCH 26/33] Split SinkModelProfile --- ge/graph/load/new_model_manager/davinci_model.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index ede979d0..be8efd90 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -240,8 +240,6 @@ class DavinciModel { const vector &GetDataList() const { return data_op_list_; } // get Op - const map &GetOpList() const { return op_list_; } - OpDescPtr GetOpByIndex(uint32_t index) const { if (op_list_.find(index) == op_list_.end()) { return nullptr; From 772d6377762f6a9993c04ae0b8cd0d81467666bd Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 21 Dec 2020 15:28:20 +0800 Subject: [PATCH 27/33] Delete useless variable kDefaultBatchLable. --- ge/graph/load/new_model_manager/zero_copy_task.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index 7c6b3963..b938f14b 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -22,8 +22,6 @@ #include "common/ge_compiler_options.h" namespace ge { -const char *const kDefaultBatchLable = "Batch_default"; - ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size) : name_(name), args_addr_(args), args_size_(size), is_updated_(false) {} From 77ab6691913d01707b1da47f743f3d866a05e048 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Mon, 21 Dec 2020 16:07:18 +0800 Subject: [PATCH 28/33] p2p offset set for zero --- ge/graph/build/memory/graph_mem_assigner.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index a6da4682..f44e40f0 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() { MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); - if (mem_assigner->GetP2PMemOffset() > 0) { + if (mem_assigner->GetP2PMemOffset() >= 0) { MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); } From aca3708d8ee29f1ce5425bdd54763fdefa20145a Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 21 Dec 2020 16:50:49 +0800 Subject: [PATCH 29/33] Eliminate useless interface. --- ge/executor/ge_executor.cc | 82 ------------------- ge/graph/load/graph_loader.cc | 61 +------------- ge/graph/load/graph_loader.h | 6 -- .../task_info/kernel_task_info.cc | 14 ++-- inc/framework/executor/ge_executor.h | 13 --- 5 files changed, 8 insertions(+), 168 deletions(-) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 57ab7800..18d78696 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -209,19 +209,6 @@ bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, namespace ge { bool GeExecutor::isInit_ = false; -class ModelListenerAdapter : public ModelListener { - public: - domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode, - std::vector &outputs) { - if (listener == nullptr) { - GELOGE(ge::FAILED, "listener is null."); - return FAILED; - } - return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs); - } - - std::shared_ptr listener; -}; static void InitOpsProtoManger() { string opsproto_path; @@ -573,60 +560,6 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add return SUCCESS; } -// Load model -Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, - int32_t priority, std::shared_ptr listener) { - GELOGI("load model offline begin."); - if (!isInit_) { - GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); - return ACL_ERROR_GE_EXEC_NOT_INIT; - } - - string filePath = RealPath(path.c_str()); - if (filePath.empty()) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, - "File path is invalid. please check your text file '%s'.", path.c_str()); - return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; - } - - std::shared_ptr listener_adapter = MakeShared(); - if (listener_adapter == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!"); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } - listener_adapter->listener = listener; - - Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id); - if (ret != SUCCESS) { - GELOGE(ret, "[GeExecutor] LoadModelFromFile failed"); - return ACL_ERROR_GE_LOAD_MODEL; - } - return SUCCESS; -} - -Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, - std::shared_ptr listener) { - GELOGI("Load model begin."); - if (!isInit_) { - GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); - return ACL_ERROR_GE_EXEC_NOT_INIT; - } - - std::shared_ptr listener_adapter = MakeShared(); - if (listener_adapter == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!"); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } - listener_adapter->listener = listener; - - Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id); - if (ret != SUCCESS) { - GELOGE(ret, "[GeExecutor] LoadModel failed."); - return ACL_ERROR_GE_LOAD_MODEL; - } - return ret; -} - Status GeExecutor::UnloadModel(uint32_t model_id) { GELOGD("unload model %u begin.", model_id); if (!isInit_) { @@ -659,21 +592,6 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { return SUCCESS; } -Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) { - GELOGI("run model begin."); - if (!isInit_) { - GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); - return ACL_ERROR_GE_EXEC_NOT_INIT; - } - - InputData inputs; - GetDomiInputData(input_data, inputs); - OutputData outputs; - GetDomiOutputData(output_data, outputs); - - return GraphExecutor::DataInput(inputs, outputs); -} - // Get input and output descriptor Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector &input_desc, std::vector &output_desc, bool new_model_desc) { diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 4f32a45a..cb68533e 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -144,63 +144,6 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string return SUCCESS; } -Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority, - const std::shared_ptr &listener, uint32_t &model_id) { - Status ret; - ModelData model_data; - ret = LoadDataFromFile(path, key_path, priority, model_data); - if (ret != SUCCESS) { - GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret); - if (model_data.model_data != nullptr) { - delete[] static_cast(model_data.model_data); - model_data.model_data = nullptr; - } - return ret; - } - - ret = LoadModel(model_data, listener, model_id); - if (ret != SUCCESS) { - GELOGE(ret, "LoadModel: Load failed. ret = %u", ret); - if (model_data.model_data != nullptr) { - delete[] static_cast(model_data.model_data); - model_data.model_data = nullptr; - } - } - - if (model_data.model_data != nullptr) { - delete[] static_cast(model_data.model_data); - model_data.model_data = nullptr; - } - - return ret; -} - -Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr &listener, - uint32_t &model_id) { - GELOGI("Load model begin, model_id:%u.", model_id); - - // For GeOp, Open Device 0 here. - GE_CHK_RT_RET(rtSetDevice(0)); - auto model_manager = ModelManager::GetInstance(); - GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->LoadModelOffline(model_id, model_data, listener); - if (ret != SUCCESS) { - GE_CHK_RT(rtDeviceReset(0)); - GELOGE(ACL_ERROR_GE_LOAD_MODEL, "LoadModel: Load failed."); - return ACL_ERROR_GE_LOAD_MODEL; - } - ret = model_manager->Start(model_id); - if (ret != SUCCESS) { - if (model_manager->Unload(model_id) != SUCCESS) { - GELOGE(ACL_ERROR_GE_UNLOAD_MODEL, "LoadModel: Unload failed while trying to unload after a failed start."); - } - GELOGE(ret, "LoadModel: Start failed."); - return ret; - } - GELOGI("LoadModel: Start model success, model_id:%u.", model_id); - return SUCCESS; -} - Status GraphLoader::CommandHandle(const Command &command) { try { auto model_manager = ModelManager::GetInstance(); @@ -225,13 +168,13 @@ Status GraphLoader::CommandHandle(const Command &command) { } Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, - size_t memsize, void *weight_ptr, size_t weightsize) { + size_t mem_size, void *weight_ptr, size_t weight_size) { GELOGI("Load model begin, model_id:%u.", model_id); // For ACL, Open Device from App. auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->LoadModelOffline( - model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); + model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size); if (ret != SUCCESS) { GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id); return ACL_ERROR_GE_LOAD_MODEL; diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h index 974af5c1..3a13a113 100755 --- a/ge/graph/load/graph_loader.h +++ b/ge/graph/load/graph_loader.h @@ -44,12 +44,6 @@ class GraphLoader { static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size); - static Status LoadModel(const ModelData &model_data, const std::shared_ptr &listener, - uint32_t &model_id); - - static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority, - const std::shared_ptr &listener, uint32_t &model_id); - static Status CommandHandle(const Command &command); static Status GetMemoryInfo(int64_t &free); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 364c7ac2..ce31ef30 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -90,20 +90,18 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names; fusion_op_info_.op_name = op_desc_->GetName()); - string session_graph_model_id; - davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); - // get bin_file_key - const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); // new aicpu kernel(rtCpuKernelLaunch) no need to check function if (kernel_type_ == ccKernelType::CCE_AI_CORE) { - rtError_t rt_ret; - rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); + rtError_t rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", kernel_def.stub_func().c_str()); return RT_ERROR_TO_GE_STATUS(rt_ret);); } else if (kernel_type_ == ccKernelType::TE) { - rtError_t rt_ret; - rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); + // get bin_file_key + string session_graph_model_id; + davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); + const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); + rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); return RT_ERROR_TO_GE_STATUS(rt_ret);); diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 5a73126f..1b78860d 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -30,8 +30,6 @@ #include "runtime/base.h" namespace ge { -class ModelListenerAdapter; - class SingleOp; class DynamicSingleOp; @@ -55,14 +53,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status Initialize(); ge::Status Finalize(); - // Load model - ge::Status LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, int32_t priority, - std::shared_ptr listener); - ge::Status UnloadModel(uint32_t modelId); - ge::Status RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data); - // Get input and output descriptor ge::Status GetModelDescInfo(uint32_t model_id, std::vector &input_desc, std::vector &output_desc, bool new_model_desc = false); @@ -168,9 +160,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, std::vector &output_desc); - ge::Status LoadModel(uint32_t &model_id, const ge::ModelData &model_data, - std::shared_ptr listener); - ge::Status CommandHandle(const ge::Command &command); ge::Status SetDump(const DumpConfig &dump_config); @@ -297,8 +286,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { private: static bool isInit_; }; - -ge::Status ModelInfoParser(const ge::ModelData &model, ge::ModelInfo &model_info); } // namespace ge #endif // INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_ From 6d3d0c91c7881422552c27034c46115adf4fcbeb Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Mon, 21 Dec 2020 19:43:29 +0800 Subject: [PATCH 30/33] dump add origin shape --- ge/common/dump/dump_op.cc | 15 ++++++++++++++- ge/common/proto/op_mapping_info.proto | 2 ++ ge/executor/proto/op_mapping_info.proto | 2 ++ ge/graph/load/new_model_manager/data_dumper.cc | 6 ++++++ ge/proto/op_mapping_info.proto | 2 ++ metadef | 2 +- parser | 2 +- 7 files changed, 28 insertions(+), 3 deletions(-) diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc index e92ada05..4b451dd6 100755 --- a/ge/common/dump/dump_op.cc +++ b/ge/common/dump/dump_op.cc @@ -94,6 +94,9 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) { for (auto dim : output_descs.at(i).GetShape().GetDims()) { output.mutable_shape()->add_dim(dim); } + for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) { + output.mutable_origin_shape()->add_dim(dim); + } int64_t output_size = 0; if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { GELOGE(PARAM_INVALID, "Get output size filed"); @@ -118,6 +121,9 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) { for (auto dim : input_descs.at(i).GetShape().GetDims()) { input.mutable_shape()->add_dim(dim); } + for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) { + input.mutable_origin_shape()->add_dim(dim); + } int64_t input_size = 0; if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { GELOGE(PARAM_INVALID, "Get output size filed"); @@ -214,8 +220,15 @@ Status DumpOp::LaunchDumpOp() { SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), dump_path.c_str()); - + uint32_t task_id = 0; + uint32_t stream_id = 0; + rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret); + } aicpu::dump::Task task; + task.set_task_id(task_id); + task.set_stream_id(stream_id); task.mutable_op()->set_op_name(op_desc_->GetName()); task.mutable_op()->set_op_type(op_desc_->GetType()); if (dump_properties_.GetDumpMode() == kDumpOutput) { diff --git a/ge/common/proto/op_mapping_info.proto b/ge/common/proto/op_mapping_info.proto index e23b7ebe..7fb6f84b 100644 --- a/ge/common/proto/op_mapping_info.proto +++ b/ge/common/proto/op_mapping_info.proto @@ -15,6 +15,7 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; + Shape origin_shape = 10; } message Input { @@ -23,6 +24,7 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; + Shape origin_shape = 6; } enum BufferType { diff --git a/ge/executor/proto/op_mapping_info.proto b/ge/executor/proto/op_mapping_info.proto index e23b7ebe..7fb6f84b 100644 --- a/ge/executor/proto/op_mapping_info.proto +++ b/ge/executor/proto/op_mapping_info.proto @@ -15,6 +15,7 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; + Shape origin_shape = 10; } message Input { @@ -23,6 +24,7 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; + Shape origin_shape = 6; } enum BufferType { diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc index b331d780..6f65e907 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/new_model_manager/data_dumper.cc @@ -319,6 +319,9 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { output.mutable_shape()->add_dim(dim); } + for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) { + output.mutable_origin_shape()->add_dim(dim); + } int64_t output_size = 0; if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) { GELOGE(PARAM_INVALID, "Get output size filed"); @@ -476,6 +479,9 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { input.mutable_shape()->add_dim(dim); } + for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) { + input.mutable_origin_shape()->add_dim(dim); + } int64_t input_size = 0; if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { GELOGI("Get aipp input size according to attr is %ld", input_size); diff --git a/ge/proto/op_mapping_info.proto b/ge/proto/op_mapping_info.proto index e23b7ebe..7fb6f84b 100644 --- a/ge/proto/op_mapping_info.proto +++ b/ge/proto/op_mapping_info.proto @@ -15,6 +15,7 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; + Shape origin_shape = 10; } message Input { @@ -23,6 +24,7 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; + Shape origin_shape = 6; } enum BufferType { diff --git a/metadef b/metadef index 5546f5f4..191b7ad1 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 5546f5f4701130f2dd11a6d69817dc37d52c497e +Subproject commit 191b7ad10e99d0b8d800ce85dae3ef7a2a146870 diff --git a/parser b/parser index 866d6a2b..8317ba6e 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 866d6a2b45ab00a08fbb9b07b5ef04722730c946 +Subproject commit 8317ba6e886da54f4f161bf4a7fc40de27d6ce3c From 4d7cad236c83af3dc21f888eb78f74362333715e Mon Sep 17 00:00:00 2001 From: "wangwenhua1@huawei.com" Date: Mon, 21 Dec 2020 20:15:53 +0800 Subject: [PATCH 31/33] dump add origin shape --- ge/common/dump/dump_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc index 4b451dd6..0b9e9dcc 100755 --- a/ge/common/dump/dump_op.cc +++ b/ge/common/dump/dump_op.cc @@ -224,7 +224,7 @@ Status DumpOp::LaunchDumpOp() { uint32_t stream_id = 0; rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret); + GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret); } aicpu::dump::Task task; task.set_task_id(task_id); From 7fe1a8ae1c2b50d15fb1f9fe0df90d64151f3932 Mon Sep 17 00:00:00 2001 From: lixiwen1 Date: Tue, 22 Dec 2020 10:02:30 +0800 Subject: [PATCH 32/33] Feature: delete unsupptorted paras checklist of aclgrphParse interface --- inc/external/ge/ge_api_types.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 59e8808b..e79e9b62 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -395,19 +395,13 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, OP_BANK_UPDATE}; // for interface: aclgrphParse -const std::set ir_parser_suppported_options = {INPUT_FORMAT, - INPUT_SHAPE, - OP_NAME_MAP, - IS_DYNAMIC_INPUT, - INPUT_FP16_NODES, +const std::set ir_parser_suppported_options = {INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, - OUTPUT_TYPE, OUT_NODES, COMPRESS_WEIGHT_CONF, - ENABLE_SCOPE_FUSION_PASSES, - LOG_LEVEL}; + ENABLE_SCOPE_FUSION_PASSES}; // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE, From 67e9ecab8474c291ae1d44e99a41c37256399033 Mon Sep 17 00:00:00 2001 From: lixiwen1 Date: Tue, 22 Dec 2020 10:16:38 +0800 Subject: [PATCH 33/33] Feature: delete unsupptorted paras checklist of aclgrphParse interface --- inc/external/ge/ge_api_types.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index e79e9b62..9b361b96 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -395,13 +395,9 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, OP_BANK_UPDATE}; // for interface: aclgrphParse -const std::set ir_parser_suppported_options = {INPUT_FP16_NODES, - IS_INPUT_ADJUST_HW_LAYOUT, - IS_OUTPUT_ADJUST_HW_LAYOUT, - OUTPUT, - OUT_NODES, - COMPRESS_WEIGHT_CONF, - ENABLE_SCOPE_FUSION_PASSES}; +const std::set ir_parser_suppported_options = { + INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, + OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES}; // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE,