From bed5633f8b08c9d5f11ef7520b302908f3e1583d Mon Sep 17 00:00:00 2001 From: chuxing Date: Sat, 12 Dec 2020 16:28:13 +0800 Subject: [PATCH 01/23] Calc op running param by GE --- ge/hybrid/executor/node_state.cc | 54 ++++++---- ge/hybrid/executor/node_state.h | 3 +- ge/hybrid/executor/subgraph_executor.cc | 9 +- ge/hybrid/executor/worker/execution_engine.cc | 9 +- .../executor/worker/shape_inference_engine.cc | 100 ++++++++++++++---- .../executor/worker/shape_inference_engine.h | 2 + ge/hybrid/model/node_item.cc | 9 +- ge/hybrid/node_executor/task_context.cc | 22 ++++ ge/hybrid/node_executor/task_context.h | 2 + 9 files changed, 156 insertions(+), 54 deletions(-) diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index 033c5304..66eeeba8 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -18,6 +18,7 @@ #include #include "framework/common/debug/log.h" #include "graph/compute_graph.h" +#include "graph/utils/tensor_utils.h" #include "hybrid_execution_context.h" #include "subgraph_context.h" @@ -35,29 +36,31 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item( this->num_pending_shapes_); } -Status ShapeInferenceState::UpdateInputShape(int idx, - const GeShape &ori_shape, - const GeShape &shape) { +Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) { if (node_item.IsInputShapeStatic(idx)) { GELOGD("[%s] Trying to update static shape, idx = %d. 
old shape = [%s], new shape = [%s]", node_item.NodeName().c_str(), idx, node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(), - shape.ToString().c_str()); + target.GetShape().ToString().c_str()); return SUCCESS; } - GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]", + int64_t tensor_size = -1; + (void) TensorUtils::GetSize(target, tensor_size); + GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld", node_item.NodeName().c_str(), idx, - shape.ToString().c_str(), - ori_shape.ToString().c_str()); + target.GetShape().ToString().c_str(), + target.GetOriginShape().ToString().c_str(), + tensor_size); std::lock_guard lk(mu_); auto tensor_desc = node_item.MutableInputDesc(idx); GE_CHECK_NOTNULL(tensor_desc); - tensor_desc->SetShape(shape); - tensor_desc->SetOriginShape(ori_shape); + tensor_desc->SetShape(target.GetShape()); + tensor_desc->SetOriginShape(target.GetOriginShape()); + (void) TensorUtils::SetSize(*tensor_desc, tensor_size); if (--num_pending_shapes_ == 0) { ready_cv_.notify_all(); } @@ -110,24 +113,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex for (auto &p : shape_futures) { auto idx = p.first; auto &future = p.second; - GeShape shape; - GeShape ori_shape; RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); - GE_CHK_STATUS_RET(future.Get(ori_shape, shape), - "[%s] Get shape failed. 
index = %u", - node_item.NodeName().c_str(), - idx); + auto src_tensor_desc = future.GetTensorDesc(); + GE_CHECK_NOTNULL(src_tensor_desc); RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); + auto input_desc = node_item.MutableInputDesc(idx); + GE_CHECK_NOTNULL(input_desc); + int64_t tensor_size = -1; + (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size); GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", node_item.NodeName().c_str(), idx, - shape.ToString().c_str(), - ori_shape.ToString().c_str()); - auto input_desc = node_item.MutableInputDesc(idx); - GE_CHECK_NOTNULL(input_desc); - input_desc->SetShape(std::move(shape)); - input_desc->SetOriginShape(ori_shape); + src_tensor_desc->GetShape().ToString().c_str(), + src_tensor_desc->GetOriginShape().ToString().c_str(), + tensor_size); + input_desc->SetShape(src_tensor_desc->GetShape()); + input_desc->SetOriginShape(src_tensor_desc->GetOriginShape()); + (void) TensorUtils::SetSize(*input_desc, tensor_size); } return SUCCESS; @@ -190,5 +193,14 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { GELOGD("Get shape from %s:%u. 
shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); return SUCCESS; } + +GeTensorDescPtr ShapeFuture::GetTensorDesc() { + GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); + if (!subgraph_context_->Await(src_node_)) { + GELOGE(INTERNAL_ERROR, "cancelled"); + return nullptr; + } + return src_node_->GetOpDesc()->MutableOutputDesc(src_index_); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 04f1ee4b..312e177f 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -35,6 +35,7 @@ class ShapeFuture { ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context); ~ShapeFuture() = default; Status Get(GeShape &ori_shape, GeShape &shape); + GeTensorDescPtr GetTensorDesc(); private: NodePtr src_node_; @@ -45,7 +46,7 @@ class ShapeFuture { struct ShapeInferenceState { explicit ShapeInferenceState(const NodeItem &node_item); - Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape); + Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc); void UpdateInputShapeFuture(int idx, ShapeFuture &&future); diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 5a464f8e..4b6dddab 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetOrCreateNodeState(input_node); GE_CHECK_NOTNULL(node_state); - node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape()); + node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc); } } @@ -268,13 +268,6 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta } else { node_state.SetKernelTask(node_item.kernel_task); } - - GELOGD("[%s] Start to invoke 
CalcOpRunningParam.", node_item.NodeName().c_str()); - RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); - GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node), - "[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); - RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); - GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index e6729352..0d9c7a69 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -20,12 +20,9 @@ #include "graph/utils/tensor_adapter.h" #include "graph/debug/ge_attr_define.h" #include "hybrid/node_executor/node_executor.h" -#include "common/dump/dump_manager.h" +#include "hybrid/executor//worker//shape_inference_engine.h" #include "common/dump/dump_op.h" -#include "common/types.h" -#include "common/ge_types.h" #include "common/profiling/profiling_manager.h" -#include "runtime/base.h" namespace ge { namespace hybrid { @@ -349,6 +346,10 @@ Status NodeDoneCallback::OnNodeDone() { } GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item)); + if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) { + // update output tensor sizes + GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item)); + } // PropagateOutputs for type == DEPEND_COMPUTE if (node_item.shape_inference_type == DEPEND_COMPUTE) { if (graph_context_->trace_enabled) { diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 1d813526..02b3a50b 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -17,9 +17,15 @@ #include 
"hybrid/executor/worker/shape_inference_engine.h" #include "graph/shape_refiner.h" #include "graph/utils/node_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" +#include "common/math/math_util.h" #include "hybrid/node_executor/node_executor.h" namespace ge { +namespace { +const int kAlignment = 32; +} namespace hybrid { ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) : execution_context_(execution_context), @@ -40,7 +46,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { } if (node_item.fused_subgraph != nullptr) { - return InferShapeForSubgraph(node_item, *node_item.fused_subgraph); + GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph)); + GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item)); + return SUCCESS; } // Skip shape inference for node of type DEPEND_COMPUTE @@ -66,18 +74,12 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { "Invoke InferShapeAndType failed."); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); } - // Check again to make sure shape is valid after shape inference - if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) { - bool is_unknown_shape = false; - GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape), - "Failed to get shape status. 
node = %s", - node_item.NodeName().c_str()); - - GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, - INTERNAL_ERROR, - "[%s] Shape is still unknown after shape inference.", - node_item.NodeName().c_str()); - } + + // update output tensor sizes after shape inference + // error if shape is still unknown and not of type DEPEND_SHAPE_RANGE + RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); + GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE)); + RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", node_item.NodeName().c_str(), @@ -127,8 +129,6 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { // propagate each output for (int i = 0; i < node_item.num_outputs; ++i) { auto output_desc = node_item.op_desc->MutableOutputDesc(i); - const auto &shape = output_desc->MutableShape(); - const auto &ori_shape = output_desc->GetOriginShape(); auto &output_nodes = node_item.outputs[i]; // propagate output to all sub-inputs @@ -149,9 +149,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first, std::move(future)); } else { - GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, - ori_shape, - shape)); + GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc)); } } } @@ -230,5 +228,71 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) { } return SUCCESS; } + +Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) { + auto op_desc = node_item.GetOpDesc(); + for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) { + auto tensor_desc = op_desc->MutableOutputDesc(output_index); + 
GE_CHECK_NOTNULL(tensor_desc); + const auto &shape = tensor_desc->MutableShape(); + auto dims = shape.GetDims(); + auto dim_num = dims.size(); + if (shape.IsUnknownShape()) { + if (!fallback_with_range) { + GELOGE(INTERNAL_ERROR, "[%s] Shape of output[%zu] is still unknown after shape inference. shape = [%s]", + node_item.NodeName().c_str(), + output_index, + shape.ToString().c_str()); + return INTERNAL_ERROR; + } + + GELOGD("[%s] Calc output[%zu] size by range", node_item.NodeName().c_str(), output_index); + std::vector> shape_range; + GE_CHK_GRAPH_STATUS_RET(tensor_desc->GetShapeRange(shape_range), + "[$s] Failed to get shape range for output: %zu", + node_item.NodeName().c_str(), + output_index); + if (shape_range.size() != dim_num) { + GELOGE(INTERNAL_ERROR, "[%s] Number of shape ranges (%zu) mismatches that of dims (%zu), index = %zu", + node_item.NodeName().c_str(), + shape_range.size(), + dim_num, + output_index); + return INTERNAL_ERROR; + } + + for (size_t dim_index = 0; dim_index < dim_num; ++dim_index) { + if (dims[dim_index] == ge::UNKNOWN_DIM) { + dims[dim_index] = shape_range[dim_index].second; + } + } + } + + uint32_t type_size = 0; + if (!TypeUtils::GetDataTypeLength(tensor_desc->GetDataType(), type_size)) { + GELOGE(INTERNAL_ERROR, "Failed to get data type size"); + return INTERNAL_ERROR; + } + int64_t tensor_size = type_size; + for (const auto &dim : dims) { + GE_CHECK_GE(dim, 0); + GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim), + "[%s] Shape size overflow, shape = [%s]", + node_item.NodeName().c_str(), + shape.ToString().c_str()); + tensor_size *= dim; + } + + GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1), + "[%s] Output[%zu] Tensor size too large, shape = [%s]", + node_item.NodeName().c_str(), + output_index, + shape.ToString().c_str()); + tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment; + (void) TensorUtils::SetSize(*tensor_desc, tensor_size); + } + + return SUCCESS; +} } // namespace 
hybrid } // namespace ge diff --git a/ge/hybrid/executor/worker/shape_inference_engine.h b/ge/hybrid/executor/worker/shape_inference_engine.h index 7bb9269c..9401ead2 100644 --- a/ge/hybrid/executor/worker/shape_inference_engine.h +++ b/ge/hybrid/executor/worker/shape_inference_engine.h @@ -34,6 +34,8 @@ class ShapeInferenceEngine { Status PropagateOutputShapes(const NodeItem &node_item); + static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false); + private: static Status UpdatePeerNodeShape(const Node &node); Status AwaitDependentNodes(NodeState &node_state); diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index 69cf334d..1fd8fe31 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -22,6 +22,7 @@ #include "graph/debug/ge_attr_define.h" #include "graph/utils/node_utils.h" #include "hybrid/node_executor/node_executor.h" +#include "hybrid/executor/worker/shape_inference_engine.h" namespace ge { namespace hybrid { @@ -47,7 +48,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr GE_CHECK_NOTNULL(dst_op_desc); auto in_idx = node_and_anchor.second->GetIdx(); auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx); - fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc); + fused_subgraph.input_mapping[static_cast(parent_index)].emplace_back(tensor_desc); GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx); } @@ -64,7 +65,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap return FAILED; } - fused_subgraph.output_mapping.emplace(parent_index, op_desc); + fused_subgraph.output_mapping.emplace(static_cast(parent_index), op_desc); return SUCCESS; } @@ -175,6 +176,10 @@ Status NodeItem::Init() { } } + if (is_output_shape_static) { + GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this)); + } + if (IsControlOp() || node_type == PARTITIONEDCALL) 
{ shape_inference_type = DEPEND_COMPUTE; } else { diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 77004f99..d6291c60 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -148,6 +148,10 @@ Status TaskContext::AllocateWorkspaces() { } Status TaskContext::RegisterCallback(const std::function &callback_fun) const { + if (callback_fun == nullptr) { + GELOGW("[%s] Callback is NULL", GetNodeName()); + return SUCCESS; + } auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun); if (ret != SUCCESS) { GELOGE(ret, "[%s] Failed to register callback", GetNodeName()); @@ -384,6 +388,20 @@ const char *TaskContext::GetNodeName() const { return node_item_->NodeName().c_str(); } +void TaskContext::ReleaseInputsAndOutputs() { + for (int i = 0; i < node_item_->num_inputs; ++i) { + auto tensor = inputs_start_ + i; + tensor->Destroy(); + GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index); + } + + for (int i = 0; i < node_item_->num_outputs; ++i) { + auto tensor = outputs_start_ + i; + tensor->Destroy(); + GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), index); + } +} + void TaskContext::ReleaseInput(int index) { auto input_tensor = MutableInput(index); if (input_tensor != nullptr) { @@ -456,5 +474,9 @@ Status TaskContext::TryExecuteCallback(const function &callback_fun) con const DumpProperties &TaskContext::GetDumpProperties() const { return execution_context_->dump_properties; } + +bool TaskContext::NeedCallback() { + return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 0549a1dc..34754a14 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -50,6 +50,8 @@ class TaskContext { ConstGeTensorDescPtr 
GetOutputDesc(int index) const; GeTensorDescPtr MutableInputDesc(int index) const; GeTensorDescPtr MutableOutputDesc(int index) const; + void ReleaseInputsAndOutputs(); + bool NeedCallback(); void ReleaseInput(int index); const TensorValue *GetInput(int index) const; const TensorValue *GetOutput(int index) const; From f28de88aaf367068ca2998466bec5d9999f67cfe Mon Sep 17 00:00:00 2001 From: chuxing Date: Sat, 12 Dec 2020 16:51:15 +0800 Subject: [PATCH 02/23] fix compile error --- ge/hybrid/node_executor/task_context.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index d6291c60..f16bfb2f 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -392,13 +392,13 @@ void TaskContext::ReleaseInputsAndOutputs() { for (int i = 0; i < node_item_->num_inputs; ++i) { auto tensor = inputs_start_ + i; tensor->Destroy(); - GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index); + GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i); } for (int i = 0; i < node_item_->num_outputs; ++i) { auto tensor = outputs_start_ + i; tensor->Destroy(); - GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), index); + GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i); } } From d942b4a57860e9fc8e4d3358b0c0a8f0e6b626b1 Mon Sep 17 00:00:00 2001 From: chuxing Date: Sat, 12 Dec 2020 18:00:28 +0800 Subject: [PATCH 03/23] fix static checks --- ge/hybrid/executor/node_state.cc | 2 +- .../executor/worker/shape_inference_engine.cc | 125 ++++++++++-------- .../executor/worker/shape_inference_engine.h | 2 + ge/hybrid/model/node_item.cc | 88 +++++++----- ge/hybrid/model/node_item.h | 5 + 5 files changed, 134 insertions(+), 88 deletions(-) diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index 66eeeba8..ceed40b0 100644 --- a/ge/hybrid/executor/node_state.cc +++ 
b/ge/hybrid/executor/node_state.cc @@ -122,7 +122,7 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex GE_CHECK_NOTNULL(input_desc); int64_t tensor_size = -1; (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size); - GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", + GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu", node_item.NodeName().c_str(), idx, src_tensor_desc->GetShape().ToString().c_str(), diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 02b3a50b..66d0ede2 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -71,7 +71,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { std::lock_guard lk(mu_); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), - "Invoke InferShapeAndType failed."); + "Invoke InferShapeAndType failed."); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); } @@ -229,66 +229,87 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) { return SUCCESS; } +Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc, + std::vector &shape, + bool fallback_with_range) { + const auto &tensor_shape = tensor_desc.MutableShape(); + if (tensor_shape.IsUnknownShape()) { + if (!fallback_with_range) { + GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. 
shape = [%s]", + tensor_shape.ToString().c_str()); + return INTERNAL_ERROR; + } + + GELOGD("Calc output size by range"); + std::vector> shape_range; + GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range"); + if (shape_range.size() != shape.size()) { + GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)", + shape_range.size(), + shape.size()); + return INTERNAL_ERROR; + } + + for (size_t dim_index = 0; dim_index < shape.size(); ++dim_index) { + if (shape[dim_index] == ge::UNKNOWN_DIM) { + shape[dim_index] = shape_range[dim_index].second; + } + } + + GELOGD("After canonicalization, shape = [%s], before = [%s]", + GeShape(shape).ToString().c_str(), + tensor_shape.ToString().c_str()); + } + + return SUCCESS; +} + +Status ShapeInferenceEngine::CalcTensorSize(DataType data_type, + const std::vector &shape, + int64_t &tensor_size) { + GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str()); + uint32_t type_size; + if (!TypeUtils::GetDataTypeLength(data_type, type_size)) { + GELOGE(INTERNAL_ERROR, "Failed to get data type size"); + return INTERNAL_ERROR; + } + + tensor_size = type_size; + for (const auto &dim : shape) { + GE_CHECK_GE(dim, 0); + GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim), + "Shape size overflow, shape = [%s]", + GeShape(shape).ToString().c_str()); + tensor_size *= dim; + } + + GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1), + "Tensor size is too large: %ld, shape = [%s]", + tensor_size, + GeShape(shape).ToString().c_str()); + tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment; + return SUCCESS; +} + Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) { auto op_desc = node_item.GetOpDesc(); for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) { auto tensor_desc = op_desc->MutableOutputDesc(output_index); 
GE_CHECK_NOTNULL(tensor_desc); const auto &shape = tensor_desc->MutableShape(); + // modify on copy auto dims = shape.GetDims(); - auto dim_num = dims.size(); - if (shape.IsUnknownShape()) { - if (!fallback_with_range) { - GELOGE(INTERNAL_ERROR, "[%s] Shape of output[%zu] is still unknown after shape inference. shape = [%s]", - node_item.NodeName().c_str(), - output_index, - shape.ToString().c_str()); - return INTERNAL_ERROR; - } - - GELOGD("[%s] Calc output[%zu] size by range", node_item.NodeName().c_str(), output_index); - std::vector> shape_range; - GE_CHK_GRAPH_STATUS_RET(tensor_desc->GetShapeRange(shape_range), - "[$s] Failed to get shape range for output: %zu", - node_item.NodeName().c_str(), - output_index); - if (shape_range.size() != dim_num) { - GELOGE(INTERNAL_ERROR, "[%s] Number of shape ranges (%zu) mismatches that of dims (%zu), index = %zu", - node_item.NodeName().c_str(), - shape_range.size(), - dim_num, - output_index); - return INTERNAL_ERROR; - } - - for (size_t dim_index = 0; dim_index < dim_num; ++dim_index) { - if (dims[dim_index] == ge::UNKNOWN_DIM) { - dims[dim_index] = shape_range[dim_index].second; - } - } - } - - uint32_t type_size = 0; - if (!TypeUtils::GetDataTypeLength(tensor_desc->GetDataType(), type_size)) { - GELOGE(INTERNAL_ERROR, "Failed to get data type size"); - return INTERNAL_ERROR; - } - int64_t tensor_size = type_size; - for (const auto &dim : dims) { - GE_CHECK_GE(dim, 0); - GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim), - "[%s] Shape size overflow, shape = [%s]", - node_item.NodeName().c_str(), - shape.ToString().c_str()); - tensor_size *= dim; - } + GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range), + "[%s] Failed to canonicalize shape for output %zu", + node_item.NodeName().c_str(), + output_index); - GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1), - "[%s] Output[%zu] Tensor size too large, shape = [%s]", + int64_t tensor_size; + 
GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size), + "[%s] Failed to calc tensor size for output %zu", node_item.NodeName().c_str(), - output_index, - shape.ToString().c_str()); - tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment; + output_index); + GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size); (void) TensorUtils::SetSize(*tensor_desc, tensor_size); } diff --git a/ge/hybrid/executor/worker/shape_inference_engine.h b/ge/hybrid/executor/worker/shape_inference_engine.h index 9401ead2..b946577f 100644 --- a/ge/hybrid/executor/worker/shape_inference_engine.h +++ b/ge/hybrid/executor/worker/shape_inference_engine.h @@ -37,6 +37,8 @@ class ShapeInferenceEngine { static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false); private: + static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector &shape, bool fallback_with_range); + static Status CalcTensorSize(DataType data_type, const std::vector &shape, int64_t &tensor_size); static Status UpdatePeerNodeShape(const Node &node); Status AwaitDependentNodes(NodeState &node_state); diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index 1fd8fe31..eb00f509 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -127,12 +127,7 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr &node_ite return SUCCESS; } -Status NodeItem::Init() { - GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); - GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); - num_inputs = static_cast(op_desc->GetInputsSize()); - num_outputs = static_cast(op_desc->GetOutputsSize()); - +void NodeItem::ResolveOptionalInputs() { if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) { has_optional_inputs = true; for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { @@ -144,7 +139,18 @@ Status NodeItem::Init() { } } } +} +Status 
NodeItem::InitInputsAndOutputs() { + GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); + GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); + num_inputs = static_cast(op_desc->GetInputsSize()); + num_outputs = static_cast(op_desc->GetOutputsSize()); + ResolveOptionalInputs(); + return SUCCESS; +} + +Status NodeItem::ResolveDynamicState() { (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); if (!is_dynamic) { @@ -152,42 +158,54 @@ Status NodeItem::Init() { "[%s] Failed to get shape status.", node->GetName().c_str()); } + return SUCCESS; +} - if (is_dynamic) { - for (int i = 0; i < num_inputs; ++i) { - const auto &input_desc = MutableInputDesc(i); - GE_CHECK_NOTNULL(input_desc); - if (input_desc->MutableShape().IsUnknownShape()) { - is_input_shape_static_.push_back(false); - } else { - num_static_input_shapes++; - is_input_shape_static_.push_back(true); - GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", - NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); - } +Status NodeItem::ResolveStaticInputsAndOutputs() { + for (int i = 0; i < num_inputs; ++i) { + const auto &input_desc = MutableInputDesc(i); + GE_CHECK_NOTNULL(input_desc); + if (input_desc->MutableShape().IsUnknownShape()) { + is_input_shape_static_.push_back(false); + } else { + num_static_input_shapes++; + is_input_shape_static_.push_back(true); + GELOGD("[%s] The shape of input[%d] is static. 
shape = [%s]", + NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); } + } - for (int i = 0; i < num_outputs; ++i) { - const auto &output_desc = op_desc->MutableOutputDesc(i); - GE_CHECK_NOTNULL(output_desc); - if (output_desc->MutableShape().IsUnknownShape()) { - is_output_shape_static = false; - break; - } + for (int i = 0; i < num_outputs; ++i) { + const auto &output_desc = op_desc->MutableOutputDesc(i); + GE_CHECK_NOTNULL(output_desc); + if (output_desc->MutableShape().IsUnknownShape()) { + is_output_shape_static = false; + break; } + } - if (is_output_shape_static) { - GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this)); - } + if (is_output_shape_static) { + GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this)); + } + return SUCCESS; +} - if (IsControlOp() || node_type == PARTITIONEDCALL) { - shape_inference_type = DEPEND_COMPUTE; - } else { - int32_t unknown_shape_type_val = 0; - (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); - shape_inference_type = static_cast(unknown_shape_type_val); - } +void NodeItem::ResolveUnknownShapeType() { + if (IsControlOp() || node_type == PARTITIONEDCALL) { + shape_inference_type = DEPEND_COMPUTE; + } else { + int32_t unknown_shape_type_val = 0; + (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); + shape_inference_type = static_cast(unknown_shape_type_val); + } +} +Status NodeItem::Init() { + GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs()); + GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState()); + if (is_dynamic) { + ResolveUnknownShapeType(); + GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs()); GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); } diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index 8fbdc648..99f0d83c 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ 
-103,6 +103,11 @@ struct NodeItem { private: explicit NodeItem(NodePtr node); Status Init(); + Status InitInputsAndOutputs(); + void ResolveOptionalInputs(); + Status ResolveDynamicState(); + Status ResolveStaticInputsAndOutputs(); + void ResolveUnknownShapeType(); std::vector is_input_shape_static_; std::vector input_desc_indices_; From 54b6ce9eea0f78ea3bad270fa3711e7da2155381 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Mon, 14 Dec 2020 21:30:42 +0800 Subject: [PATCH 04/23] Feature: Get default op format from ge graph --- ge/ir_build/ge_ir_build.cc | 110 +++++++++++++++++++++++++++++++------ 1 file changed, 93 insertions(+), 17 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index f181170c..34e612a2 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -226,9 +226,11 @@ class Impl { }; ~Impl() { (void)generator_.Finalize(); }; graphStatus CheckOptions(const std::map &options); + graphStatus CheckInputFormat(const string &input_format); graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs); - graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape); - graphStatus UpdateDataOpAttr(const Graph &graph); + graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag); + graphStatus GetDefaultInputFormat(const Graph &graph, string &default_format); + const Graph &graph, string &default_shape, string &input_fo graphStatus UpdateDataOpAttr(const Graph &graph); graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, ModelBufferData &ge_models); @@ -321,7 +323,62 @@ graphStatus Impl::CheckOptions(const std::map &options return GRAPH_SUCCESS; } -graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) { +graphStatus Impl::CheckInputFormat(const string &input_format) { + if (!input_format.empty()) { + auto iter = 
ge::input_format_str_to_geformat.find(input_format); + if (iter == ge::input_format_str_to_geformat.end()) { + GELOGE(GRAPH_PARAM_INVALID, "Input format %s not support , expect ND/NCHW/NHWC/CHWN/NC1HWC0/NHWC1C0.", + input_format.c_str()); + return GRAPH_PARAM_INVALID; + } + } + return GRAPH_SUCCESS; +} + +graphStatus Impl::GetDefaultInputFormat(const Graph &graph, string &default_format) { + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(input_node); + ge::OpDescPtr op = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == DATA) { + string data_op_name = op->GetName(); + GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size()); + ge::GeTensorDesc tensor = op->GetInputDesc(0); + ge::GeShape data_shape = tensor.GetShape(); + GELOGD("Data op get shape from InputDesc in ge ir graph."); + + const std::vector &tmp_shape = data_shape.GetDims(); + if (tmp_shape.empty()) { + GELOGD("Data op: %s has zero shape dims!", data_op_name.c_str()); + continue; + } + + bool is_dynamic_input = false; + for (auto tmp_dim : tmp_shape) { + if (tmp_dim < 0) { + is_dynamic_input = true; + } + } + + if (is_dynamic_input) { + string tmp_data_format = ge::TypeUtils::FormatToSerialString(tensor.GetFormat()); + if (!default_format.empty() && tmp_data_format!=default_format) { + GELOGE(GRAPH_PARAM_INVALID, "All data op with dynamic shape has no default format!"); + return GRAPH_PARAM_INVALID; + } else if (default_format.empty()) { + default_format.assign(tmp_data_format); + } + GELOGD("Data op name: %s, data format: %s.", data_op_name.c_str(), default_format.c_str()); + } + } + } + GELOGI("Get default data op format: %s from ge ir graph.", default_format.c_str()); + return GRAPH_SUCCESS; +} + +graphStatus Impl::(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) { auto 
compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { @@ -335,21 +392,30 @@ graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape ge::GeShape data_shape = tensor.GetShape(); GELOGD("Data op get shape from InputDesc in ge ir graph."); - string tmp_shape_str; const std::vector &tmp_shape = data_shape.GetDims(); if (tmp_shape.empty()) { GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str()); - } else { - tmp_shape_str += data_op_name + ":"; - for (auto tmp_dim : tmp_shape) { - tmp_shape_str += to_string((long)tmp_dim) + ","; + continue; + } + + string tmp_shape_str; + bool is_dynamic_input = false; + + tmp_shape_str += data_op_name + ":"; + for (auto tmp_dim : tmp_shape) { + if (tmp_dim < 0) { + is_dynamic_input = true; } - tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); - tmp_shape_str += ";"; - default_shape += tmp_shape_str; + tmp_shape_str += to_string((long)tmp_dim) + ","; } + tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); + tmp_shape_str += ";"; - GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str()); + if (is_dynamic_input) { + dynamic_shape_flag = true; + default_shape += tmp_shape_str; + GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str(),); + } } } default_shape = (default_shape.empty() ? 
default_shape : default_shape.substr(0, default_shape.size() - 1)); @@ -378,14 +444,24 @@ graphStatus Impl::Init(const Graph &graph, const std::map Date: Mon, 14 Dec 2020 22:43:44 +0800 Subject: [PATCH 05/23] update thirdparty includes --- CMakeLists.txt | 8 +- ge/CMakeLists.txt | 15 + .../aicpu/aicpu_schedule/aicpu_op_type_list.h | 60 ++++ .../inc/aicpu/common/aicpu_task_struct.h | 4 +- third_party/fwkacllib/inc/cce/aicpu_engine.h | 16 +- .../fwkacllib/inc/cce/aicpu_engine_struct.h | 8 +- .../fwkacllib/inc/cce/fwk_adpt_struct.h | 17 +- third_party/fwkacllib/inc/hccl/base.h | 30 +- third_party/fwkacllib/inc/hccl/hcom.h | 214 +++++------ .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h | 58 +-- .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h | 6 + third_party/fwkacllib/inc/runtime/base.h | 332 ++---------------- third_party/fwkacllib/inc/runtime/config.h | 23 +- third_party/fwkacllib/inc/runtime/dev.h | 4 +- third_party/fwkacllib/inc/runtime/rt.h | 2 +- third_party/fwkacllib/inc/tdt/status.h | 7 + third_party/fwkacllib/inc/tdt/tsd_client.h | 28 +- .../fwkacllib/inc/toolchain/prof_callback.h | 135 +++++++ .../fwkacllib/inc/toolchain/prof_reporter.h | 26 +- third_party/fwkacllib/inc/toolchain/slog.h | 25 ++ 20 files changed, 522 insertions(+), 496 deletions(-) create mode 100644 third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h create mode 100644 third_party/fwkacllib/inc/toolchain/prof_callback.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 86d0184b..bea12fcc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,7 +80,7 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH}) + #find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH}) #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) else() find_module(slog libslog.so 
${ASCEND_ATC_DIR}) @@ -92,7 +92,7 @@ if (ENABLE_OPEN_SRC) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) + #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) if(PRODUCT STREQUAL "flr3") message(FATAL_ERROR "This platform is not supported in train mode, build terminated") @@ -115,7 +115,7 @@ if (ENABLE_OPEN_SRC) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) endif() elseif(PLATFORM STREQUAL "all") - find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR}) + find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) @@ -123,7 +123,7 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR}) + #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 59b804d8..90c341d5 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -603,6 +603,21 @@ set(INFER_SRC_LIST "analyzer/analyzer.cc" ) +if (ENABLE_OPEN_SRC) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) + if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a) + execute_process( + COMMAND ar x ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a + WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object + ) + file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) + else() + file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "") + set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc) + endif() + add_library(msprofiler_fwk OBJECT ${msprof_file}) +endif() + if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $) diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h new file mode 100644 index 00000000..7e0f94a8 --- /dev/null +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +//One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct OpParamInfo { + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h index c3672663..72e21f6f 100644 --- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h +++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h @@ -21,13 +21,15 @@ namespace aicpu { +#pragma pack(push, 1) struct AicpuParamHead { uint32_t length; // Total length: include cunstom message uint32_t ioAddrNum; // Input and output address number uint32_t extInfoLength; // extInfo struct Length uint64_t extInfoAddr; // extInfo address -} __attribute__ ((packed)); +}; +#pragma pack(pop) } // namespace aicpu diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h index 740f1200..b83731a8 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h @@ -13,10 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef AICPU_ENGINE_H__ #define AICPU_ENGINE_H__ +#include + #ifdef __cplusplus extern "C" { #endif @@ -36,12 +37,23 @@ typedef enum { /** * @ingroup aicpu engine * @brief aeCallInterface: - * a interface to call a function in a op kernfel lib + * a interface to call a function in a op kernfel lib * @param [in] addr void *, should be STR_KERNEL * format * @return aeStatus_t */ aeStatus_t aeCallInterface(void *addr); +/** + * @ingroup aicpu engine + * @brief aeBatchLoadKernelSo: + * a interface to load kernel so + * @param [in] loadSoNum load so number + * @param [in] soPaths load so paths + * @param [in] soNames load so names + * @return aeStatus_t + */ +aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]); + #ifdef __cplusplus } #endif diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h index a5f43be9..8c0c1847 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h @@ -33,18 +33,22 @@ typedef enum { FMK_KERNEL_TYPE_RESERVED } FwkkernelType_t; +#pragma pack(push, 1) typedef struct { uint32_t fwkKernelType; // FwkkernelType_t union { ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; } fwkKernelBase; -} __attribute__((packed)) STR_FWK_OP_KERNEL; +} STR_FWK_OP_KERNEL; +#pragma pack(pop) +#pragma pack(push, 1) struct SessionInfo { uint64_t sessionId; uint64_t kernelId; bool sessFlag; -} __attribute__((packed)); +}; +#pragma pack(pop) #ifdef __cplusplus } diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 79d94023..50b39d91 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType { FWK_ADPT_UPDATE_INPUT_OUTPUT }; +#pragma pack(push, 1) // API Parameter Structure struct StrFWKKernel { FWKOperateType opType; @@ -89,31 
+90,39 @@ struct StrFWKKernel { uint64_t extInfoLen; // extend info total length uint64_t extInfoAddr; // extend info addr, ExtInfo structure -} __attribute__((packed)); +}; +#pragma pack(pop) typedef StrFWKKernel FWKOperateParam; // Extent info ShapeAndType const uint32_t kMaxShapeDims = 8; +#pragma pack(push, 1) struct ShapeAndType { int32_t type; int64_t dims[kMaxShapeDims]; -} __attribute__((packed)); +}; +#pragma pack(pop) // Extend info structure for extInfoAddr const uint32_t kExtInfoHeadSize = 8; + +#pragma pack(push, 1) struct ExtInfo { int32_t infoType; // extend type uint32_t infoLen; // length for infoMsg char infoMsg[0]; // extend value -} __attribute__((packed)); +}; +#pragma pack(pop) +#pragma pack(push, 1) struct ResultSummary { uint64_t shape_data_ptr; // shape data addr, need convert to void* uint64_t shape_data_size; // num of dims uint64_t raw_data_ptr; // raw data addr, need convert to void* uint64_t raw_data_size; // size of raw data -} __attribute__((packed)); +}; +#pragma pack(pop) } // end namespace FWKAdapter } // namespace aicpu diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index 8194097e..9facd20c 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -22,7 +22,8 @@ #ifndef HCCL_BASE_H_ #define HCCL_BASE_H_ - +#include +#include #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -95,6 +96,33 @@ typedef void *rtStream_t; */ typedef void *rtModel_t; +struct HcomOperation { + std::string hcclType; + void *inputPtr; + void *outputPtr; + u64 count; + HcclDataType dataType; + HcclReduceOp opType; + u32 root; + + HcomOperation() + { + inputPtr = nullptr; + outputPtr = nullptr; + count = 0; + dataType = HCCL_DATA_TYPE_RESERVED; + opType = HCCL_REDUCE_RESERVED; + root = 0; + } +}; + +struct HcomRemoteAccessAddrInfo { + u32 remotetRankID; + u64 remoteAddr; // host embedding table address + u64 localAddr; // device HBM address + u64 length; // Memory 
Length in Bytes +}; + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index 90b96ac7..e491d43f 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -24,145 +24,96 @@ #include #include +#include +#include #ifdef __cplusplus extern "C" { #endif // __cplusplus -/** - * @brief Initialize HCOM. - * - * @param rank_table A string identifying the rank table file path, include file name. - * @param identify A string identifying the identify for the rank. - * @return HcclResult - * @see hcom_destroy() - */ -extern HcclResult hcom_init(const char *rank_table, const char *identify); -/** - * @brief Destroy HCOM - * - * @return HcclResult - * @see hcom_init() - */ -extern HcclResult hcom_destroy(void); - -/** - * @brief Bind the model. - * - * @param model A pointer identifying the model information. - * @param stream A pointer identifying the stream information. - * @return HcclResult - * @see hcom_unbind_model() - */ -extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream); /** - * @brief Unbind the model. + * @brief Get the rank number in the group. * - * @param model An pointer identifying the model information. - * @return HcclResult - * @see hcom_unbind_model() + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. + * @return HcclResult */ -extern HcclResult hcom_unbind_model(rtModel_t model); +HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); /** - * @brief All-gather operator. + * @brief Get the rank number in the group. * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param inputCount An integer(u64) identifying the number of the input data. 
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. * @return HcclResult */ -extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount, - HcclDataType dataType, const char *group, rtStream_t stream); +HcclResult HcomGetRankSize(const char *group, u32 *rankSize); /** - * @brief All-reduce operator. + * @brief Get the rank number of this rank's server within the group. * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the output data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. * @return HcclResult */ -extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count, - HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); +HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); /** - * @brief Broadcast operator. + * @brief Get the rank number of this rank's server within the group. * - * @param tag A string identifying the tag of the operator. 
- * @param ptr A pointer identifying the data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param root An integer(u32) identifying the the root rank in the operator. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. * @return HcclResult */ -extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root, - const char *group, rtStream_t stream); +HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); /** - * @brief Reduce-scatter operator. + * @brief Get the rank id of this rank. * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param rankId A pointer identifying the rank id. 
* @return HcclResult */ -extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count, - HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); +HcclResult hcom_get_rank_id(const char *group, u32 *rankId); /** - * @brief Get the rank number in the group. + * @brief Get the rank id of this rank. * * @param group A string identifying the group name. - * @param rankSize A pointer identifying the rank number. + * @param rankId A pointer identifying the rank id. * @return HcclResult */ -HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); +HcclResult HcomGetRankId(const char *group, u32 *rankId); /** - * @brief Get the rank number of this rank's server within the group. + * @brief Get the local rank id of this rank's server within the group. * * @param group A string identifying the group name. - * @param localRankSize A pointer identifying the rank number. + * @param localRankId A pointer identifying the local rank id. * @return HcclResult */ -HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); +HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); /** - * @brief Get the rank id of this rank. + * @brief Get the local rank id of this rank's server within the group. * * @param group A string identifying the group name. - * @param rankId A pointer identifying the rank id. + * @param localRankId A pointer identifying the local rank id. * @return HcclResult */ -HcclResult hcom_get_rank_id(const char *group, u32 *rankId); +HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); /** - * @brief Get the local rank id of this rank's server within the group. + * @brief Get the world rank id according to the group rank id. * * @param group A string identifying the group name. - * @param localRankId A pointer identifying the local rank id. + * @param groupRank An integer(u32) identifying the group rank id. 
+ * @param worldRank A pointer identifying the world rank id. * @return HcclResult */ -HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); +HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); /** * @brief Get the world rank id according to the group rank id. @@ -172,7 +123,7 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); * @param worldRank A pointer identifying the world rank id. * @return HcclResult */ -HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); +HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); /** * @brief Get the group rank id according to the world rank id. @@ -184,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, */ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); +/** + * @brief Get the group rank id according to the world rank id. + * + * @param worldRank An integer(u32) identifying the world rank id. + * @param group A string identifying the group name. + * @param groupRank A pointer identifying the group rank id. + * @return HcclResult + */ +HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); + /** * @brief Create group. * @@ -195,60 +156,40 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); /** - * @brief Destroy group + * @brief Create group. * * @param group A string identifying the group name. + * @param rankNum An integer(u32) identifying the number of ranks in the group. + * @param rankIds A list identifying the ranks in the group. * @return HcclResult */ -HcclResult hcom_destroy_group(const char *group); +HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); /** - * @brief Send operator. 
+ * @brief Destroy group * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param destRank An integer identifying the destination rank. - * @param srTag An integer identifying the send/recv message tag. - * The message will be send by the receive operator with the same "sr_tag". - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. * @return HcclResult */ -HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType, - u32 destRank, u32 srTag, const char *group, rtStream_t stream); +HcclResult hcom_destroy_group(const char *group); /** - * @brief Receive operator. + * @brief Destroy group * - * @param tag A string identifying the tag of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param srcRank An integer identifying the source rank. - * @param srTag An integer identifying the send/recv message tag. - * The message will be send by the send operator with the same "sr_tag". - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. 
* @return HcclResult */ -HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType, - u32 srcRank, u32 srTag, const char *group, rtStream_t stream); +HcclResult HcomDestroyGroup(const char *group); /** - * @brief Get the gradient split strategy with in the group. + * @brief Set the gradient split strategy with in the group, according to gradient index. * * @param group A string identifying the group name. - * @param feature A pointer identifying the feature of the model. - * @param maxSegmentNum An integer(u32) identifying the max segments of gradients. - * @param segmentNum A pointer identifying the segments number of gradients. - * @param segmentIdx A list identifying the index of end gradient in each segment. - * @return HcclResult + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param IdxList A list identifying the index of end gradient in each segment. + * @return HcclResult */ -HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum, - u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE, - OriginalGraphShapeType shapeType = KNOWN_SHAPE); +extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); /** * @brief Set the gradient split strategy with in the group, according to gradient index. @@ -258,7 +199,7 @@ HcclResult hcom_get_split_strategy(const char *group, const struct model_feature * @param IdxList A list identifying the index of end gradient in each segment. * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); /** * @brief Set the gradient split strategy with in the group, according to gradient data size. 
@@ -270,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen */ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +/** + * @brief Set the gradient split strategy with in the group, according to gradient data size. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param sizeList A list identifying the percent of each segment. + * @return HcclResult + */ +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); + /** * @brief Register memories and init resources for remote access. * @@ -279,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment */ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +/** + * @brief Register memories and init resources for remote access. + * + * @param addrList memory addresses for remote access. + * @param count number of remote memory addresses. 
+ * @return HcclResult + */ +extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); + +HcclResult HcomExecInitialize(); + +HcclResult HcomExecFinalize(); + +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index ea51f497..ad48f70b 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -215,6 +215,10 @@ typedef struct { #define S_IWRITE S_IWUSR #endif +#define mm_no_argument no_argument +#define mm_required_argument required_argument +#define mm_optional_argument optional_argument + #define M_FILE_RDONLY O_RDONLY #define M_FILE_WRONLY O_WRONLY #define M_FILE_RDWR O_RDWR @@ -227,6 +231,7 @@ typedef struct { #define M_BINARY O_RDONLY #define M_TRUNC O_TRUNC #define M_IRWXU S_IRWXU +#define M_APPEND O_APPEND #define M_IN_CREATE IN_CREATE #define M_IN_CLOSE_WRITE IN_CLOSE_WRITE @@ -342,17 +347,17 @@ MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd); MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, - VOID *sendMsg, - INT32 sendLen, - UINT32 sendFlag, - const mmSockAddr* addr, - INT32 tolen); + VOID *sendMsg, + INT32 sendLen, + UINT32 sendFlag, + const mmSockAddr* addr, + INT32 tolen); MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, - VOID *recvBuf, - mmSize recvLen, - UINT32 recvFlag, - mmSockAddr* addr, - mmSocklen_t *FromLen); + VOID *recvBuf, + mmSize recvLen, + UINT32 
recvFlag, + mmSockAddr* addr, + mmSocklen_t *FromLen); MMPA_FUNC_VISIBILITY INT32 mmSAStartup(); MMPA_FUNC_VISIBILITY INT32 mmSACleanup(); MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode); @@ -360,7 +365,10 @@ MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info); MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName); MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); MMPA_FUNC_VISIBILITY CHAR *mmDlerror(); -MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period); +MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, + mmUserBlock_t *timerBlock, + UINT milliSecond, + UINT period); MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); @@ -408,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); // Poll related interface MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); -MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, - pmmPollData polledData, mmPollBack pollBack); +MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, + INT32 fdCount, + INT32 timeout, + mmCompletionHandle handleIOCP, + pmmPollData polledData, + mmPollBack pollBack); MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); @@ -454,8 +466,11 @@ MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, 
const char *opts); -MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts, - INT32 *longIndex); +MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, + char *const *argv, + const char *opts, + const mmStructOption *longOpts, + INT32 *longIndex); MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length); @@ -521,11 +536,14 @@ MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count); MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); -MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, - mmProcess *id); - -MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, - const mmThreadAttr *threadAttr); +MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, + const mmArgvEnv *env, + const char *stdoutRedirectFile, + mmProcess *id); + +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, + const mmUserBlock_t *funcBlock, + const mmThreadAttr *threadAttr); MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index 5db6bbf8..cecdd4a7 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -237,6 +237,11 @@ typedef struct { } mmThreadAttr; typedef VOID (*mmPf)(VOID); + +#define mm_no_argument 0 +#define 
mm_required_argument 1 +#define mm_optional_argument 2 + #define M_FILE_RDONLY GENERIC_READ #define M_FILE_WRONLY GENERIC_WRITE #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE) @@ -249,6 +254,7 @@ typedef VOID (*mmPf)(VOID); #define M_CREAT _O_CREAT #define M_BINARY _O_BINARY #define M_TRUNC _O_TRUNC +#define M_APPEND _O_APPEND #define M_IREAD _S_IREAD #define M_IRUSR _S_IREAD diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 4e735438..b9b2cbe5 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -18,6 +18,7 @@ #define __CCE_RUNTIME_BASE_H__ #include +#include "toolchain/prof_callback.h" #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { @@ -32,309 +33,8 @@ extern "C" { #endif #endif -/** - * @ingroup dvrt_base - * @brief runtime error numbers. - */ -typedef enum tagRtError { - RT_ERROR_NONE = 0x0, // success - - RT_ERROR_DEVICE_BASE = 0x07010000, - RT_ERROR_DEVICE_NULL, - RT_ERROR_DEVICE_NEW, - RT_ERROR_DEVICE_ID, - RT_ERROR_DEVICE_CHIPTYPE, - RT_ERROR_DEVICE_DEPLOY, - RT_ERROR_DEVICE_RETAIN, - RT_ERROR_DEVICE_PLATFORM, - RT_ERROR_DEVICE_LOADER, - RT_ERROR_DEVICE_LIMIT, - RT_ERROR_DEVICE_PROC_HANG_OUT, - RT_ERROR_DEVICE_POWER_UP_FAIL, - RT_ERROR_DEVICE_POWER_DOWN_FAIL, - RT_ERROR_DEVICE_INVALID, - - RT_ERROR_DRV_BASE = 0x07020000, - RT_ERROR_DRV_NULL, - RT_ERROR_DRV_NEW, - RT_ERROR_DRV_MEMORY, - RT_ERROR_DRV_INPUT, - RT_ERROR_DRV_PTRNULL, - RT_ERROR_DRV_OPEN_AICPU, - RT_ERROR_DRV_CLOSE_AICPU, - RT_ERROR_DRV_SYM_AICPU, - RT_ERROR_DRV_OPEN_TSD, - RT_ERROR_DRV_CLOSE_TSD, - RT_ERROR_DRV_SYM_TSD, - RT_ERROR_DRV_SOURCE, - RT_ERROR_DRV_REPORT, - RT_ERROR_DRV_COMMAND, - RT_ERROR_DRV_OCCUPY, - RT_ERROR_DRV_ERR, - - RT_ERROR_STREAM_BASE = 0x07030000, - RT_ERROR_STREAM_NULL, - RT_ERROR_STREAM_NEW, - RT_ERROR_STREAM_CONTEXT, - RT_ERROR_STREAM_INVALID, - RT_ERROR_STREAM_MODEL, - RT_ERROR_STREAM_FUSION, - RT_ERROR_STREAM_FULL, - RT_ERROR_STREAM_EMPTY, - 
RT_ERROR_STREAM_NOT_COMPLETE, - RT_ERROR_STREAM_SYNC, - RT_ERROR_STREAM_NO_CB_REG, - RT_ERROR_STREAM_DUPLICATE, - RT_ERROR_STREAM_NOT_EXIST, - RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE, - RT_ERROR_SQID_FULL, - - RT_ERROR_MODEL_BASE = 0x07040000, - RT_ERROR_MODEL_NULL, - RT_ERROR_MODEL_NEW, - RT_ERROR_MODEL_CONTEXT, - RT_ERROR_MODEL_ENDGRAPH, - RT_ERROR_MODEL_STREAM, - RT_ERROR_MODEL_EXCUTOR, - RT_ERROR_MODEL_SETUP, - RT_ERROR_MODEL_ID, - RT_ERROR_MODEL_EXE_FAILED, - RT_ERROR_END_OF_SEQUENCE, // end of sequence - RT_ERROR_MODEL_EXIT, - RT_ERROR_MODEL_EXIT_STREAM_UNBIND, - RT_ERROR_MODEL_EXIT_ID, - RT_ERROR_MODEL_ABORT_NORMAL, - - RT_ERROR_EVENT_BASE = 0x07050000, - RT_ERROR_EVENT_NULL, - RT_ERROR_EVENT_NEW, - RT_ERROR_EVENT_RECORDER_NULL, - RT_ERROR_EVENT_TIMESTAMP_INVALID, - RT_ERROR_EVENT_TIMESTAMP_REVERSAL, - RT_ERROR_EVENT_NOT_COMPLETE, - - RT_ERROR_NOTIFY_BASE = 0x07060000, - RT_ERROR_NOTIFY_NULL, - RT_ERROR_NOTIFY_NEW, - RT_ERROR_NOTIFY_TYPE, - RT_ERROR_NOTIFY_NOT_COMPLETE, - - RT_ERROR_CONTEXT_BASE = 0x07070000, - RT_ERROR_CONTEXT_NULL, - RT_ERROR_CONTEXT_NEW, - RT_ERROR_CONTEXT_DEL, - RT_ERROR_CONTEXT_DEFAULT_STREAM_NULL, - RT_ERROR_CONTEXT_ONLINE_STREAM_NULL, - - RT_ERROR_KERNEL_BASE = 0x07080000, - RT_ERROR_KERNEL_NULL, - RT_ERROR_KERNEL_NEW, - RT_ERROR_KERNEL_LOOKUP, - RT_ERROR_KERNEL_NAME, - RT_ERROR_KERNEL_TYPE, - RT_ERROR_KERNEL_OFFSET, - RT_ERROR_KERNEL_DUPLICATE, - RT_ERROR_KERNEL_UNREGISTERING, - - RT_ERROR_PROGRAM_BASE = 0x07090000, - RT_ERROR_PROGRAM_NULL, - RT_ERROR_PROGRAM_NEW, - RT_ERROR_PROGRAM_DATA, - RT_ERROR_PROGRAM_SIZE, - RT_ERROR_PROGRAM_MEM_TYPE, - RT_ERROR_PROGRAM_MACHINE_TYPE, - RT_ERROR_PROGRAM_USEOUT, - - RT_ERROR_MODULE_BASE = 0x070a0000, - RT_ERROR_MODULE_NULL, - RT_ERROR_MODULE_NEW, - - RT_ERROR_INSTANCE_BASE = 0x070b0000, - RT_ERROR_INSTANCE_NULL, - RT_ERROR_INSTANCE_NEW, - RT_ERROR_INSTANCE_VERSION, - - RT_ERROR_API_BASE = 0x070c0000, - RT_ERROR_API_NULL, - RT_ERROR_API_NEW, - - RT_ERROR_DATADUMP_BASE = 0x070d0000, - 
RT_ERROR_DATADUMP_NULL, - RT_ERROR_DATADUMP_NEW, - RT_ERROR_DATADUMP_TIME, - RT_ERROR_DATADUMP_FILE, - RT_ERROR_DATADUMP_ADDRESS, - RT_ERROR_DATADUMP_LOAD_FAILED, - RT_ERROR_DUMP_ADDR_SET_FAILED, - - RT_ERROR_PROF_BASE = 0x070e0000, - RT_ERROR_PROF_NULL, - RT_ERROR_PROF_NEW, - RT_ERROR_PROF_START, - RT_ERROR_PROF_DEVICE_MEM, - RT_ERROR_PROF_HOST_MEM, - RT_ERROR_PROF_SET_DIR, - RT_ERROR_PROF_OPER, - RT_ERROR_PROF_FULL, - RT_ERROR_PROF_NAME, - - RT_ERROR_PCTRACE_BASE = 0x070f0000, - RT_ERROR_PCTRACE_NULL, - RT_ERROR_PCTRACE_NEW, - RT_ERROR_PCTRACE_TIME, - RT_ERROR_PCTRACE_FILE, - - RT_ERROR_TASK_BASE = 0x07100000, - RT_ERROR_TASK_NULL, - RT_ERROR_TASK_NEW, - RT_ERROR_TASK_TYPE, - RT_ERROR_TASK_ALLOCATOR, - - RT_ERROR_COMMON_BASE = 0x07110000, - RT_ERROR_INVALID_VALUE, // RT_ERROR_INPUT_INVALID - RT_ERROR_MEMORY_ADDRESS_UNALIGNED, - RT_ERROR_SEC_HANDLE, - RT_ERROR_OS_HANDLE, - RT_ERROR_MUTEX_LOCK, - RT_ERROR_MUTEX_UNLOCK, - RT_ERROR_CALLOC, - RT_ERROR_POOL_RESOURCE, - RT_ERROR_TRANS_ARGS, - RT_ERROR_METADATA, - RT_ERROR_LOST_HEARTBEAT, - RT_ERROR_REPORT_TIMEOUT, - RT_ERROR_FEATURE_NOT_SUPPROT, - RT_ERROR_MEMORY_ALLOCATION, - RT_ERROR_MEMORY_FREE, - RT_ERROR_INVALID_MEMORY_TYPE, - - RT_ERROR_DEBUG_BASE = 0x07120000, - RT_ERROR_DEBUG_NULL, - RT_ERROR_DEBUG_NEW, - RT_ERROR_DEBUG_SIGNAL, - RT_ERROR_DEBUG_OPEN, - RT_ERROR_DEBUG_WRITE, - RT_ERROR_DEBUG_REGISTER_FAILED, - RT_ERROR_DEBUG_UNREGISTER_FAILED, - - RT_ERROR_ENGINE_BASE = 0x07130000, - RT_ERROR_ENGINE_NULL, - RT_ERROR_ENGINE_NEW, - RT_ERROR_ENGINE_THREAD, - - RT_ERROR_LABEL_BASE = 0x07140000, - RT_ERROR_LABEL_NULL, - RT_ERROR_LABEL_NEW, - RT_ERROR_LABEL_CONTEXT, - RT_ERROR_LABEL_STREAM, - RT_ERROR_LABEL_MODEL, - RT_ERROR_LABEL_ALLOCATOR, - RT_ERROR_LABEL_FREE, - RT_ERROR_LABEL_SET, - RT_ERROR_LABEL_ID, - - RT_ERROR_TSFW_BASE = 0x07150000, - RT_ERROR_TSFW_UNKNOWN, - RT_ERROR_TSFW_NULL_PTR, - RT_ERROR_TSFW_ILLEGAL_AI_CORE_ID, - RT_ERROR_TSFW_ILLEGAL_PARAM, - RT_ERROR_TSFW_TASK_CMD_QUEUE_FULL, - 
RT_ERROR_TSFW_TASK_CMD_QUEUE_EMPTY, - RT_ERROR_TSFW_TASK_REPORT_QUEUE_FULL, - RT_ERROR_TSFW_TASK_REPORT_QUEUE_EMPTY, - RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_OCCUPYED, - RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_FREED, - RT_ERROR_TSFW_L2_MEM_INSUFFICIENT_SPACE, - RT_ERROR_TSFW_L2_MALLOC_FAILED, - RT_ERROR_TSFW_DMA_CHANNEL_ALL_OCCUPYED, - RT_ERROR_TSFW_MEMCPY_OP_FAILED, - RT_ERROR_TSFW_BS_SLOT_ALL_OCCUPYED, - RT_ERROR_TSFW_TBS_SLOT_REPEAT_FREE, - RT_ERROR_TSFW_PRIORITY_TASK_LIST_FULL, - RT_ERROR_TSFW_PRIORITY_TASK_LIST_EMPTY, - RT_ERROR_TSFW_NO_STREAM_LIST_NEED_TO_BE_PROCESSED, - RT_ERROR_TSFW_REPEAT_MARK_STREAM_NEED_SERVICE, - RT_ERROR_TSFW_SYS_DMA_CHANNEL_ALL_OCCUPAPYED, - RT_ERROR_TSFW_NO_HBML2TASKNODE_FOUND, - RT_ERROR_TSFW_SQNODE_NODE_SLOT_ALL_OCCUPAPYED, - RT_ERROR_TSFW_CQNODE_NODE_SLOT_ALL_OCCUPAPYED, - RT_ERROR_TSFW_SQNODE_NOT_ENOUGH, - RT_ERROR_TSFW_SQNODE_SLOT_REPEAT_FREE, - RT_ERROR_TSFW_CQNODE_SLOT_REPEAT_FREE, - RT_ERROR_TSFW_CQ_REPORT_FAILED, - RT_ERROR_TSFW_SYS_DMA_RESET_SUCCESS, - RT_ERROR_TSFW_SYS_DMA_RESET_FAILED, - RT_ERROR_TSFW_SYS_DMA_TRNSFER_FAILED, - RT_ERROR_TSFW_SYS_DMA_MEMADDRALIGN_FAILED, - RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_FULL, - RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_EMPTY, - RT_ERROR_TSFW_TIMER_EVENT_FULL, - RT_ERROR_TSFW_TASK_L2_DESC_ENTRY_NOT_ENOUGH, - RT_ERROR_TSFW_AICORE_TIMEOUT, - RT_ERROR_TSFW_AICORE_EXCEPTION, - RT_ERROR_TSFW_AICORE_TRAP_EXCEPTION, - RT_ERROR_TSFW_AICPU_TIMEOUT, - RT_ERROR_TSFW_SDMA_L2_TO_DDR_MALLOC_FAIL, - RT_ERROR_TSFW_AICPU_EXCEPTION, - RT_ERROR_TSFW_AICPU_DATADUMP_RSP_ERR, - RT_ERROR_TSFW_AICPU_MODEL_RSP_ERR, - RT_ERROR_TSFW_REPEAT_ACTIVE_MODEL_STREAM, - RT_ERROR_TSFW_REPEAT_NOTIFY_WAIT, - RT_ERROR_TSFW_DEBUG_INVALID_SQCQ, - RT_ERROR_TSFW_DEBUG_WRONG_COMMAND_TYPE, - RT_ERROR_TSFW_DEBUG_CMD_PROCESS, - RT_ERROR_TSFW_DEBUG_INVALID_DEVICE_STATUS, - RT_ERROR_TSFW_DEBUG_NOT_IN_DEBUG_STATUS, - RT_ERROR_TSFW_DEBUG_INVALID_TASK_STATUS, - RT_ERROR_TSFW_DEBUG_TASK_EMPTY, - RT_ERROR_TSFW_DEBUG_TASK_FULL, - 
RT_ERROR_TSFW_DEBUG_TASK_NOT_EXIST, - RT_ERROR_TSFW_DEBUG_AI_CORE_FULL, - RT_ERROR_TSFW_DEBUG_AI_CORE_NOT_EXIST, - RT_ERROR_TSFW_DEBUG_AI_CORE_EXCEPTION, - RT_ERROR_TSFW_DEBUG_AI_CORE_TIMEOUT, - RT_ERROR_TSFW_DEBUG_BREAKPOINT_FULL, - RT_ERROR_TSFW_DEBUG_READ_ERROR, - RT_ERROR_TSFW_DEBUG_WRITE_FAIL, - RT_ERROR_TSFW_QUEUE_FULL, - RT_ERROR_TSFW_QUEUE_EMPTY, - RT_ERROR_TSFW_QUEUE_ALLOC_MEM_FAIL, - RT_ERROR_TSFW_QUEUE_DATA_SIZE_UNMATCH, - RT_ERROR_TSFW_PCIE_DMA_INVLD_CPY_TYPE, - RT_ERROR_TSFW_INVLD_CPY_DIR, - RT_ERROR_TSFW_PCIE_DMA_INVLD_CQ_DES, - RT_ERROR_TSFW_PCIE_DMA_CPY_ERR, - RT_ERROR_TSFW_PCIE_DMA_LNK_CHN_BUSY, - RT_ERROR_TSFW_PROFILE_BUFF_FULL, - RT_ERROR_TSFW_PROFILE_MODE_CONFLICT, - RT_ERROR_TSFW_PROFILE_OTHER_PID_ON, - RT_ERROR_TSFW_SCHD_AIC_TASK_PRELOAD_FAILED, - RT_ERROR_TSFW_TSCPU_CLOSE_FAILED, - RT_ERROR_TSFW_EXPECT_FAIL, - RT_ERROR_TSFW_REPEAT_MODEL_STREAM, - RT_ERROR_TSFW_STREAM_MODEL_UNBIND, - RT_ERROR_TSFW_MODEL_EXE_FAILED, - RT_ERROR_TSFW_IPC_SEND_FAILED, - RT_ERROR_TSFW_IPC_PROC_REG_FAILED, - RT_ERROR_TSFW_STREAM_FULL, - RT_ERROR_TSFW_END_OF_SEQUENCE, - RT_ERROR_TSFW_SWITCH_STREAM_LABEL, - RT_ERROR_TSFW_TRANS_SQE_FAIL, - RT_ERROR_TSFW_RESERVED, - - RT_ERROR_SUBSCRIBE_BASE = 0x07160000, - RT_ERROR_SUBSCRIBE_NULL, - RT_ERROR_SUBSCRIBE_NEW, - RT_ERROR_SUBSCRIBE_STREAM, - RT_ERROR_SUBSCRIBE_THREAD, - RT_ERROR_SUBSCRIBE_GROUP, - - RT_ERROR_GROUP_BASE = 0x07170000, - RT_ERROR_GROUP_NOT_SET, - RT_ERROR_GROUP_NOT_CREATE, - - RT_ERROR_RESERVED = 0x07ff0000, - }rtError_t; +typedef int32_t rtError_t; +static const int32_t RT_ERROR_NONE = 0; // success /** * @ingroup dvrt_base @@ -387,10 +87,20 @@ typedef struct rtExceptionInfo { uint32_t deviceid; } rtExceptionInfo; +typedef struct rtTaskFailInfo { + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; +} rtTaskFailInfo; + typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); +typedef void 
(*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); + typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); /** @@ -447,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* */ RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); +/** + * @ingroup profiling_base + * @brief ts set profiling reporter callback. + */ +RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); + /** * @ingroup dvrt_base * @brief Returns the last error from a runtime call. @@ -485,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); */ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); +/** + * @ingroup dvrt_base + * @brief register callback for fail task + * @param [in] moduleName unique register name, can't be null + * @param [in] callback fail task callback function + * @param [out] NA + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); + /** * @ingroup dvrt_base * @brief notify handle. 
diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index f1a70eaa..12a407d7 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig { typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; -/** - * @ingroup - * @brief get platform - * @param [in] platForm - * @return platForm - */ -RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm); - /** * @ingroup * @brief get AI core count @@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate */ RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); -/** - * @ingroup - * @brief set platform in gen ctx - * @param [in] platForm - * @return RT_ERROR_NONE for ok, errno for failed - */ -RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); /** * @ingroup @@ -185,6 +170,14 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); */ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); +/** + * @ingroup + * @brief get runtime version. The version is returned as (1000 * major + 10 * minor). For example, RUNTIME 9.2 would be represented by 9020. 
+ * @param [out] runtimeVersion + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index b378e3b0..d1a91a9b 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -330,12 +330,12 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 FEATURE_TYPE_MEMCPY = 0, FEATURE_TYPE_RSV, } rtFeatureType_t; - * @param [in] infoType info type + * @param [in] featureInfo info type typedef enum tagMemcpyInfo { MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, MEMCPY_INFO _RSV, } rtMemcpyInfo_t; - * @param [out] value the capability info + * @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT * @return RT_ERROR_NONE for ok */ RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index d3d5956f..83cafa3c 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -28,4 +28,4 @@ #include "rt_model.h" #include "stream.h" -#endif // __CCE_RUNTIME_RT_H__ \ No newline at end of file +#endif // __CCE_RUNTIME_RT_H__ diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index d30564b8..d5050f35 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -34,9 +34,16 @@ using TDT_StatusT = uint32_t; typedef uint32_t TDT_StatusT; #endif +#define LINUX 0 +#define WINDOWS 1 + #ifndef TDT_LIB_EXPORT +#if(TARGET_SYSTEM_NAME == WINDOWS) +#define TDT_LIB_EXPORT __declspec(dllexport) +#else #define TDT_LIB_EXPORT __attribute__((visibility("default"))) #endif +#endif /** * @ingroup tdt 
status. * diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h index 6066a12e..665c8b82 100644 --- a/third_party/fwkacllib/inc/tdt/tsd_client.h +++ b/third_party/fwkacllib/inc/tdt/tsd_client.h @@ -23,6 +23,7 @@ #include #include "tdt/status.h" #include "tdt/data_common.h" +#include "toolchain/prof_callback.h" #ifdef __cplusplus extern "C" { @@ -37,7 +38,7 @@ extern "C" { * Used for the Framework process to communicate with the TSDDaemon process, * and notify TSD to complete the initialization of other processes * -* @param phyDeviceId [IN] type #unsigned int. Physical device ID +* @param logicDeviceId [IN] type #unsigned int. Logic device ID * @param rankSize [IN] type #unsigned int. The rankSize of the training. * The default value is 1. When rankSize is greater than 1, * HCCP will be pulled to perform set communication related operations. @@ -49,7 +50,7 @@ extern "C" { * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize); +TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); /** * @ingroup Close @@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); +TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); /** * @ingroup UpdateProfilingMode @@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); * @li tsd_client.h: Header file where the interface declaration is located. 
* @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag); +TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); + +/** +* @ingroup TsdSetMsprofReporterCallback +* @brief Sets the aicpu profiling callback function in inference scenarios +* +* @par Function +* Sets the profiling callback function of the aicpu_sd process in offline mode +* +* @param callback [IN] type #MsprofReporterCallback. Callback function +* @retval TDT_OK Success +* @retval OtherValues Failure +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined +* @li prof_callback.h: Header file where 'MsprofReporterCallback' defined +*/ +TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); /** * @ingroup CreateCmdParameterObj diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h new file mode 100644 index 00000000..3fad74bc --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -0,0 +1,135 @@ +/** + * Copyright 2020-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * @file prof_callback.h + * @brief declaration of profiling callbacks + */ + +#ifndef MSPROFILER_PROF_CALLBACK_H_ +#define MSPROFILER_PROF_CALLBACK_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + +#include "stddef.h" +#include "stdint.h" + +/** + * @name MsprofErrorCode + * @brief error code + */ +enum MsprofErrorCode { + MSPROF_ERROR_NONE = 0, + MSPROF_ERROR_MEM_NOT_ENOUGH, + MSPROF_ERROR_GET_ENV, + MSPROF_ERROR_CONFIG_INVALID, + MSPROF_ERROR_ACL_JSON_OFF, + MSPROF_ERROR, +}; + +#define MSPROF_ENGINE_MAX_TAG_LEN (31) + +/** + * @name ReporterData + * @brief struct of data to report + */ +struct ReporterData { + char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module, data with different tag will be written + int deviceId; // the index of device + size_t dataLen; // the length of send data + unsigned char *data; // the data content +}; + +/** + * @name MsprofReporterModuleId + * @brief module id of data to report + */ +enum MsprofReporterModuleId { + MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS + MSPROF_MODULE_HCCL, // HCCL + MSPROF_MODULE_ACL, // AclModule + MSPROF_MODULE_FRAMEWORK, // Framework + MSPROF_MODULE_RUNTIME // runtime +}; + +/** + * @name MsprofReporterCallbackType + * @brief reporter callback request type + */ +enum MsprofReporterCallbackType { + MSPROF_REPORTER_REPORT = 0, // report data + MSPROF_REPORTER_INIT, // init reporter + MSPROF_REPORTER_UNINIT, // uninit reporter +}; + +/** + * @name MsprofReporterCallback + * @brief callback to start reporter/stop reporter/report data + * @param moduleId [IN] enum MsprofReporterModuleId + * @param type [IN] enum MsprofReporterCallbackType + * @param data [IN] callback data (nullptr on INIT/UNINIT) + * @param len [IN] callback data size (0 on INIT/UNINIT) + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len); + + +#define MSPROF_OPTIONS_DEF_LEN_MAX (2048) + +/** + * @name 
MsprofGeOptions + * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS + */ +struct MsprofGeOptions { + char jobId[MSPROF_OPTIONS_DEF_LEN_MAX]; + char options[MSPROF_OPTIONS_DEF_LEN_MAX]; +}; + +/** + * @name MsprofCtrlCallbackType + * @brief ctrl callback request type + */ +enum MsprofCtrlCallbackType { + MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env + MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json + MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options + MSPROF_CTRL_FINALIZE // stop profiling +}; + +/** + * @name MsprofCtrlCallback + * @brief callback to start/stop profiling + * @param type [IN] enum MsprofCtrlCallbackType + * @param data [IN] callback data + * @param len [IN] callback data size + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len); + +/** + * @name MsprofSetDeviceCallback + * @brief callback to notify set/reset device + * @param devId [IN] device id + * @param isOpenDevice [IN] true: set device, false: reset device + */ +typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice); + +#ifdef __cplusplus +} +#endif + +#endif // MSPROFILER_PROF_CALLBACK_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index c734380c..ff91351b 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -16,7 +16,17 @@ #ifndef MSPROF_ENGINE_PROF_REPORTER_H_ #define MSPROF_ENGINE_PROF_REPORTER_H_ +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else #define MSVP_PROF_API __attribute__((visibility("default"))) +#endif + +#include "prof_callback.h" /** * @file prof_reporter.h @@ -25,20 +35,6 @@ */ namespace Msprof { namespace Engine { -/// the max tag length -#define MSPROF_ENGINE_MAX_TAG_LEN (31) -/** - * @ingroup reporter - * @brief 
struct ReporterData - * the sturct of the data send to libmsprof - */ -struct ReporterData { - char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen - int deviceId; ///< the physical id of device - size_t dataLen; ///< the length of send data - unsigned char *data; ///< the data content -}; - /** * @ingroup reporter * @brief class Reporter @@ -86,4 +82,4 @@ class MSVP_PROF_API Reporter { } // namespace Engine } // namespace Msprof -#endif // MSPROF_ENGINE_PROF_REPORTER_H_ \ No newline at end of file +#endif // MSPROF_ENGINE_PROF_REPORTER_H_ diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index bce58f32..5faca0ae 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -18,7 +18,9 @@ #define D_SYSLOG_H_ #ifdef __cplusplus +#ifndef LOG_CPP extern "C" { +#endif #endif // __cplusplus #ifndef LINUX @@ -105,6 +107,7 @@ extern "C" { #define SECURITY_LOG_MASK (0x00100000) #define RUN_LOG_MASK (0x01000000) #define OPERATION_LOG_MASK (0x10000000) +#define RESERVERD_LENGTH 52 typedef struct tagDCODE { const char *cName; @@ -116,6 +119,18 @@ typedef struct tagKV { char *value; } KeyValue; +typedef enum { + APPLICATION = 0, + SYSTEM +} ProcessType; + +typedef struct { + ProcessType type; + unsigned int pid; + unsigned int deviceId; + char reserved[RESERVERD_LENGTH]; +} LogAttr; + /** * @ingroup slog * @@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); */ DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); +/** + * @ingroup slog + * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttr(LogAttr logAttr); + /** * @ingroup slog * @brief 
dlog_error: print error log @@ -367,6 +390,8 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); #ifdef __cplusplus +#ifndef LOG_CPP } +#endif // LOG_CPP #endif // __cplusplus #endif // D_SYSLOG_H_ From c85ad855e08b95e30866011e0a0228cd495e768e Mon Sep 17 00:00:00 2001 From: l00444296 Date: Tue, 15 Dec 2020 10:19:42 +0800 Subject: [PATCH 06/23] Feature: Get default op format from ge graph --- ge/ir_build/ge_ir_build.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 34e612a2..1b00b334 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -230,7 +230,7 @@ class Impl { graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs); graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag); graphStatus GetDefaultInputFormat(const Graph &graph, string &default_format); - const Graph &graph, string &default_shape, string &input_fo graphStatus UpdateDataOpAttr(const Graph &graph); + graphStatus UpdateDataOpAttr(const Graph &graph); graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, ModelBufferData &ge_models); @@ -378,7 +378,7 @@ graphStatus Impl::GetDefaultInputFormat(const Graph &graph, string &default_form return GRAPH_SUCCESS; } -graphStatus Impl::(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) { +graphStatus Impl::Init(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) { auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { From 842d3f271689834fbe4a70ff144a8cb1ae794df8 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Tue, 15 Dec 2020 10:27:10 +0800 Subject: [PATCH 07/23] 
Feature: Get default op format from ge graph --- ge/ir_build/ge_ir_build.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 1b00b334..7ae6cd49 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -378,7 +378,7 @@ graphStatus Impl::GetDefaultInputFormat(const Graph &graph, string &default_form return GRAPH_SUCCESS; } -graphStatus Impl::Init(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) { +graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) { auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { From 9a43c9afb2a47d5aee4830f867b54f758182f175 Mon Sep 17 00:00:00 2001 From: l00444296 Date: Tue, 15 Dec 2020 10:45:45 +0800 Subject: [PATCH 08/23] Feature: Get default op format from ge graph --- ge/ir_build/ge_ir_build.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 7ae6cd49..3a1a9fb9 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -414,7 +414,7 @@ graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape if (is_dynamic_input) { dynamic_shape_flag = true; default_shape += tmp_shape_str; - GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str(),); + GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str()); } } } From d23b490946ef326da4748b6188602d9a598189dc Mon Sep 17 00:00:00 2001 From: weiyang Date: Mon, 30 Nov 2020 19:44:33 +0800 Subject: [PATCH 09/23] support known aicpu --- .../load/new_model_manager/davinci_model.cc | 31 +++-- .../load/new_model_manager/davinci_model.h | 12 +- .../load/new_model_manager/model_manager.cc | 2 +- .../task_info/kernel_task_info.cc | 118 ++++++++---------- 
.../task_info/kernel_task_info.h | 2 + 5 files changed, 85 insertions(+), 80 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index bc755e07..720c3c28 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2991,19 +2991,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector &inputs, const return SUCCESS; } -Status DavinciModel::UpdateKnownZeroCopyAddr() { - for (size_t i = 0; i < total_io_addrs_.size(); ++i) { - auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); +Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs) { + for (size_t i = 0; i < total_io_addrs.size(); ++i) { + auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); if (it_in != knonw_input_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], - knonw_input_data_info_.at(total_io_addrs_[i])); - total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); + GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], + knonw_input_data_info_.at(total_io_addrs[i])); + total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); } - auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); + auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); if (it_out != knonw_output_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], - knonw_output_data_info_.at(total_io_addrs_[i])); - total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); + GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], + knonw_output_data_info_.at(total_io_addrs[i])); + total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]); } } GELOGI("DavinciModel::UpdateKnownZeroCopyAddr 
success."); @@ -3032,7 +3032,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec } else { total_io_addrs_ = orig_total_io_addrs_; } - GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); + GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); if (total_args_size_ == 0) { GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); @@ -3099,7 +3099,14 @@ Status DavinciModel::MallocKnownArgs() { GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } - + // malloc dynamic and static hybrid memory + if (total_hybrid_args_size_ != 0) { + rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + } // malloc fixed addr memory, eg: rts op if (total_fixed_addr_size_ != 0) { GELOGI("Begin to allocate fixed addr."); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 19888e1f..27bd4de5 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -476,6 +476,14 @@ class DavinciModel { void SetTotalIOAddrs(vector &io_addrs) { total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); } + void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; } + uint32_t GetHybridArgsSize() { + return total_hybrid_args_size_; + } + void *GetCurrentHybridArgsAddr(uint32_t offset) { + void *cur_args = static_cast(hybrid_addrs_) + offset; + return cur_args; + } void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); int64_t GetFixedAddrsSize(string tensor_name); void *GetCurrentFixedAddr(int64_t offset) const { @@ -494,7 +502,7 @@ class 
DavinciModel { Status MallocKnownArgs(); Status UpdateKnownNodeArgs(const vector &inputs, const vector &outputs); Status CreateKnownZeroCopyMap(const vector &inputs, const vector &outputs); - Status UpdateKnownZeroCopyAddr(); + Status UpdateKnownZeroCopyAddr(vector &total_io_addrs); void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); @@ -977,6 +985,8 @@ class DavinciModel { void *args_ = nullptr; void *args_host_ = nullptr; void *fixed_addrs_ = nullptr; + void *hybrid_addrs_ = nullptr; + uint32_t total_hybrid_args_size_ = 0; int64_t total_fixed_addr_size_ = 0; std::map knonw_input_data_info_; std::map knonw_output_data_info_; diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index b595ac39..da4856d3 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1214,7 +1214,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "Invalid model id %u, check weather model has been loaded or not.", model_id); + "Invalid model id %u, check whether model has been loaded or not.", model_id); if (davinci_model->NeedDestroyAicpuKernel()) { GELOGI("Start to destroy specified aicpu kernel."); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 74faeb24..364c7ac2 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -372,7 +372,11 @@ Status KernelTaskInfo::SuperKernelDistribute() { Status KernelTaskInfo::Distribute() { GELOGD("KernelTaskInfo Distribute Start."); if 
(davinci_model_->IsKnownNode()) { - args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); + if (kernel_type_ == ccKernelType::TE) { + args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); + } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { + args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); + } GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); } rtError_t rt_ret = RT_ERROR_NONE; @@ -428,36 +432,31 @@ Status KernelTaskInfo::UpdateArgs() { const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); vector input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); vector output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); - vector workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); vector io_addrs; - if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { - io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); - io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); + io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); + io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); + if (kernel_type_ == ccKernelType::TE) { + vector workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); - } else { - string peer_input_name; - if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { - uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); - if (output_index > output_data_addrs.size()) { - GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", - output_data_addrs.size(), output_index); - return FAILED; - } - io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), 
input_data_addrs.end()); - for (size_t i = 0; i < output_data_addrs.size(); ++i) { - if (i == output_index) { - void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); - io_addrs.emplace_back(fixed_addr); - continue; - } - io_addrs.emplace_back(output_data_addrs[i]); - } - io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); + davinci_model_->SetTotalIOAddrs(io_addrs); + } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { + davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); + uintptr_t io_addr = reinterpret_cast(args_addr.get()) + sizeof(aicpu::AicpuParamHead); + auto addrs_size = sizeof(uint64_t) * io_addrs.size(); + errno_t sec_ret = memcpy_s(reinterpret_cast(io_addr), addrs_size, io_addrs.data(), addrs_size); + if (sec_ret != EOK) { + GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + return FAILED; + } + // copy args to device + rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } } - davinci_model_->SetTotalIOAddrs(io_addrs); GELOGI("KernelTaskInfo::UpdateArgs success."); return SUCCESS; } @@ -533,33 +532,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { } Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { - domi::KernelDef kernel_def = task_def.kernel(); - uint32_t args_size = kernel_def.args_size(); - args_offset_ = davinci_model->GetTotalArgsSize(); - davinci_model->SetTotalArgsSize(args_size); - GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); - - // get opcontext stored in model + const domi::KernelDef &kernel_def = task_def.kernel(); const domi::KernelContext &context = kernel_def.context(); - // get opdesc - op_desc_ = 
davinci_model->GetOpByIndex(context.op_index()); - GE_CHECK_NOTNULL(op_desc_); - // alloc fixed addr - string peer_input_name; - if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { - uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); - if (output_index > op_desc_->GetOutputsSize()) { - GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(), - output_index); - return FAILED; - } - fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); - auto tensor_desc = op_desc_->GetOutputDesc(output_index); - int64_t tensor_size = 0; - GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); - davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); - GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size, - fixed_addr_offset_); + kernel_type_ = static_cast(context.kernel_type()); + if (kernel_type_ == ccKernelType::TE) { + uint32_t args_size = kernel_def.args_size(); + args_offset_ = davinci_model->GetTotalArgsSize(); + davinci_model->SetTotalArgsSize(args_size); + GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); + } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { + hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); + davinci_model->SetHybridArgsSize(kernel_def.args_size()); + GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); } return SUCCESS; } @@ -888,7 +872,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } // copy args to new host memory - std::unique_ptr args_addr(new (std::nothrow) uint8_t[args_size_]); + args_addr = std::unique_ptr(new (std::nothrow) uint8_t[args_size_]); GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) errno_t sec_ret = 
memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); if (sec_ret != EOK) { @@ -896,8 +880,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k return FAILED; } - const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); + auto aicpu_param_head = reinterpret_cast(args_addr.get()); + const auto &ext_info = kernel_def.kernel_ext_info(); + auto init_ret = InitAicpuTaskExtInfo(ext_info); + if (init_ret != SUCCESS) { + GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); + return init_ret; + } + GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), + op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); + aicpu_param_head->extInfoAddr = reinterpret_cast(aicpu_ext_info_addr_); + aicpu_param_head->extInfoLength = static_cast(ext_info.size()); + + if (davinci_model_->IsKnownNode()) { + return SUCCESS; + } + const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); vector input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); vector output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); vector io_addrs; @@ -914,19 +913,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } } - auto aicpu_param_head = reinterpret_cast(args_addr.get()); - const auto &ext_info = kernel_def.kernel_ext_info(); - auto init_ret = InitAicpuTaskExtInfo(ext_info); - if (init_ret != SUCCESS) { - GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); - return init_ret; - } - GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), - op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); - - aicpu_param_head->extInfoAddr = reinterpret_cast(aicpu_ext_info_addr_); - aicpu_param_head->extInfoLength = static_cast(ext_info.size()); - // malloc device memory for args rtError_t rt_ret 
= rtMalloc(static_cast(&args_), args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index 1f90ede1..7717edd3 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -159,7 +159,9 @@ class KernelTaskInfo : public TaskInfo { OpDescPtr op_desc_; DavinciModel *davinci_model_; uint32_t args_offset_ = 0; + uint32_t hybrid_args_offset_ = 0; int64_t fixed_addr_offset_ = 0; + std::unique_ptr args_addr = nullptr; bool call_save_dump_ = false; // aicpu ext_info device mem From f25a8fa11e0d19eaf650e2426915cda7264f8544 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Wed, 16 Dec 2020 00:35:36 +0800 Subject: [PATCH 10/23] update submodule metadef parser --- metadef | 2 +- parser | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadef b/metadef index dba83744..97f45957 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit dba83744a3ffe3d5f89496e69bb65c50f800c299 +Subproject commit 97f4595760f034bd06fca6c8e9459039413fbe2f diff --git a/parser b/parser index ce574894..6420c719 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit ce574894f13cd94749d1a3964a13e8c97c20434a +Subproject commit 6420c71989f6f7b36154b226bd8aea7790266ad1 From ccb4443e9d5f703d85982674f06173ab08f4d1eb Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Wed, 16 Dec 2020 10:51:10 +0800 Subject: [PATCH 11/23] update cmakelist --- ge/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 90c341d5..073ca05c 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -606,12 +606,14 @@ set(INFER_SRC_LIST if (ENABLE_OPEN_SRC) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a) + message(status "libmsprofiler_fwk.a has been 
found!") execute_process( COMMAND ar x ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object ) file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) else() + message(status "libmsprofiler_fwk.a can not be found!") file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "") set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc) endif() From 6e5a4cc4d0b88c1a4f23a5924a66efe13c738c7b Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Wed, 16 Dec 2020 11:22:15 +0800 Subject: [PATCH 12/23] update cmkaelist --- CMakeLists.txt | 2 +- ge/CMakeLists.txt | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bea12fcc..0bd4b913 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,7 +92,7 @@ if (ENABLE_OPEN_SRC) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) - #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) if(PRODUCT STREQUAL "flr3") message(FATAL_ERROR "This platform is not supported in train mode, build terminated") diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 073ca05c..015e7a1d 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -603,22 +603,22 @@ set(INFER_SRC_LIST "analyzer/analyzer.cc" ) -if (ENABLE_OPEN_SRC) - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) - if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a) - message(status "libmsprofiler_fwk.a has been found!") - execute_process( - COMMAND ar x ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object - ) - file(GLOB msprof_file 
${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) - else() - message(status "libmsprofiler_fwk.a can not be found!") - file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "") - set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc) - endif() - add_library(msprofiler_fwk OBJECT ${msprof_file}) -endif() +#if (ENABLE_OPEN_SRC) +# file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) +# if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a) +# message(status "libmsprofiler_fwk.a has been found!") +# execute_process( +# COMMAND ar x ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a +# WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object +# ) +# file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) +# else() +# message(status "libmsprofiler_fwk.a can not be found!") +# file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "") +# set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc) +# endif() +# add_library(msprofiler_fwk OBJECT ${msprof_file}) +#endif() if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ From b1b30afd4e47113d02cc2552582d501ad547ba95 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Wed, 16 Dec 2020 14:48:51 +0800 Subject: [PATCH 13/23] update cmakelist --- CMakeLists.txt | 2 +- ge/CMakeLists.txt | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bd4b913..bea12fcc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,7 +92,7 @@ if (ENABLE_OPEN_SRC) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) + #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) if(PRODUCT 
STREQUAL "flr3") message(FATAL_ERROR "This platform is not supported in train mode, build terminated") diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 015e7a1d..6fe43a1c 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -620,6 +620,12 @@ set(INFER_SRC_LIST # add_library(msprofiler_fwk OBJECT ${msprof_file}) #endif() +if (ENABLE_OPEN_SRC) + add_library(msprofiler_fwk STATIC IMPORTED) + set_target_properties(msprofiler_fwk PROPERTIES + IMPORTED_LOCATION ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a) +endif() + if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $) From af5190136b84f59ad947a3b44b74f084a55c63d8 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Wed, 16 Dec 2020 15:41:04 +0800 Subject: [PATCH 14/23] update cmakelist --- CMakeLists.txt | 6 +++--- ge/CMakeLists.txt | 28 ++++------------------------ 2 files changed, 7 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bea12fcc..7416a130 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,7 +80,7 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) - #find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH}) #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) else() find_module(slog libslog.so ${ASCEND_ATC_DIR}) @@ -92,7 +92,7 @@ if (ENABLE_OPEN_SRC) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) - #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) 
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) if(PRODUCT STREQUAL "flr3") message(FATAL_ERROR "This platform is not supported in train mode, build terminated") @@ -123,7 +123,7 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 6fe43a1c..e18e3a1d 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -603,32 +603,9 @@ set(INFER_SRC_LIST "analyzer/analyzer.cc" ) -#if (ENABLE_OPEN_SRC) -# file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) -# if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a) -# message(status "libmsprofiler_fwk.a has been found!") -# execute_process( -# COMMAND ar x ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a -# WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object -# ) -# file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) -# else() -# message(status "libmsprofiler_fwk.a can not be found!") -# file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "") -# set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc) -# endif() -# add_library(msprofiler_fwk OBJECT ${msprof_file}) -#endif() - -if (ENABLE_OPEN_SRC) - add_library(msprofiler_fwk STATIC IMPORTED) - set_target_properties(msprofiler_fwk PROPERTIES - IMPORTED_LOCATION ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a) -endif() - if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ -add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} 
${PROTO_CLIENT_SRCS} $) +add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) target_compile_definitions(ge_runner PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 @@ -674,6 +651,9 @@ target_link_libraries(ge_runner ge_memory adump_server static_mmpa + -Wl,--whole-archive + msprofiler_fwk + -Wl,--no-whole-archive -Wl,--no-as-needed graph ge_common From f2821fbf25dc70c3cf4ed2d368a22fbba1c3bfc3 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Wed, 16 Dec 2020 17:49:02 +0800 Subject: [PATCH 15/23] target ge_runner link --- ge/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index e18e3a1d..b2186530 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -646,7 +646,7 @@ target_include_directories(ge_runner PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) -target_link_libraries(ge_runner +target_link_libraries(ge_runner PRIVATE $ ge_memory adump_server From 60c780ba524920b1ad8892b1888916f7feb3790a Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Wed, 16 Dec 2020 17:57:54 +0800 Subject: [PATCH 16/23] update ascendcl cmaklist --- ge/CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index b2186530..753ae871 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -767,7 +767,14 @@ target_link_options(opensrc_ascendcl PRIVATE -Wl,--allow-multiple-definition -Wl,-z,muldefs -Wl,-Bsymbolic - -Wl,--exclude-libs,ALL + -Wl,--exclude-libs,libascend_protobuf.a + -Wl,--exclude-libs,libge_executor.a + -Wl,--exclude-libs,libge_common.a + -Wl,--exclude-libs,libgraph.a + -Wl,--exclude-libs,libmmpa.a + -Wl,--exclude-libs,libregister.a + -Wl,--exclude-libs,liberror_manager.a + -Wl,--exclude-libs,libadump_server.a ) target_link_libraries(opensrc_ascendcl PRIVATE -Wl,--whole-archive From d2ef3006a2cdc8b9788187a09a363563a22e771c Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Wed, 16 Dec 2020 19:33:49 
+0800 Subject: [PATCH 17/23] update ge_compiler cmake --- ge/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 753ae871..26a7ee99 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -713,7 +713,7 @@ target_include_directories(ge_compiler PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) -target_link_libraries(ge_compiler +target_link_libraries(ge_compiler PRIVATE $ ge_memory static_mmpa From 7bdcefaf5e7014ddac2e51153ed7492e87ae7e09 Mon Sep 17 00:00:00 2001 From: lichun Date: Wed, 16 Dec 2020 20:29:01 +0800 Subject: [PATCH 18/23] support load om through LoadRootModel --- ge/graph/load/new_model_manager/model_manager.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index b595ac39..d19ca643 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1055,7 +1055,16 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model mmTimespec timespec = mmGetTickCount(); ModelHelper model_helper; - Status ret = model_helper.LoadModel(model); + Status ret = model_helper.LoadRootModel(model); + if (model_helper.GetModelType()) { + bool is_shape_unknown = false; + GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), + "CheckIsUnknownShape failed, model id:%u", + model_id); + if (is_shape_unknown || GetContext().GetHostExecFlag()) { + return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener); + } + } if (ret != SUCCESS) { GELOGE(ret, "load model failed."); return ret; From d4bf8cc6acf983a9393951d4a9b8c128b01eeeab Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Sat, 12 Dec 2020 15:50:47 +0800 Subject: [PATCH 19/23] provide option to download third party software from cache server --- CMakeLists.txt | 13 +- 
cmake/external_libs/gflags.cmake | 1 + cmake/external_libs/gtest.cmake | 8 +- cmake/external_libs/json.cmake | 19 +- cmake/external_libs/onnx.cmake | 6 +- cmake/external_libs/protobuf_shared.cmake | 1 + cmake/external_libs/protobuf_static.cmake | 1 + cmake/external_libs/protoc.cmake | 231 +++++++++++----------- cmake/external_libs/securec.cmake | 13 +- 9 files changed, 161 insertions(+), 132 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 86d0184b..49724b41 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,8 +16,11 @@ endif() if(DEFINED ENV{D_PKG_SERVER}) set(GE_PB_PKG $ENV{D_PKG_SERVER}) - message("Download packages from PKG server") -endif() + message("Download packages from DPKG server") +elseif(DEFINED ENV{MSLIBS_SERVER}) + set(GE_PB_PKG "http://$ENV{MSLIBS_SERVER}:8081") + message("Download packages from MSPKG server") +endif () set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64) set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common) @@ -105,7 +108,7 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) - #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) + #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") elseif(PRODUCT STREQUAL "flr1") find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) @@ -127,10 +130,10 @@ if (ENABLE_OPEN_SRC) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() - message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") + message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") endif() - if (ENABLE_GE_COV OR ENABLE_GE_UT) + if (ENABLE_GE_COV OR ENABLE_GE_UT) add_subdirectory(tests) endif() diff --git 
a/cmake/external_libs/gflags.cmake b/cmake/external_libs/gflags.cmake index f3f0f0ef..50cfb2bc 100755 --- a/cmake/external_libs/gflags.cmake +++ b/cmake/external_libs/gflags.cmake @@ -23,6 +23,7 @@ ExternalProject_Add(gflags_build URL ${REQ_URL} #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz #SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2 + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags BUILD_COMMAND $(MAKE) INSTALL_COMMAND $(MAKE) install diff --git a/cmake/external_libs/gtest.cmake b/cmake/external_libs/gtest.cmake index 96ea84b4..c5edcd72 100755 --- a/cmake/external_libs/gtest.cmake +++ b/cmake/external_libs/gtest.cmake @@ -10,7 +10,10 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") endif() -if (ENABLE_GITEE) +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/gtest/release-1.8.0.tar.gz") + set(MD5 "") +elseif (ENABLE_GITEE) set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz") set(MD5 "") else() @@ -22,8 +25,9 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack- set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") ExternalProject_Add(gtest_build URL ${REQ_URL} + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest - -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON + -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON BUILD_COMMAND $(MAKE) INSTALL_COMMAND $(MAKE) install EXCLUDE_FROM_ALL TRUE diff --git a/cmake/external_libs/json.cmake 
b/cmake/external_libs/json.cmake index c4a52843..3c1cd012 100755 --- a/cmake/external_libs/json.cmake +++ b/cmake/external_libs/json.cmake @@ -5,19 +5,24 @@ endif() include(ExternalProject) set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include) -#if (ENABLE_GITEE) +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip") + set(MD5 "0dc903888211db3a0f170304cd9f3a89") + set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) +#elseif (ENABLE_GITEE) # set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") # set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") -# set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") -#else() -set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") -set(MD5 "0dc903888211db3a0f170304cd9f3a89") -set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) -#endif () +#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") +else() + set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") + set(MD5 "0dc903888211db3a0f170304cd9f3a89") + set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) +endif () ExternalProject_Add(json_build URL ${REQ_URL} #URL /home/txd/workspace/cloud_code/pkg/include.zip SOURCE_DIR ${JSON_SRC_DIR} + TLS_VERIFY OFF CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" diff --git a/cmake/external_libs/onnx.cmake b/cmake/external_libs/onnx.cmake index 9dadb544..1ee80d2d 100755 --- a/cmake/external_libs/onnx.cmake +++ b/cmake/external_libs/onnx.cmake @@ -6,7 +6,10 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx) set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto) file(MAKE_DIRECTORY ${ONNX_PROTO_DIR}) -if (ENABLE_GITEE) +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz") + set(MD5 "512f2779d6215d4a36f366b6b9acdf1e") +elseif (ENABLE_GITEE) set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz") set(MD5 "1bdbcecdd68ea8392630467646776e02") else() @@ -19,6 +22,7 @@ ExternalProject_Add(onnx #URL 
/home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz #URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345 #SOURCE_DIR ${ONNX_SRC_DIR} + TLS_VERIFY OFF CONFIGURE_COMMAND "" BUILD_COMMAND "" #INSTALL_COMMAND "" diff --git a/cmake/external_libs/protobuf_shared.cmake b/cmake/external_libs/protobuf_shared.cmake index c9c6b7d9..6334c8a3 100755 --- a/cmake/external_libs/protobuf_shared.cmake +++ b/cmake/external_libs/protobuf_shared.cmake @@ -26,6 +26,7 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") ExternalProject_Add(protobuf_build URL ${REQ_URL} + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR} diff --git a/cmake/external_libs/protobuf_static.cmake b/cmake/external_libs/protobuf_static.cmake index 6f3e1f53..e4bbb9a0 100755 --- a/cmake/external_libs/protobuf_static.cmake +++ b/cmake/external_libs/protobuf_static.cmake @@ -27,6 +27,7 @@ ExternalProject_Add(protobuf_static_build URL ${REQ_URL} #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz #SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0 + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/cmake/external_libs/protoc.cmake b/cmake/external_libs/protoc.cmake index 0d162c0d..58321f04 100755 --- a/cmake/external_libs/protoc.cmake +++ b/cmake/external_libs/protoc.cmake @@ -1,115 +1,116 @@ -if (HAVE_PROTOC) - return() -endif() - -include(ExternalProject) -include(GNUInstallDirs) -#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) - -if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR - (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) - set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) - message(STATUS "No install prefix selected, default to 
${CMAKE_INSTALL_PREFIX}.") -endif() - -if(GE_PB_PKG) - set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") -else() - if (ENABLE_GITEE) - set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") - set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") - else() - set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") - set(MD5 "3d9e32700639618a4d2d342c99d4507a") - endif () -endif() - -set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") -set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") -ExternalProject_Add(protoc_build - URL ${REQ_URL} - #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz - #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 - CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc /cmake - BUILD_COMMAND $(MAKE) - INSTALL_COMMAND $(MAKE) install - EXCLUDE_FROM_ALL TRUE -) - -set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) - -set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) - -function(protobuf_generate comp c_var h_var) - if(NOT ARGN) - message(SEND_ERROR "Error: protobuf_generate() called without any proto files") - return() - endif() - set(${c_var}) - set(${h_var}) - - foreach(file ${ARGN}) - get_filename_component(abs_file ${file} ABSOLUTE) - get_filename_component(file_name ${file} NAME_WE) - get_filename_component(file_dir ${abs_file} PATH) - get_filename_component(parent_subdir ${file_dir} NAME) - - if("${parent_subdir}" STREQUAL "proto") - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) - else() - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) - endif() - list(APPEND ${c_var} 
"${proto_output_path}/${file_name}.pb.cc") - list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h") - - add_custom_command( - OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" - COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} - DEPENDS protoc_build ${abs_file} - COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) - endforeach() - - set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) - set(${c_var} ${${c_var}} PARENT_SCOPE) - set(${h_var} ${${h_var}} PARENT_SCOPE) - -endfunction() - -function(protobuf_generate_py comp py_var) - if(NOT ARGN) - message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") - return() - endif() - set(${py_var}) - - foreach(file ${ARGN}) - get_filename_component(abs_file ${file} ABSOLUTE) - get_filename_component(file_name ${file} NAME_WE) - get_filename_component(file_dir ${abs_file} PATH) - get_filename_component(parent_subdir ${file_dir} NAME) - - if("${parent_subdir}" STREQUAL "proto") - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) - else() - set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) - endif() - list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py") - - add_custom_command( - OUTPUT "${proto_output_path}/${file_name}_pb2.py" - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" - COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} - DEPENDS protoc_build ${abs_file} - COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) - endforeach() - - set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) - set(${py_var} ${${py_var}} PARENT_SCOPE) - -endfunction() - -#set(HAVE_PROTOC TRUE CACHE BOOL 
"protoc build add") -set(HAVE_PROTOC TRUE) +if (HAVE_PROTOC) + return() +endif() + +include(ExternalProject) +include(GNUInstallDirs) +#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) + +if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR + (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) + set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) + message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") +endif() + +if(GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") +else() + if (ENABLE_GITEE) + set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") + set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") + else() + set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") + set(MD5 "3d9e32700639618a4d2d342c99d4507a") + endif () +endif() + +set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") +set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") +ExternalProject_Add(protoc_build + URL ${REQ_URL} + #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 + TLS_VERIFY OFF + CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc /cmake + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + EXCLUDE_FROM_ALL TRUE +) + +set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) + +set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) + +function(protobuf_generate comp c_var h_var) + if(NOT ARGN) + message(SEND_ERROR "Error: protobuf_generate() called without any proto files") + return() + endif() + set(${c_var}) + set(${h_var}) + + foreach(file 
${ARGN}) + get_filename_component(abs_file ${file} ABSOLUTE) + get_filename_component(file_name ${file} NAME_WE) + get_filename_component(file_dir ${abs_file} PATH) + get_filename_component(parent_subdir ${file_dir} NAME) + + if("${parent_subdir}" STREQUAL "proto") + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) + else() + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) + endif() + list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc") + list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h") + + add_custom_command( + OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" + COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} + DEPENDS protoc_build ${abs_file} + COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) + endforeach() + + set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) + set(${c_var} ${${c_var}} PARENT_SCOPE) + set(${h_var} ${${h_var}} PARENT_SCOPE) + +endfunction() + +function(protobuf_generate_py comp py_var) + if(NOT ARGN) + message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") + return() + endif() + set(${py_var}) + + foreach(file ${ARGN}) + get_filename_component(abs_file ${file} ABSOLUTE) + get_filename_component(file_name ${file} NAME_WE) + get_filename_component(file_dir ${abs_file} PATH) + get_filename_component(parent_subdir ${file_dir} NAME) + + if("${parent_subdir}" STREQUAL "proto") + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) + else() + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) + endif() + list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py") + + add_custom_command( + OUTPUT "${proto_output_path}/${file_name}_pb2.py" + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} 
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" + COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} + DEPENDS protoc_build ${abs_file} + COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) + endforeach() + + set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) + set(${py_var} ${${py_var}} PARENT_SCOPE) + +endfunction() + +#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") +set(HAVE_PROTOC TRUE) diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake index 0bd62ab2..0f8b6d3a 100755 --- a/cmake/external_libs/securec.cmake +++ b/cmake/external_libs/securec.cmake @@ -10,11 +10,20 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") endif() +if (GE_PB_PKG) + set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz") + set(MD5 "") +else() + set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz") + set(MD5 "") +endif () + ExternalProject_Add(c_sec_build - URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz - #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + URL ${REQ_URL} + #URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz #SOURCE_DIR ${GE_CODE_DIR}/../libc_sec PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch + TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} From 71f0dd4cab47930304c87a7965a4e47efc70d76a Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Wed, 16 Dec 2020 21:14:29 +0800 Subject: [PATCH 20/23] update profiling training_Trace parser --- ge/common/profiling/profiling_manager.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ge/common/profiling/profiling_manager.cc 
b/ge/common/profiling/profiling_manager.cc index 456cb0a4..214f58f4 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -143,6 +143,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { } try { Json prof_options = Json::parse(options); + if (options.find(kTrainingTrace) == std::string::npos) { + return ge::SUCCESS; + } const std::string training_trace = prof_options[kTrainingTrace]; if (training_trace.empty()) { GELOGI("Training trace will not take effect."); From 48ab1dbf12a267c57f1806a21b88b077ce40f0bb Mon Sep 17 00:00:00 2001 From: wxl Date: Thu, 17 Dec 2020 11:08:31 +0800 Subject: [PATCH 21/23] update submodule --- metadef | 2 +- parser | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadef b/metadef index 97f45957..129b50b4 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 97f4595760f034bd06fca6c8e9459039413fbe2f +Subproject commit 129b50b41f79d0dfeb9fe8987b1c19c9ac51eb8b diff --git a/parser b/parser index 6420c719..e9f7d019 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 6420c71989f6f7b36154b226bd8aea7790266ad1 +Subproject commit e9f7d0197aba57eb5247cb1e029c10e393631c89 From 984fd1bae6c2685e11f757a45665f205279e0fd8 Mon Sep 17 00:00:00 2001 From: baker Date: Thu, 17 Dec 2020 11:40:17 +0800 Subject: [PATCH 22/23] add onnx model parse api --- inc/framework/omg/parser/model_parser.h | 78 +++++++++++++++---------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 20bfcef4..57cff9a7 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -36,7 +36,7 @@ using Status = domi::Status; namespace domi { using GetGraphCallback = std::function( - const google::protobuf::Message *root_proto, const std::string &graph)>; + const google::protobuf::Message *root_proto, const std::string &graph)>; class 
ModelParser { public: ModelParser() {} @@ -44,19 +44,20 @@ class ModelParser { virtual ~ModelParser() {} /** - * @ingroup domi_omg - * @brief Analyze network model data - * @param [in] file Network model file path - * @param [in|out] graph Save the network information after analysis - * @return SUCCESS - * @return Others failed - */ + * @ingroup domi_omg + * @brief Analyze network model data + * @param [in] file Network model file path + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ virtual Status Parse(const char *file, ge::Graph &graph) = 0; /** * @ingroup domi_omg * @brief Parse relevant data from memory and save it to graph * @param [in] input Model file memory data + * @param [in] size Model file memory size * @param [in|out] graph A graph for saving the model information after analysis * @return SUCCESS * @return FAILED @@ -64,36 +65,49 @@ class ModelParser { */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; +#ifndef ONLY_COMPILE_OPEN_SRC + /** + * @ingroup domi_omg + * @brief Parse relevant data from memory and save it to graph + * @param [in] data Model file memory data + * @param [in] size Model file memory size + * @param [in|out] graph A graph for saving the model information after analysis + * @return SUCCESS + * @return FAILED + * @author + */ + virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; +#endif + /** - * @ingroup domi_omg - * @brief Analyze network model data - * @param [in] proto network model - * @param [in|out] graph Save the network information after analysis - * @return SUCCESS - * @return Others failed - */ + * @ingroup domi_omg + * @brief Analyze network model data + * @param [in] proto network model + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ virtual Status ParseProto(const google::protobuf::Message
*proto, ge::ComputeGraphPtr &graph) = 0; /** - * @ingroup domi_omg - * @brief Analyze callback model data in subgraph - * @param [in] proto network model - * @param [in] callback callback of subgraph - * @param [in|out] graph Save the network information after analysis - * @return SUCCESS - * @return Others failed - */ - virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, - GetGraphCallback callback, + * @ingroup domi_omg + * @brief Analyze callback model data in subgraph + * @param [in] proto network model + * @param [in] callback callback of subgraph + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ + virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, ge::ComputeGraphPtr &graph) = 0; /** - * @ingroup domi_omg - * @brief Convert model files to JSON format - * @param [in] model_file Model file path to be converted - * @param [out] json_file Converted JSON file path - * @return SUCCESS - * @return Others failed - */ + * @ingroup domi_omg + * @brief Convert model files to JSON format + * @param [in] model_file Model file path to be converted + * @param [out] json_file Converted JSON file path + * @return SUCCESS + * @return Others failed + */ virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } /* From 70651ccf70234d12dae1c25482f7be55e14c367d Mon Sep 17 00:00:00 2001 From: l00444296 Date: Thu, 17 Dec 2020 16:17:57 +0800 Subject: [PATCH 23/23] Feature: delete several para of aclgrphParse interface --- ge/ir_build/ge_ir_build.cc | 119 ------------------------------------- 1 file changed, 119 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 3a1a9fb9..0e1d9452 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -226,10 +226,7 @@ class Impl { }; ~Impl() { (void)generator_.Finalize(); }; graphStatus CheckOptions(const 
std::map &options); - graphStatus CheckInputFormat(const string &input_format); graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs); - graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag); - graphStatus GetDefaultInputFormat(const Graph &graph, string &default_format); graphStatus UpdateDataOpAttr(const Graph &graph); graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, @@ -323,106 +320,6 @@ graphStatus Impl::CheckOptions(const std::map &options return GRAPH_SUCCESS; } -graphStatus Impl::CheckInputFormat(const string &input_format) { - if (!input_format.empty()) { - auto iter = ge::input_format_str_to_geformat.find(input_format); - if (iter == ge::input_format_str_to_geformat.end()) { - GELOGE(GRAPH_PARAM_INVALID, "Input format %s not support , expect ND/NCHW/NHWC/CHWN/NC1HWC0/NHWC1C0.", - input_format.c_str()); - return GRAPH_PARAM_INVALID; - } - } - return GRAPH_SUCCESS; -} - -graphStatus Impl::GetDefaultInputFormat(const Graph &graph, string &default_format) { - auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); - GE_CHECK_NOTNULL(compute_graph); - for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { - GE_CHECK_NOTNULL(input_node); - ge::OpDescPtr op = input_node->GetOpDesc(); - GE_CHECK_NOTNULL(op); - if (op->GetType() == DATA) { - string data_op_name = op->GetName(); - GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size()); - ge::GeTensorDesc tensor = op->GetInputDesc(0); - ge::GeShape data_shape = tensor.GetShape(); - GELOGD("Data op get shape from InputDesc in ge ir graph."); - - const std::vector &tmp_shape = data_shape.GetDims(); - if (tmp_shape.empty()) { - GELOGD("Data op: %s has zero shape dims!", data_op_name.c_str()); - continue; - } - - bool is_dynamic_input = false; - for (auto tmp_dim : tmp_shape) { - if (tmp_dim < 0) { - 
is_dynamic_input = true; - } - } - - if (is_dynamic_input) { - string tmp_data_format = ge::TypeUtils::FormatToSerialString(tensor.GetFormat()); - if (!default_format.empty() && tmp_data_format!=default_format) { - GELOGE(GRAPH_PARAM_INVALID, "All data op with dynamic shape has no default format!"); - return GRAPH_PARAM_INVALID; - } else if (default_format.empty()) { - default_format.assign(tmp_data_format); - } - GELOGD("Data op name: %s, data format: %s.", data_op_name.c_str(), default_format.c_str()); - } - } - } - GELOGI("Get default data op format: %s from ge ir graph.", default_format.c_str()); - return GRAPH_SUCCESS; -} - -graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape, bool &dynamic_shape_flag) { - auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); - GE_CHECK_NOTNULL(compute_graph); - for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { - GE_CHECK_NOTNULL(input_node); - ge::OpDescPtr op = input_node->GetOpDesc(); - GE_CHECK_NOTNULL(op); - if (op->GetType() == DATA) { - string data_op_name = op->GetName(); - GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size()); - ge::GeTensorDesc tensor = op->GetInputDesc(0); - ge::GeShape data_shape = tensor.GetShape(); - GELOGD("Data op get shape from InputDesc in ge ir graph."); - - const std::vector &tmp_shape = data_shape.GetDims(); - if (tmp_shape.empty()) { - GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str()); - continue; - } - - string tmp_shape_str; - bool is_dynamic_input = false; - - tmp_shape_str += data_op_name + ":"; - for (auto tmp_dim : tmp_shape) { - if (tmp_dim < 0) { - is_dynamic_input = true; - } - tmp_shape_str += to_string((long)tmp_dim) + ","; - } - tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); - tmp_shape_str += ";"; - - if (is_dynamic_input) { - dynamic_shape_flag = true; - default_shape += tmp_shape_str; - GELOGD("Data op name: %s, data shape: %s.", 
data_op_name.c_str(), tmp_shape_str.c_str()); - } - } - } - default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1)); - GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str()); - return GRAPH_SUCCESS; -} - graphStatus Impl::Init(const Graph &graph, const std::map &options) { // 1. check options graphStatus ret = CheckOptions(options); @@ -444,24 +341,8 @@ graphStatus Impl::Init(const Graph &graph, const std::map