Browse Source

Pre Merge pull request !1312 from 王笑天/master

pull/1312/MERGE
王笑天 Gitee 4 years ago
parent
commit
89453592a5
12 changed files with 319 additions and 12 deletions
  1. +37
    -0
      ge/graph/build/graph_builder.cc
  2. +10
    -0
      ge/graph/build/label_allocator.cc
  3. +9
    -0
      ge/graph/build/logical_stream_allocator.cc
  4. +1
    -0
      ge/graph/build/memory/graph_mem_assigner.cc
  5. +1
    -0
      ge/graph/build/memory/hybrid_mem_assigner.cc
  6. +17
    -4
      ge/graph/build/memory/var_mem_assign_util.cc
  7. +68
    -1
      ge/graph/build/model_builder.cc
  8. +22
    -0
      ge/graph/build/run_context.cc
  9. +65
    -3
      ge/graph/build/stream_allocator.cc
  10. +9
    -0
      ge/graph/build/stream_graph_optimizer.cc
  11. +77
    -4
      ge/graph/build/task_generator.cc
  12. +3
    -0
      inc/framework/common/debug/log.h

+ 37
- 0
ge/graph/build/graph_builder.cc View File

@@ -77,6 +77,8 @@ Status HandleSubgraphNode(NodePtr &src_node, OutDataAnchorPtr &src_out_anchor) {
Status HandleSubgraphDataNode(NodePtr &src_node, OutDataAnchorPtr &src_out_anchor) {
uint32_t index = 0;
if (!AttrUtils::GetInt(src_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, index)) {
REPORT_INNER_ERROR("E19999", "get attr:%s failed from node:%s when HandleSubgraphDataNode",
ATTR_NAME_PARENT_NODE_INDEX.c_str(), src_node->GetName().c_str());
GELOGE(FAILED, "Get attr ATTR_NAME_PARENT_NODE_INDEX failed, node:%s.", src_node->GetName().c_str());
return FAILED;
}
@@ -109,6 +111,8 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) {
GE_CHECK_NOTNULL(graph);
auto instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
REPORT_INNER_ERROR("E19999", "check gelib instance null when CalcOpParam for graph:%s",
graph->GetName().c_str());
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GraphBuilder: GE is not initialized");
return GE_CLI_GE_NOT_INITIALIZED;
}
@@ -121,6 +125,8 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) {
(void)instance_ptr->DNNEngineManagerObj().GetDNNEngineName(node_ptr);
kernel_lib_name = node_ptr->GetOpDesc()->GetOpKernelLibName();
if (kernel_lib_name.empty()) {
REPORT_INNER_ERROR("E19999", "op kernel lib is empty in node:%s(%s) when CalcOpParam",
node_ptr->GetName().c_str(), node_ptr->GetType().c_str());
GELOGE(INTERNAL_ERROR, "Get node:%s(%s) kernel lib failed.", node_ptr->GetName().c_str(),
node_ptr->GetType().c_str());
return INTERNAL_ERROR;
@@ -129,12 +135,16 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) {

auto ret = SetInputSize(node_ptr);
if (ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Set node:%s(%s) inputDesc size failed when CalcOpParam",
node_ptr->GetName().c_str(), node_ptr->GetType().c_str());
GELOGE(ret, "Set node inputDesc size failed, node name is %s", node_ptr->GetName().c_str());
return ret;
}

ret = OpsKernelBuilderManager::Instance().CalcOpRunningParam(*node_ptr);
if (ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Call Calculate op:%s(%s) running param failed",
node_ptr->GetName().c_str(), node_ptr->GetType().c_str());
GELOGE(ret, "Calculate op running param failed, node name is %s", node_ptr->GetName().c_str());
return ret;
}
@@ -191,6 +201,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph

Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) {
if (comp_graph == nullptr) {
REPORT_INNER_ERROR("E19999", "check compute_graph nullptr when BuildGraph, session_id:%lu", session_id);
GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null.");
return GE_GRAPH_PARAM_NULLPTR;
}
@@ -302,6 +313,8 @@ Status GraphBuilder::SetConstantInputOffset(ComputeGraphPtr &comp_graph) {

std::vector<GeTensorPtr> weights = OpDescUtils::MutableWeights(peer_node);
if (weights.empty()) {
REPORT_INNER_ERROR("E19999", "check weights size of node %s(%s) is empty when SetConstantInputOffset",
node->GetName().c_str(), node->GetType().c_str());
GELOGE(FAILED, "weights size of node %s is empty", node->GetName().c_str());
return FAILED;
}
@@ -393,6 +406,7 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor
.Build();
(void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Insert IDENTITY node %s after %s failed", name.c_str(), in_node->GetName().c_str());
GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
return FAILED;
}
@@ -423,6 +437,8 @@ static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph
GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Insert memcpy between %s and %s failed when GenerateTaskForConstant",
in_node->GetName().c_str(), node->GetName().c_str());
GELOGE(FAILED, "Insert memcpy between %s and %s failed.",
in_node->GetName().c_str(), node->GetName().c_str());
return FAILED;
@@ -470,6 +486,8 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
REPORT_INNER_ERROR("E19999", "Multiply result is out of range when calc profiling ar log id "
"for node:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
int64_t log_id = i * kProfilingArStep + kProfilingArStartLogid;
@@ -549,16 +567,19 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr

int64_t memory_size = 0;
if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_MEMORY_SIZE, memory_size)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s fail in model", ATTR_MODEL_MEMORY_SIZE.c_str());
GELOGE(INTERNAL_ERROR, "Get memory size fail.");
return INTERNAL_ERROR;
}
int64_t p2p_memory_size = 0;
if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_memory_size)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s fail in model", ATTR_MODEL_P2P_MEMORY_SIZE.c_str());
GELOGE(INTERNAL_ERROR, "Get p2p memory size fail.");
return INTERNAL_ERROR;
}
int64_t weight_size = 0;
if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_WEIGHT_SIZE, weight_size)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s fail in model", ATTR_MODEL_WEIGHT_SIZE.c_str());
GELOGE(INTERNAL_ERROR, "Get weight memory size fail.");
return INTERNAL_ERROR;
}
@@ -668,6 +689,7 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) {
Status GraphBuilder::UpdateDataInputSize(const ge::NodePtr &node_ptr) {
const auto &op_desc = node_ptr->GetOpDesc();
if (op_desc == nullptr) {
REPORT_INNER_ERROR("E19999", "check op_desc is nullptr when UpdateDataInputSize");
GELOGE(FAILED, "Op desc is nullptr.");
return FAILED;
}
@@ -685,6 +707,8 @@ Status GraphBuilder::UpdateDataInputSize(const ge::NodePtr &node_ptr) {
int64_t real_dim_size = 0;
ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc, real_dim_size);
if (graph_status != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed for op:%s(%s) index:0 when UpdateDataInputSize",
op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(FAILED, "Get tensor size in bytes failed.");
return FAILED;
}
@@ -692,6 +716,8 @@ Status GraphBuilder::UpdateDataInputSize(const ge::NodePtr &node_ptr) {
ge::GeTensorDesc input_desc = op_desc->GetInputDesc(0);
ge::TensorUtils::SetSize(input_desc, real_dim_size);
if (op_desc->UpdateInputDesc(0, input_desc) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Update input desc size failed for op:%s(%s) index:0 when UpdateDataInputSize",
op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(FAILED, "Update input desc size failed.");
return FAILED;
}
@@ -720,6 +746,9 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc)
int64_t real_dim_size = 0;
ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc, real_dim_size);
if (graph_status != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed for op:%s(%s) index:0 "
"when CalcDynShapeRootGraphDataSize",
op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(FAILED, "Get tensor size in bytes failed.");
return FAILED;
}
@@ -727,6 +756,9 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc)
ge::TensorUtils::SetSize(output_desc, real_dim_size);
GELOGI("Update dynamic shape graph data output size to [%ld].", real_dim_size);
if (op_desc->UpdateOutputDesc(0, output_desc) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Update output desc size failed for op:%s(%s) index:0 "
"when CalcDynShapeRootGraphDataSize",
op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(FAILED, "Update dynamic shape graph data output desc size failed.");
return FAILED;
}
@@ -744,6 +776,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) {
GE_CHK_STATUS_RET(ret, "Graph partition Failed.");
const auto &graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap();
if (graph_2_subgraphlist.find(comp_graph) == graph_2_subgraphlist.end()) {
REPORT_INNER_ERROR("E19999", "find subgraphlis in graph:%s failed when SecondPartition",
comp_graph->GetName().c_str());
GELOGE(FAILED, "Find subgraph failed.");
return FAILED;
}
@@ -772,6 +806,9 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) {
mem_type);
if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE,
mem_type)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s for node:%s(%s) out_index:%u failed when AddOutputMemTypeForNode",
ATTR_OUTPUT_MEMORY_TYPE.c_str(), src_desc->GetName().c_str(), src_desc->GetType().c_str(),
src_out_anchor->GetIdx());
GELOGE(INTERNAL_ERROR, "Set out_memory_type attr for [%s:%d] failed.", src_desc->GetName().c_str(),
src_out_anchor->GetIdx());
return INTERNAL_ERROR;


+ 10
- 0
ge/graph/build/label_allocator.cc View File

@@ -28,6 +28,7 @@ LabelAllocator::LabelAllocator(const ComputeGraphPtr &graph) : compute_graph_(gr

Status LabelAllocator::AssignFunctionalLabels() {
if (compute_graph_ == nullptr) {
REPORT_INNER_ERROR("E19999", "check param compute_graph nullptr when AssignFunctionalLabels");
GELOGE(INTERNAL_ERROR, "ComputeGraph not set, Assign labels failed.");
return INTERNAL_ERROR;
}
@@ -46,11 +47,15 @@ Status LabelAllocator::AssignFunctionalLabels() {
for (auto node : functional_nodes) {
LabelMakerPtr maker = LabelMakerFactory::Instance().Create(node->GetType(), compute_graph_, node);
if (maker == nullptr) {
REPORT_CALL_ERROR("E19999", "Check Node:%s(%s) label maker not registed when AssignFunctionalLabels",
node->GetName().c_str(), node->GetType().c_str());
GELOGE(INTERNAL_ERROR, "Node: %s label maker not registed.", node->GetType().c_str());
return INTERNAL_ERROR;
}

if (maker->Run(label_index) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Node:%s(%s) run label maker failed when AssignFunctionalLabels",
node->GetName().c_str(), node->GetType().c_str());
GELOGE(INTERNAL_ERROR, "Node: %s run label maker failed.", node->GetType().c_str());
return INTERNAL_ERROR;
}
@@ -63,6 +68,7 @@ Status LabelAllocator::AssignFunctionalLabels() {

bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<NodePtr> &functional_nodes) {
if (graph == nullptr) {
REPORT_INNER_ERROR("E19999", "check param compute_graph nullptr when CollectFunctionalNode");
GELOGE(INTERNAL_ERROR, "Sub ComputeGraph is null.");
return false;
}
@@ -74,12 +80,16 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<Node

NodePtr func_node = graph->GetParentNode();
// The parent node is null on this path, so it must not be dereferenced;
// only the graph name is available for the error report.
if (func_node == nullptr) {
  REPORT_INNER_ERROR("E19999", "Parent node not set, graph:%s", graph->GetName().c_str());
  GELOGE(INTERNAL_ERROR, "Parent functional node not set: %s.", graph->GetName().c_str());
  return false;
}

ComputeGraphPtr owner_graph = func_node->GetOwnerComputeGraph();
if (owner_graph == nullptr) {
REPORT_INNER_ERROR("E19999", "ComputeGraph owner not set in node:%s(%s), graph:%s",
func_node->GetName().c_str(), func_node->GetType().c_str(), graph->GetName().c_str());
GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", func_node->GetName().c_str());
return false;
}


+ 9
- 0
ge/graph/build/logical_stream_allocator.cc View File

@@ -320,6 +320,8 @@ Status SingleStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &s
if (!HasAssignedStream(*subgraph)) {
const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
if (!stream_label.empty()) {
REPORT_INNER_ERROR("E19999", "Stream labels are not supported in SingleStream mode "
"(subgraph: %s, stream label: %s)", subgraph->name.c_str(), stream_label.c_str());
GELOGE(INTERNAL_ERROR, "Stream labels are not supported (subgraph: %s, stream label: %s).",
subgraph->name.c_str(), stream_label.c_str());
return INTERNAL_ERROR;
@@ -337,6 +339,8 @@ Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr
const string &engine_name = subgraph->engine_conf.id;

if (!IsEngineSkip(*subgraph) && !HasAssignedStream(*subgraph)) {
REPORT_INNER_ERROR("E19999", "Subgraph %s has not yet been assigned a stream (engine: %s) "
" when run NodeStreamUpdatePass", subgraph->name.c_str(), engine_name.c_str());
GELOGE(INTERNAL_ERROR, "Subgraph %s has not yet been assigned a stream (engine: %s).", subgraph->name.c_str(),
engine_name.c_str());
return INTERNAL_ERROR;
@@ -636,6 +640,8 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap

auto iter = subgraph_map.find(graph);
if (iter == subgraph_map.end()) {
REPORT_INNER_ERROR("E19999", "Graph %s not found in subgraph_map when do logical stream assign ",
graph->GetName().c_str());
GELOGE(FAILED, "Graph %s not found.", graph->GetName().c_str());
return FAILED;
}
@@ -675,6 +681,8 @@ Status LogicalStreamAllocator::ConvertSubgraphs(const vector<SubGraphInfoPtr> &s
const string &engine_name = subgraph_info->GetEngineName();
auto engine_conf_iter = engine_confs.find(engine_name);
if ((engine_conf_iter == engine_confs.end()) || (engine_conf_iter->second == nullptr)) {
REPORT_INNER_ERROR("E19999", "Engine conf of subgraph %s not found (engine name: %s) when ConvertSubgraphs",
subgraph_name.c_str(), engine_name.c_str());
GELOGE(INTERNAL_ERROR, "Engine conf of subgraph %s not found (engine name: %s).", subgraph_name.c_str(),
engine_name.c_str());

@@ -722,6 +730,7 @@ Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vec
} else if (status == NOT_CHANGED) {
GELOGD("[Show][Status]Stream pass %s return NOT_CHANGED.", pass->GetName().c_str());
} else {
REPORT_CALL_ERROR("E19999", "Stream pass %s run failed.", pass->GetName().c_str());
GELOGE(status, "Stream pass %s failed.", pass->GetName().c_str());
return status;
}


+ 1
- 0
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -1215,6 +1215,7 @@ Status GraphMemoryAssigner::CheckOffset() {
std::map<std::string, std::string> anchor_to_symbol;
std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get ref-mapping for graph %s failed", compute_graph_->GetName().c_str());
GELOGE(FAILED, "[Get][RefMapping]fail for graph %s", compute_graph_->GetName().c_str());
return FAILED;
}


+ 1
- 0
ge/graph/build/memory/hybrid_mem_assigner.cc View File

@@ -42,6 +42,7 @@ Status HybridMemAssigner::AssignMemory(std::unique_ptr<BlockMemAssigner> &block_

Status HybridMemAssigner::Assign() {
if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors_, anchor_to_symbol_) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get ref-mapping for graph %s failed", compute_graph_->GetName().c_str());
GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str());
return FAILED;
}


+ 17
- 4
ge/graph/build/memory/var_mem_assign_util.cc View File

@@ -53,6 +53,8 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr
GE_IF_BOOL_EXEC(ge::AttrUtils::GetStr(n->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_src_var_name), continue);
string node_name = n->GetName();
GE_IF_BOOL_EXEC(n->GetOpDesc()->GetAllOutputsDesc().empty(),
REPORT_INNER_ERROR("E19999", "check node:%s has no OutputDesc when AssignStaticMemory2Node",
n->GetName().c_str());
GELOGE(FAILED, "node:%s has no OutputDesc.", n->GetName().c_str());
return FAILED);
ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0);
@@ -116,6 +118,8 @@ Status VarMemAssignUtil::SetOutVariableAttr(const ge::NodePtr &node, const ge::N
GE_CHECK_NOTNULL(node->GetOpDesc());
output_list = node->GetOpDesc()->GetOutputOffset();
if (output_list.empty()) {
REPORT_INNER_ERROR("E19999", "check node:%s output_offset_list is empty when SetOutVariableAttr",
node->GetName().c_str());
GELOGE(PARAM_INVALID, "Output_list is empty");
return PARAM_INVALID;
}
@@ -126,7 +130,12 @@ Status VarMemAssignUtil::SetOutVariableAttr(const ge::NodePtr &node, const ge::N
VarManager::Instance(session_id)->GetVarAddr(var_node->GetName(), var_tensor_desc, &dev_ptr, memory_type));

int out_list_size = static_cast<int>(output_list.size());
GE_CHK_BOOL_RET_STATUS(index < out_list_size, FAILED, "index %d >= output_list.size() %d", index, out_list_size);
// Fail when index is at or past the end of output_list. This mirrors the
// check it replaces, which required index < out_list_size to continue.
if (index >= out_list_size) {
  REPORT_INNER_ERROR("E19999", "param index:%d >= output_list.size() %d in node %s, "
                     "check invalid when SetOutVariableAttr", index, out_list_size, node->GetName().c_str());
  GELOGE(FAILED, "index %d >= output_list.size() %d", index, out_list_size);
  return FAILED;
}

output_list[index] = static_cast<int64_t>(reinterpret_cast<intptr_t>(dev_ptr));
GELOGI("Assign node outputOffset[index] is: %ld", output_list[index]);
@@ -168,9 +177,13 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr

auto broad_cast_index = static_cast<size_t>(broad_cast_info.idx);
auto input_tensor_desc_ptr_vistor = op_desc->GetAllInputsDescPtr();
GE_CHK_BOOL_RET_STATUS(input_tensor_desc_ptr_vistor.size() > broad_cast_index, FAILED,
"Get broadcast op %s input tensor desc size [%zu] < idx [%d]", node->GetName().c_str(),
input_tensor_desc_ptr_vistor.size(), broad_cast_info.idx);
// Fail when the broadcast index is not a valid position in the input desc
// list. This mirrors the check it replaces, which required size > index.
if (input_tensor_desc_ptr_vistor.size() <= broad_cast_index) {
  REPORT_INNER_ERROR("E19999", "Get broadcast op %s input tensor desc size [%zu] < idx [%d]",
                     node->GetName().c_str(), input_tensor_desc_ptr_vistor.size(), broad_cast_info.idx);
  GELOGE(FAILED, "Get broadcast op %s input tensor desc size [%zu] < idx [%d]", node->GetName().c_str(),
         input_tensor_desc_ptr_vistor.size(), broad_cast_info.idx);
  return FAILED;
}
const ge::GeTensorDescPtr input_tensor_desc =
input_tensor_desc_ptr_vistor.at(static_cast<size_t>(broad_cast_info.idx));
int64_t input_size = 0;


+ 68
- 1
ge/graph/build/model_builder.cc View File

@@ -116,11 +116,15 @@ Status ModelBuilder::CalcOutputSize(const ge::NodePtr &n) {
int64_t size_temp = 0;
graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(desc_temp, size_temp);
if (graph_status != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed for op:%s(%s) index:%u when CalcOutputSize",
node_op_desc->GetName().c_str(), node_op_desc->GetType().c_str(), index);
GELOGE(graph_status, "GetTensorMemorySizeInBytes failed!");
return FAILED;
}
TensorUtils::SetSize(desc_temp, size_temp);
if (node_op_desc->UpdateOutputDesc(index, desc_temp) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Update Output desc size failed for op:%s(%s) index:%u when CalcOutputSize",
node_op_desc->GetName().c_str(), node_op_desc->GetType().c_str(), index);
GELOGE(FAILED, "UpdateOutputDesc failed.");
return FAILED;
}
@@ -207,11 +211,15 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_
if (node->GetType() == CONSTANT) {
vector<GeTensorPtr> weights = OpDescUtils::MutableWeights(node);
if (weights.empty()) {
REPORT_INNER_ERROR("E19999", "Check weights size of node %s(%s) is empty when AdjustConstWeightSize",
node->GetName().c_str(), node->GetType().c_str());
GELOGE(FAILED, "weights size of node %s is empty", node->GetName().c_str());
return FAILED;
}
GeTensorPtr weight = weights[0];
if (weight == nullptr) {
REPORT_INNER_ERROR("E19999", "Check weight of node %s(%s) is nullptr when AdjustConstWeightSize",
node->GetName().c_str(), node->GetType().c_str());
GELOGE(FAILED, "weights[0] is null.");
return FAILED;
}
@@ -353,6 +361,9 @@ Status ModelBuilder::AdjustInputTensorFlag() {
auto input_desc = owner_node_op_desc->GetInputDesc(in_anchors->GetIdx());
ge::TensorUtils::SetInputTensor(input_desc, true);
// Write the flagged input desc back to the owner op; abort on failure.
if (owner_node_op_desc->UpdateInputDesc(in_anchors->GetIdx(), input_desc) != SUCCESS) {
  REPORT_CALL_ERROR("E19999", "Update Input desc size failed for op:%s(%s) index:%u when %s",
                    owner_node_op_desc->GetName().c_str(), owner_node_op_desc->GetType().c_str(),
                    in_anchors->GetIdx(), __FUNCTION__);
  // The failing call is UpdateInputDesc, so the log names that call.
  GELOGE(FAILED, "UpdateInputDesc failed.");
  return FAILED;
}
@@ -381,33 +392,51 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) {

max_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_HBM];
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_MEMORY_SIZE, max_mem_offset_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_MEMORY_SIZE.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt of ATTR_MODEL_MEMORY_SIZE failed.");
return FAILED);
if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) {
p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR];
}
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_P2P_MEMORY_SIZE.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt of ATTR_MODEL_P2P_MEMORY_SIZE failed.");
return FAILED);
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_WEIGHT_SIZE, weight_offset_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_WEIGHT_SIZE.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt of ATTR_MODEL_WEIGHT_SIZE failed.");
return FAILED);
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_STREAM_NUM, stream_num_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_STREAM_NUM.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt of ATTR_MODEL_STREAM_NUM failed.");
return FAILED);
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_EVENT_NUM, event_num_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_EVENT_NUM.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt of ATTR_MODEL_EVENT_NUM failed.");
return FAILED);
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(&model, ATTR_MODEL_HUGE_STREAM_LIST, huge_streams_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_HUGE_STREAM_LIST.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt of ATTR_MODEL_HUGE_STREAM_LIST failed.");
return FAILED);
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_LABEL_NUM, label_num_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_LABEL_NUM.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt of ATTR_MODEL_LABEL_NUM failed.");
return FAILED);
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, zero_copy_mem_size_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_ZERO_COPY_MEMORY_SIZE.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt of ATTR_MODEL_ZERO_COPY_MEMORY_SIZE failed.");
return FAILED);
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, ATTR_MODEL_OUT_NODES_NAME, GetLocalOmgContext().net_out_nodes),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_OUT_NODES_NAME.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed.");
return FAILED);
GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_,
@@ -415,6 +444,8 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) {
string fp_ceiling_mode;
if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) {
if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_FP_CEILING_MODE.c_str(), __FUNCTION__);
GELOGE(FAILED, "Failed to set attr ATTR_FP_CEILING_MODE");
return FAILED;
}
@@ -429,22 +460,30 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) {
int64_t core_type = (ge_core_type == kVectorCore) ? 1 : 0;
GELOGI("core_type: %ld", core_type);
if (!ge::AttrUtils::SetInt(&model, ATTR_MODEL_CORE_TYPE, core_type)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_MODEL_CORE_TYPE.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt of ATTR_CORE_TYPE failed.");
}
InitL1FusionOption();
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(&model, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_NAME_SWITCH_FOR_L1_FUSION.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetBool of ATTR_NAME_SWITCH_FOR_L1_FUSION failed.");
return FAILED);
const DumpProperties &dump_properties = DumpManager::GetInstance().GetDumpProperties(session_id_);
bool is_op_debug = dump_properties.IsOpDebugOpen();
if (is_op_debug) {
if (!ge::AttrUtils::SetBool(&model, ATTR_OP_DEBUG_FLAG, is_op_debug)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_OP_DEBUG_FLAG.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_FLAG failed.");
return FAILED;
}
uint32_t op_debug_mode = dump_properties.GetOpDebugMode();
GELOGI("Get op debug mode:%d", op_debug_mode);
if (!ge::AttrUtils::SetInt(&model, ATTR_OP_DEBUG_MODE, op_debug_mode)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed when %s",
ATTR_OP_DEBUG_MODE.c_str(), __FUNCTION__);
GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_MODE failed.");
return FAILED;
}
@@ -516,6 +555,8 @@ Status ModelBuilder::MergeWeights() {
// If MutableTensor failed, weight is nullptr.
(void)ge::AttrUtils::MutableTensor(op_desc, ATTR_NAME_WEIGHTS, weight);
if (weight == nullptr) {
REPORT_INNER_ERROR("E19999", "Can't get const weight in op:%s(%s) when %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "Can't get const op weight, name: %s", node->GetName().c_str());
return FAILED;
}
@@ -538,8 +579,15 @@ Status ModelBuilder::MergeWeights() {
continue;
}
if (weight_data.data() != nullptr) {
GE_IF_BOOL_EXEC(base_addr == nullptr, GELOGE(FAILED, "Base addr is nullptr."); return FAILED);
GE_IF_BOOL_EXEC(base_addr == nullptr,
REPORT_INNER_ERROR("E19999", "Check weight in op:%s(%s) is nullptr when %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "Base addr is nullptr.");
return FAILED);
if (weight_offset_ - offset < weight_data.size()) {
// Argument order must follow the format string: op name, op type, sizes, function.
REPORT_INNER_ERROR("E19999", "left weight size not enough for op:%s(%s) left_size:%lu, weight_size:%lu when %s",
                   op_desc->GetName().c_str(), op_desc->GetType().c_str(),
                   weight_offset_ - offset, weight_data.size(), __FUNCTION__);
GELOGE(FAILED, "left weight size not enough. left_size:%lu, weight_size:%lu",
weight_offset_ - offset, weight_data.size());
return FAILED;
@@ -551,6 +599,9 @@ Status ModelBuilder::MergeWeights() {
auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast<void *>(src_ptr),
SECUREC_MEM_MAX_LEN);
if (err != EOK) {
REPORT_CALL_ERROR("E19999", "mem copy failed. errret:%u, "
"dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu, when %s",
err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN, __FUNCTION__);
GELOGE(FAILED, "mem copy failed. errret:%u, "
"dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu",
err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN);
@@ -562,6 +613,9 @@ Status ModelBuilder::MergeWeights() {
}
auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), left_size, reinterpret_cast<void *>(src_ptr), left_size);
if (err != EOK) {
REPORT_CALL_ERROR("E19999", "mem copy failed. errret:%u, "
"dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu, when %s",
err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN, __FUNCTION__);
GELOGE(FAILED, "mem copy failed. errret:%u, "
"dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu",
err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN);
@@ -602,6 +656,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
}
GE_IF_BOOL_EXEC(tbe_kernel == nullptr, continue);
if (tbe_name_set.count(tbe_kernel->GetName()) > 0) {
REPORT_INNER_ERROR("E19999", "tbe_kernel name %s can't be the same, judge for op:%s(%s), when %s",
tbe_kernel->GetName().c_str(), n->GetName().c_str(), n->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "tbe_kernel name %s can't be the same", tbe_kernel->GetName().c_str());
return FAILED;
}
@@ -618,6 +674,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr());
GE_IF_BOOL_EXEC(cust_aicpu_kernel == nullptr, continue);
if (aicpu_name_set.count(cust_aicpu_kernel->GetName()) > 0) {
REPORT_INNER_ERROR("E19999", "aicpu_kernel name %s can't be the same, judge for op:%s(%s), when %s",
cust_aicpu_kernel->GetName().c_str(), n->GetName().c_str(), n->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "aicpu_kernel name %s can't be the same", cust_aicpu_kernel->GetName().c_str());
return FAILED;
}
@@ -640,6 +698,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
// Add task
GeAttrValue::BYTES task_def_bytes;
if (!AttrUtils::GetZeroCopyBytes(model, MODEL_ATTR_TASKS, task_def_bytes)) {
REPORT_CALL_ERROR("E19999", "Get attr:%s in model fail when %s", MODEL_ATTR_TASKS.c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Get zero copy bytes fail.");
return INTERNAL_ERROR;
}
@@ -675,6 +734,7 @@ void ModelBuilder::SetModelVersion(ge::Model &model) {

Status ModelBuilder::PreBuildModel() {
if ((compute_graph_ == nullptr) || !(compute_graph_->IsValid())) {
REPORT_INNER_ERROR("E19999", "Check compute_graph no valid when %s", __FUNCTION__);
GELOGE(FAILED, "Graph_ is not valid.");
return FAILED;
}
@@ -754,6 +814,7 @@ Status ModelBuilder::CompileSingleOp() {
// Create ge instance
std::shared_ptr<GELib> instance = ge::GELib::GetInstance();
if ((instance == nullptr) || !instance->InitFlag()) {
REPORT_INNER_ERROR("E19999", "Check GELib instance not init before when %s", __FUNCTION__);
GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "CompileSingleOp failed.");
return ge::GE_CLI_GE_NOT_INITIALIZED;
}
@@ -775,6 +836,8 @@ Status ModelBuilder::CompileSingleOp() {
(void)instance->DNNEngineManagerObj().GetDNNEngineName(node);
kernel_lib_name = op_desc->GetOpKernelLibName();
if (kernel_lib_name.empty()) {
REPORT_INNER_ERROR("E19999", "Check kernel lib name empty of op:%s(%s) when %s",
node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__);
GELOGE(ge::INTERNAL_ERROR, "Get node:%s(%s) kernel lib failed.", node->GetName().c_str(),
node->GetType().c_str());
return ge::INTERNAL_ERROR;
@@ -785,6 +848,8 @@ Status ModelBuilder::CompileSingleOp() {
if (kernel_info != nullptr) {
node_vector_map[kernel_lib_name].emplace_back(node);
} else {
REPORT_INNER_ERROR("E19999", "Get ops kernel info store failed for op:%s(%s), op_kernel_name:%s, when %s",
node->GetName().c_str(), node->GetType().c_str(), kernel_lib_name.c_str(), __FUNCTION__);
GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get op %s ops kernel info store failed", node->GetName().c_str());
return ge::GE_GRAPH_PARAM_NULLPTR;
}
@@ -800,6 +865,8 @@ Status ModelBuilder::CompileSingleOp() {
GELOGI("[GEPERFTRACE] The node size of compile op of %s is %zu", kernel_lib_name.c_str(), node_vector.size());
GE_TIMESTAMP_ADD(BatchCompileOp);
if (ret != ge::SUCCESS) {
  // The kernel lib name now has its own %s, and the node count (a size_t) is printed with %zu
  // to match the GELOGI a few lines above.
  REPORT_CALL_ERROR("E19999", "Batch compile op failed, kernel lib name:%s, node size:%zu, when %s",
                    kernel_lib_name.c_str(), node_vector.size(), __FUNCTION__);
  GELOGE(ret, "Compile op failed, kernel lib name is %s", kernel_lib_name.c_str());
  return ret;
}


+ 22
- 0
ge/graph/build/run_context.cc View File

@@ -27,15 +27,21 @@ Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_siz
std::map<int64_t, uint64_t> mem_type_to_data_mem_size, uint8_t *weight_mem_base,
uint64_t weight_mem_size) {
if ((data_mem_size > 0) && (data_mem_base == nullptr)) {
REPORT_INNER_ERROR("E19999", "InitMemInfo param data_mem_base is null but data_mem_size = %lu", data_mem_size);
GELOGE(PARAM_INVALID, "InitMemInfo param data_mem_base is null but data_mem_size = %lu.", data_mem_size);
return PARAM_INVALID;
}
if ((weight_mem_size > 0) && (weight_mem_base == nullptr)) {
REPORT_INNER_ERROR("E19999", "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu",
weight_mem_size);
GELOGE(PARAM_INVALID, "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu.", weight_mem_size);
return PARAM_INVALID;
}
if (mem_type_to_data_mem_base.empty() || mem_type_to_data_mem_size.empty() ||
mem_type_to_data_mem_base.size() != mem_type_to_data_mem_size.size()) {
REPORT_INNER_ERROR("E19999", "InitMemInfo param mem_type_to_data_mem_base size[%zu] "
"is not equal to the size of mem_type_to_data_mem_size[%zu].",
mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size());
GELOGE(PARAM_INVALID,
"InitMemInfo param mem_type_to_data_mem_base size[%zu] is not equal to the size of "
"mem_type_to_data_mem_size[%zu].",
@@ -55,6 +61,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even
// Create rt model
rtError_t rt_ret = rtModelCreate(&rt_model_, 0);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "call rtModelCreate fail, ret:%d, when %s", static_cast<int>(rt_ret), __FUNCTION__);
GELOGE(RT_FAILED, "rtModelCreate failed. rt_ret = %d", static_cast<int>(rt_ret));
return RT_FAILED;
}
@@ -64,6 +71,8 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even
rtStream_t stream = nullptr;
rt_ret = rtStreamCreate(&stream, 0);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "call rtStreamCreate fail, ret:%d, index:%u, when %s",
static_cast<int>(rt_ret), i, __FUNCTION__);
GELOGE(RT_FAILED, "rtStreamCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
return RT_FAILED;
}
@@ -71,6 +80,8 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even

rt_ret = rtModelBindStream(rt_model_, stream, 0);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "call rtModelBindStream fail, ret:%d, index:%u, when %s",
static_cast<int>(rt_ret), i, __FUNCTION__);
GELOGE(RT_FAILED, "Bind stream and model failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
return RT_FAILED;
}
@@ -81,6 +92,8 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even
rtEvent_t event = nullptr;
rt_ret = rtEventCreate(&event);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "call rtEventCreate fail, ret:%d, index:%u, when %s",
static_cast<int>(rt_ret), i, __FUNCTION__);
GELOGE(RT_FAILED, "rtEventCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
return RT_FAILED;
}
@@ -92,6 +105,8 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even
rtLabel_t label = nullptr;
rt_ret = rtLabelCreateV2(&label, rt_model_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "call rtLabelCreateV2 fail, ret:%d, index:%u, when %s",
static_cast<int>(rt_ret), i, __FUNCTION__);
GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
return RT_FAILED;
}
@@ -143,12 +158,15 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra
GELOGD("Begin to Create RunContext, session_id = %lu", session_id);
// check params
if (graph == nullptr) {
REPORT_INNER_ERROR("E19999", "Check param graph nullptr, session_id:%lu, when %s", session_id, __FUNCTION__);
GELOGE(PARAM_INVALID, "CreateRunContext param graph is null. session_id=%lu", session_id);
return PARAM_INVALID;
}

uint32_t stream_num = 0;
if (!AttrUtils::GetInt(&model, ATTR_MODEL_STREAM_NUM, stream_num)) {
  // The error code must be the first argument, as in every other REPORT_INNER_ERROR call here;
  // without it the message is taken as the code and the format arguments shift by one.
  REPORT_INNER_ERROR("E19999", "Get Attr:%s fail for model, session_id:%lu, when %s",
                     ATTR_MODEL_STREAM_NUM.c_str(), session_id, __FUNCTION__);
  GELOGE(INTERNAL_ERROR, "Get stream_num attr from model_def failed. session_id=%lu", session_id);
  return INTERNAL_ERROR;
}
@@ -156,6 +174,8 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra

uint32_t event_num = 0;
if (!AttrUtils::GetInt(&model, ATTR_MODEL_EVENT_NUM, event_num)) {
  // The error code must be the first argument, as in every other REPORT_INNER_ERROR call here;
  // without it the message is taken as the code and the format arguments shift by one.
  REPORT_INNER_ERROR("E19999", "Get Attr:%s fail for model, session_id:%lu, when %s",
                     ATTR_MODEL_EVENT_NUM.c_str(), session_id, __FUNCTION__);
  GELOGE(INTERNAL_ERROR, "Get event_num attr from model failed. session_id=%lu", session_id);
  return INTERNAL_ERROR;
}
@@ -163,6 +183,8 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra

uint32_t label_num = 0;
if (!AttrUtils::GetInt(&model, ATTR_MODEL_LABEL_NUM, label_num)) {
  // The error code must be the first argument, as in every other REPORT_INNER_ERROR call here;
  // without it the message is taken as the code and the format arguments shift by one.
  REPORT_INNER_ERROR("E19999", "Get Attr:%s fail for model, session_id:%lu, when %s",
                     ATTR_MODEL_LABEL_NUM.c_str(), session_id, __FUNCTION__);
  GELOGE(INTERNAL_ERROR, "Get label_num attr from model failed. session_id=%lu", session_id);
  return INTERNAL_ERROR;
}


+ 65
- 3
ge/graph/build/stream_allocator.cc View File

@@ -76,6 +76,7 @@ Status StreamAllocator::AssignLogicalStreams(const std::map<std::string, int> &m

auto gelib = GELib::GetInstance();
if (gelib == nullptr) {
REPORT_INNER_ERROR("E19999", "Check GELib instance nullptr when %s", __FUNCTION__);
GELOGE(FAILED, "Get GELib instance failed.");
return FAILED;
}
@@ -184,6 +185,8 @@ Status StreamAllocator::AssignSingleStream() {
}

if (stream_num_ > 1) {
REPORT_INNER_ERROR("E19999", "The number of ts streams is %ld, only one is supported when %s",
stream_num_, __FUNCTION__);
GELOGE(FAILED, "The number of ts streams is %ld, only one is supported.", stream_num_);
return FAILED;
}
@@ -257,6 +260,9 @@ Status StreamAllocator::SetActiveStreamsByLabel() {
}
}
GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, activated_stream_list),
REPORT_INNER_ERROR("E19999", "Set Attr:%s for op:%s(%s) failed when %s",
ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetListInt failed.");
return FAILED);
}
@@ -307,6 +313,9 @@ Status StreamAllocator::SetActiveStreamsForSubgraphs() {
}

if (!AttrUtils::SetListInt(first_active_node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s for op:%s(%s) failed when %s",
ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
first_active_node->GetName().c_str(), first_active_node->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "Set active streams for node %s failed.", first_active_node->GetName().c_str());
return FAILED;
}
@@ -376,6 +385,8 @@ Status StreamAllocator::InsertOneEventInTwoNodes(const NodePtr &cur_node, const
}

if (next_stream_id == kInvalidStream) {
REPORT_INNER_ERROR("E19999", "Stream id of next_node %s(%s) should not be %ld when %s",
next_node->GetName().c_str(), next_node->GetType().c_str(), kInvalidStream, __FUNCTION__);
GELOGE(FAILED, "Stream id of next_node %s should not be %ld", next_node->GetName().c_str(), kInvalidStream);
return FAILED;
}
@@ -589,8 +600,14 @@ Status StreamAllocator::OptimizeByStreamActivate() {
// -> stream(streamSwitch) -> stream(streamActivate) -> stream(stream true or false)
// No need to insert an event between node in stream(normal) and node in stream(stream true or false)
bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr, const NodePtr &recv_node_ptr) const {
GE_CHECK_NOTNULL_EXEC(send_node_ptr->GetOpDesc(), GELOGE(FAILED, "op desc is nullptr"); return false);
GE_CHECK_NOTNULL_EXEC(recv_node_ptr->GetOpDesc(), GELOGE(FAILED, "op desc is nullptr"); return false);
GE_CHECK_NOTNULL_EXEC(send_node_ptr->GetOpDesc(),
REPORT_INNER_ERROR("E19999", "Check param send_node_ptr nullptr when %s", __FUNCTION__);
GELOGE(FAILED, "op desc is nullptr");
return false);
GE_CHECK_NOTNULL_EXEC(recv_node_ptr->GetOpDesc(),
REPORT_INNER_ERROR("E19999", "Check param recv_node_ptr nullptr when %s", __FUNCTION__);
GELOGE(FAILED, "op desc is nullptr");
return false);
auto cur_stream_id = send_node_ptr->GetOpDesc()->GetStreamId();
if (AttrUtils::HasAttr(recv_node_ptr->GetOpDesc(), ATTR_NAME_STREAM_LABEL)) {
// find streamActivate node
@@ -714,6 +731,8 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) {
continue;
}
if (stream_id > last_stream_id) {
REPORT_INNER_ERROR("E19999", "streamid(%ld) > last_stream_id(%ld), check invalid when %s",
stream_id, last_stream_id, __FUNCTION__);
GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id);
return FAILED;
}
@@ -727,6 +746,8 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) {
stream_continuous_2_node_num_map[continuous_stream_label]++;
// return error
if (stream_continuous_2_node_num_map[continuous_stream_label] > max_node_num_one_stream) {
REPORT_INNER_ERROR("E19999", "Check node[%s] stream_id[%ld] continuous stream label[%s] unsatisfied when %s",
op_desc->GetName().c_str(), stream_id, continuous_stream_label.c_str(), __FUNCTION__);
GELOGE(FAILED, "SplitStreams:node[%s] stream_id[%ld] continuous stream label[%s] unsatisfied ",
op_desc->GetName().c_str(), stream_id, continuous_stream_label.c_str());
return FAILED;
@@ -881,6 +902,8 @@ Status StreamAllocator::UpdateActiveStreamsForSwitchNode(NodePtr &switch_node) {
GE_CHECK_NOTNULL(op_desc);

if (!AttrUtils::SetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, stream_ids)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s) when %s", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetListInt failed.");
return FAILED;
}
@@ -895,6 +918,8 @@ Status StreamAllocator::InsertActiveNodesAfterSwitch(NodePtr &switch_node, vecto
vector<string> ori_active_label_list;
if (!AttrUtils::GetListStr(switch_desc, ATTR_NAME_ACTIVE_LABEL_LIST, ori_active_label_list) ||
ori_active_label_list.empty()) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s fail for op:%s(%s) when %s", ATTR_NAME_ACTIVE_LABEL_LIST.c_str(),
switch_node->GetName().c_str(), switch_node->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Get active label list of switch %s failed.", switch_node->GetName().c_str());
return INTERNAL_ERROR;
}
@@ -918,6 +943,8 @@ Status StreamAllocator::InsertActiveNodesAfterSwitch(NodePtr &switch_node, vecto
for (auto &active_node : added_active_nodes) {
GE_CHECK_NOTNULL(switch_node->GetOutControlAnchor());
if (switch_node->GetOutControlAnchor()->LinkTo(active_node->GetInControlAnchor()) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Link from %s to %s failed when %s",
switch_node->GetName().c_str(), active_node->GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "Link %s to %s failed.", switch_node->GetName().c_str(), active_node->GetName().c_str());
return FAILED;
}
@@ -933,6 +960,8 @@ Status StreamAllocator::UpdateActiveStreamsForActiveNode(const vector<set<int64_
vector<uint32_t> new_active_streams = active_streams;
for (uint32_t logical_stream : active_streams) {
if (static_cast<size_t>(logical_stream) >= split_streams.size()) {
REPORT_INNER_ERROR("E19999", "Check logical stream:%u is out of range:%zu when %s",
logical_stream, split_streams.size(), __FUNCTION__);
GELOGE(FAILED, "logical stream is out of range.");
return FAILED;
}
@@ -951,6 +980,8 @@ Status StreamAllocator::UpdateActiveStreamsForActiveNode(const vector<set<int64_
}
}
if (!AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, new_active_streams)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s) when %s", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "Set active streams for node %s failed.", node->GetName().c_str());
return FAILED;
}
@@ -991,6 +1022,8 @@ Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const {
new_active_streams.emplace(static_cast<uint32_t>(new_split_stream));
active_streams.assign(new_active_streams.begin(), new_active_streams.end());
if (!AttrUtils::SetListInt(active_op, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s) when %s", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
active_op->GetName().c_str(), active_op->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "Set active streams for node %s failed.", active_node->GetName().c_str());
return FAILED;
}
@@ -1059,6 +1092,8 @@ Status StreamAllocator::SetActiveStreamsForLoop() {

NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node);
if (pre_switch_node == nullptr) {
REPORT_INNER_ERROR("E19999", "Find switch node before loop active node %s fail when %s",
node->GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str());
return FAILED;
}
@@ -1066,6 +1101,9 @@ Status StreamAllocator::SetActiveStreamsForLoop() {
if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) ||
activated_label_list.empty()) {
GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s) when %s",
ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetListInt failed.");
return FAILED);
for (const auto &stream_id : loop_active_streams) {
@@ -1112,6 +1150,8 @@ Status StreamAllocator::CheckStreamActived() const {
uint32_t stream_id = static_cast<uint32_t>(node->GetOpDesc()->GetStreamId());
auto iter = find(active_streams.begin(), active_streams.end(), stream_id);
if (iter != active_streams.end()) {
REPORT_INNER_ERROR("E19999", "Node:%s(%s) cannot active its own stream %u, check invalid when %s",
node->GetName().c_str(), node->GetType().c_str(), stream_id, __FUNCTION__);
GELOGE(FAILED, "Node %s cannot active its own stream %u.", node->GetName().c_str(), stream_id);
return FAILED;
}
@@ -1139,6 +1179,7 @@ Status StreamAllocator::RefreshContinuousEvents() {
for (size_t i = 0; i < send_events.size(); i++) {
auto find_it = old_to_new_events.find(send_events[i]);
if (find_it == old_to_new_events.end()) {
REPORT_INNER_ERROR("E19999", "Check invalid send event %u when %s", send_events[i], __FUNCTION__);
GELOGE(FAILED, "RefreshContinuousEvents: invalid send event %u", send_events[i]);
return FAILED;
}
@@ -1152,6 +1193,7 @@ Status StreamAllocator::RefreshContinuousEvents() {
for (size_t i = 0; i < recv_events.size(); i++) {
auto find_it = old_to_new_events.find(recv_events[i]);
if (find_it == old_to_new_events.end()) {
REPORT_INNER_ERROR("E19999", "Check invalid recv event %u when %s", recv_events[i], __FUNCTION__);
GELOGE(FAILED, "RefreshContinuousEvents: invalid recv event %u", recv_events[i]);
return FAILED;
}
@@ -1180,7 +1222,11 @@ Status StreamAllocator::InsertSyncEventNodes() {

int64_t temp_stream_id = node->GetOpDesc()->GetStreamId();
op_desc_ptr->SetStreamId(temp_stream_id);
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(op_desc_ptr, RECV_ATTR_EVENT_ID, event_id), GELOGE(FAILED, "SetInt failed.");
// On SetInt failure, report and log, then abort. The report format has five specifiers, so
// event_id is passed for "event_id:%u"; previously __FUNCTION__ was consumed by %u and the
// trailing %s read past the argument list.
// NOTE(review): %u assumes event_id is a 32-bit unsigned value, as the other event ids printed
// with %u in this file suggest - confirm against its declaration.
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(op_desc_ptr, RECV_ATTR_EVENT_ID, event_id),
                 REPORT_INNER_ERROR("E19999", "Set Attr:%s for op:%s(%s) failed, event_id:%u, when %s",
                                    RECV_ATTR_EVENT_ID.c_str(),
                                    node->GetName().c_str(), node->GetType().c_str(), event_id, __FUNCTION__);
                 GELOGE(FAILED, "SetInt failed.");
                 return FAILED);
(void)AttrUtils::SetListStr(op_desc_ptr, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES,
std::move(std::vector<std::string>()));
@@ -1189,6 +1235,8 @@ Status StreamAllocator::InsertSyncEventNodes() {
GE_CHECK_NOTNULL(recv_node->GetOutControlAnchor());
Status status = GraphUtils::AddEdge(recv_node->GetOutControlAnchor(), node->GetInControlAnchor());
if (status != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Add edge from node %s to node %s failed when %s",
recv_node->GetName().c_str(), node->GetName().c_str(), __FUNCTION__);
GELOGE(status, "Add edge for node %s and node %s failed.", recv_node->GetName().c_str(),
node->GetName().c_str());
return status;
@@ -1217,6 +1265,8 @@ Status StreamAllocator::InsertSyncEventNodes() {
GE_CHECK_NOTNULL(send_node->GetInControlAnchor());
Status status = GraphUtils::AddEdge(node->GetOutControlAnchor(), send_node->GetInControlAnchor());
if (status != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Add edge from node %s to node %s failed when %s",
node->GetName().c_str(), send_node->GetName().c_str(), __FUNCTION__);
GELOGE(status, "Add edge for node %s and node %s failed.", node->GetName().c_str(),
send_node->GetName().c_str());
return status;
@@ -1228,6 +1278,8 @@ Status StreamAllocator::InsertSyncEventNodes() {

Status status = whole_graph_->InsertGraphEvents();
if (status != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Insert Graph Events fail, graph:%s, when %s",
whole_graph_->GetName().c_str(), __FUNCTION__);
GELOGE(status, "Graph ReorderEventNodes failed");
return status;
}
@@ -1274,6 +1326,8 @@ Status StreamAllocator::GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stre
}
rtError_t ret = rtGetMaxStreamAndTask(stream_type, &max_stream_count, &max_task_count);
if (ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "call rtGetMaxStreamAndTask fail, ret:%d, stream_type:%u, when %s",
static_cast<int>(ret), stream_type, __FUNCTION__);
GELOGE(FAILED, "Get max stream and task count by rts failed.");
return FAILED;
}
@@ -1416,6 +1470,7 @@ Status StreamAllocator::AddActiveNodes(NodePtr &switch_node, const vector<string
for (size_t i = 0; i < label_num; i++) {
const string &active_label = ori_active_label_list[i];
if (labeled_streams_.find(active_label) == labeled_streams_.end()) {
REPORT_INNER_ERROR("E19999", "can not find stream label:%s when %s", active_label.c_str(), __FUNCTION__);
GELOGE(FAILED, "can not find stream label %s", active_label.c_str());
return FAILED;
}
@@ -1442,11 +1497,15 @@ Status StreamAllocator::AddActiveNodes(NodePtr &switch_node, const vector<string
}
GE_CHECK_NOTNULL(switch_node->GetOutControlAnchor());
if (switch_node->GetOutControlAnchor()->Unlink(node->GetInControlAnchor()) != GRAPH_SUCCESS) {
  // The error code must come first, as in the other REPORT_CALL_ERROR calls in this file;
  // without it the message string is taken as the code and the arguments shift.
  REPORT_CALL_ERROR("E19999", "Unlink %s to %s failed when %s",
                    switch_node->GetName().c_str(), node->GetName().c_str(), __FUNCTION__);
  GELOGE(FAILED, "Unlink %s to %s failed.", switch_node->GetName().c_str(), node->GetName().c_str());
  return FAILED;
}
GE_CHECK_NOTNULL(active_node->GetOutControlAnchor());
if (active_node->GetOutControlAnchor()->LinkTo(node->GetInControlAnchor()) != GRAPH_SUCCESS) {
  // The error code must come first, as in the other REPORT_CALL_ERROR calls in this file;
  // without it the message string is taken as the code and the arguments shift.
  REPORT_CALL_ERROR("E19999", "Link %s to %s failed when %s",
                    active_node->GetName().c_str(), node->GetName().c_str(), __FUNCTION__);
  GELOGE(FAILED, "Link %s to %s failed.", active_node->GetName().c_str(), node->GetName().c_str());
  return FAILED;
}
@@ -1477,12 +1536,15 @@ Status StreamAllocator::AddActiveNodes(NodePtr &switch_node, const vector<string

Status StreamAllocator::SetActiveStreamList(NodePtr &active_node, const string &active_label) {
if (labeled_streams_.find(active_label) == labeled_streams_.end()) {
REPORT_INNER_ERROR("E19999", "Can not find stream label:%s when %s", active_label.c_str(), __FUNCTION__);
GELOGE(FAILED, "Can not find stream label %s.", active_label.c_str());
return FAILED;
}
set<int64_t> &streams = labeled_streams_[active_label];
vector<int64_t> active_streams(streams.begin(), streams.end());
if (!AttrUtils::SetListInt(active_node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s) when %s", ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
active_node->GetName().c_str(), active_node->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_ACTIVE_STREAM_LIST.c_str());
return FAILED;
}


+ 9
- 0
ge/graph/build/stream_graph_optimizer.cc View File

@@ -14,6 +14,9 @@
* limitations under the License.
*/
#include "stream_graph_optimizer.h"

#include <securec.h>

#include "common/util.h"
#include "framework/common/debug/ge_log.h"
#include "graph/utils/node_utils.h"
@@ -122,6 +125,9 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
GE_CHECK_NOTNULL(op_desc);
int64_t stream_id = op_desc->GetStreamId();
if (static_cast<size_t>(stream_id) >= run_context.graphStreamList.size()) {
REPORT_INNER_ERROR("E19999", "Check stream_id:%ld in op:%s(%s) is bigger than run_context.graphStreamList.size():%zu "
"when %s", stream_id, op_desc->GetName().c_str(),
op_desc->GetType().c_str(), run_context.graphStreamList.size(), __FUNCTION__);
GELOGE(FAILED, "stream_id %ld is bigger than run_context.graphStreamList.size() %zu", stream_id,
run_context.graphStreamList.size());
return FAILED;
@@ -135,6 +141,9 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
GE_CHECK_NOTNULL(*iter);
Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);
if (ret != SUCCESS) {
  // Report only on failure: this call used to sit before the status check, so an error was
  // recorded for every optimizer invocation, including successful ones.
  REPORT_CALL_ERROR("E19999", "Call optimize streamed subgraph failed, subgraph: %s, engine_name: %s, graph "
                    "Optimizer num: %zu, ret: %u", subgraph->GetName().c_str(), engine_name.c_str(),
                    graph_optimizers.size(), ret);
GELOGE(
ret,


+ 77
- 4
ge/graph/build/task_generator.cc View File

@@ -69,6 +69,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
GELOGD("Begin to Get TaskInfo. session_id=%lu", session_id);
// Check params
if (graph == nullptr) {
REPORT_INNER_ERROR("E19999", "Check param graph is null, session_id:%lu, when %s", session_id, __FUNCTION__);
GELOGE(PARAM_INVALID, "GetTaskInfo param graph is null. session_id=%lu", session_id);
return PARAM_INVALID;
}
@@ -93,6 +94,8 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
op_name.push_back(iter.second);
}
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, ATTR_MODEL_TASK_INDEX_OP_NAME, op_name),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s when %s",
ATTR_MODEL_TASK_INDEX_OP_NAME.c_str(), model.GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetListStr failed.");
return FAILED);

@@ -106,6 +109,8 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
for (const TaskDef &task_def_temp : task_def_list) {
TaskDef *task_def = model_task_def.add_task();
if (task_def == nullptr) {
REPORT_INNER_ERROR("E19999", "Add task_def in ModelTaskDef fail, session_id:%lu, graph:%s, model:%s, when %s",
session_id, graph->GetName().c_str(), model.GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "task_def is nullptr.");
return FAILED;
}
@@ -126,30 +131,44 @@ Status TaskGenerator::AddModelTaskToModel(const ModelTaskDef &model_task_def, ui
RunContext &run_context) {
GE_CHK_BOOL_EXEC(
AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, reinterpret_cast<uintptr_t>(run_context.dataMemBase)),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s when %s",
MODEL_ATTR_TASK_GEN_BASE_ADDR.c_str(), model.GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_BASE_ADDR failed.");
return FAILED);
GE_CHK_BOOL_EXEC(
AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, reinterpret_cast<uintptr_t>(run_context.weightMemBase)),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s when %s",
MODEL_ATTR_TASK_GEN_WEIGHT_ADDR.c_str(), model.GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_WEIGHT_ADDR failed.");
return FAILED);
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(model, ATTR_MODEL_TASK_GEN_VAR_ADDR, reinterpret_cast<uintptr_t>(var_mem_base_)),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s when %s",
ATTR_MODEL_TASK_GEN_VAR_ADDR.c_str(), model.GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt ATTR_MODEL_TASK_GEN_VAR_ADDR failed.");
return FAILED);
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(model, ATTR_MODEL_VAR_SIZE, var_mem_size_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s when %s",
ATTR_MODEL_VAR_SIZE.c_str(), model.GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetInt ATTR_MODEL_VAR_SIZE failed.");
return FAILED);
// Store the session id on the model; on failure report, log and return FAILED.
// The report text now reads "model:%s" like the sibling attribute checks (was the typo "mode:%s").
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(model, MODEL_ATTR_SESSION_ID, session_id),
                 REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for model:%s when %s",
                                    MODEL_ATTR_SESSION_ID.c_str(), model.GetName().c_str(), __FUNCTION__);
                 GELOGE(FAILED, "SetInt MODEL_ATTR_SESSION_ID failed.");
                 return FAILED);

size_t task_size = model_task_def.ByteSizeLong();
ge::Buffer serial_buff(task_size);
if (!model_task_def.SerializePartialToArray(serial_buff.GetData(), static_cast<int>(task_size))) {
REPORT_INNER_ERROR("E19999", "model_task_def's serialize failed, model name = %s, task_size=%zu when %s",
model.GetName().c_str(), task_size, __FUNCTION__);
GELOGE(FAILED, "model_task_def's serialize failed, model name = %s, task_size=%zu.", model.GetName().c_str(),
task_size);
return FAILED;
}
if (!AttrUtils::SetZeroCopyBytes(model, MODEL_ATTR_TASKS, std::move(serial_buff))) {
// __FUNCTION__ was passed without a matching specifier; the format now ends with "when %s"
// like the serialize-failure report just above.
REPORT_INNER_ERROR("E19999", "Set model task to model failed, model name = %s, task_size=%zu when %s",
                   model.GetName().c_str(), task_size, __FUNCTION__);
GELOGE(FAILED, "Set model task to model failed, model name = %s, task_size=%zu.", model.GetName().c_str(),
task_size);
return FAILED;
@@ -167,7 +186,10 @@ Status TaskGenerator::UpdateOpIsVarAttr(const OpDescPtr &op_desc, uint64_t sessi
for (int64_t input : input_offsets) {
input_var.push_back(VarManager::Instance(session_id)->IsVarAddr(input));
}
GE_CHK_BOOL_EXEC(AttrUtils::SetListBool(op_desc, kIsInputVar, input_var), GELOGE(FAILED, "SetListBool failed.");
GE_CHK_BOOL_EXEC(AttrUtils::SetListBool(op_desc, kIsInputVar, input_var),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s) when %s", kIsInputVar,
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetListBool failed.");
return FAILED);
}

@@ -177,7 +199,10 @@ Status TaskGenerator::UpdateOpIsVarAttr(const OpDescPtr &op_desc, uint64_t sessi
for (int64_t output : output_offsets) {
output_var.push_back(VarManager::Instance(session_id)->IsVarAddr(output));
}
GE_CHK_BOOL_EXEC(AttrUtils::SetListBool(op_desc, kIsOutputVar, output_var), GELOGE(FAILED, "SetListBool failed.");
GE_CHK_BOOL_EXEC(AttrUtils::SetListBool(op_desc, kIsOutputVar, output_var),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s) when %s", kIsOutputVar,
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "SetListBool failed.");
return FAILED);
}
return SUCCESS;
@@ -252,6 +277,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str());
std::shared_ptr<GELib> ge_lib = GELib::GetInstance();
if ((ge_lib == nullptr) || !ge_lib->InitFlag()) {
REPORT_INNER_ERROR("E19999", "Check GELib instance not init before when %s", __FUNCTION__);
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed.");
return GE_CLI_GE_NOT_INITIALIZED;
}
@@ -319,6 +345,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
}
auto kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name);
if (kernel_info_store == nullptr) {
REPORT_INNER_ERROR("E19999", "Get ops kernel info store failed for op:%s(%s), op_kernel_name:%s when %s",
node->GetName().c_str(), node->GetType().c_str(), op_kernel_lib_name.c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR,
"No ops kernel store or ops kernel builder found. node:%s(%s), op_kernel_lib_name=%s.",
name.c_str(),
@@ -344,6 +372,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
auto ret = OpsKernelBuilderManager::Instance().GenerateTask(*node, run_context, task_def_list);
GE_TIMESTAMP_ADD(GenerateTask);
if (ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Call OpsKernelBuilderManager GenerateTask fail for op:%s(%s) when %s",
node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__);
GELOGE(ret, "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task failed.",
op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id);
return ret;
@@ -353,6 +383,9 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
size_t task_list_size_after = task_def_list.size();
// If tasks is reduced
if (task_list_size_after < task_list_size_before) {
REPORT_INNER_ERROR("E19999", "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task "
"but task num from %zu to %zu, check invalid", op_kernel_lib_name.c_str(), name.c_str(),
type.c_str(), op_id, stream_id, task_list_size_before, task_list_size_after);
GELOGE(FAILED, "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task. but task num from %zu to %zu.",
op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, task_list_size_before,
task_list_size_after);
@@ -417,6 +450,9 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
size_t task_list_size_before = task_def_list.size();
OpsKernelInfoStorePtr kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name);
if (kernel_info_store == nullptr) {
REPORT_INNER_ERROR("E19999", "Get ops kernel info store failed for op:%s(%s), op_kernel_name:%s when %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(),
op_kernel_lib_name.c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR,
"Fusion: No ops kernel store or ops kernel builder found. fusion_node:%s(%s), op_kernel_lib_name=%s.",
fusion_node_name.c_str(), fusion_node_type.c_str(), op_kernel_lib_name.c_str());
@@ -433,6 +469,9 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
int64_t op_id = op_desc->GetId();
int64_t stream_id = op_desc->GetStreamId();
if (stream_id < 0 || stream_id >= (int64_t)run_context.graphStreamList.size()) {
REPORT_INNER_ERROR("E19999", "Fusion: fusion_node[name:%s(%s), id:%ld] stream id is invalid, "
"stream list size=%zu, when %s", fusion_node_name.c_str(), fusion_node_type.c_str(),
op_id, run_context.graphStreamList.size(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Fusion: fusion_node[name:%s(%s), id:%ld] stream id is invalid, stream list size=%zu",
fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, run_context.graphStreamList.size());
return INTERNAL_ERROR;
@@ -444,6 +483,9 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id);
ret = OpsKernelBuilderManager::Instance().GenerateTask(*fusion_node, run_context, task_def_list);
if (ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", " Call %s to generate fusion_node:[fusion_node_name:%s(%s), "
"id:%ld, stream_id:%ld] task failed when %s", op_kernel_lib_name.c_str(),
fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, __FUNCTION__);
GELOGE(ret,
"Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), "
"id:%ld, stream_id:%ld] task failed.",
@@ -455,6 +497,10 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
size_t task_list_size_after = task_def_list.size();
// if tasks is reduced
if (task_list_size_after < task_list_size_before) {
// The format has seven specifiers (%s %s %ld %ld %zu %zu %s). The former leading
// op_kernel_lib_name.c_str() argument made eight and shifted every value onto the wrong
// specifier, so it is dropped and the message text is left unchanged.
REPORT_INNER_ERROR("E19999", "InsertProfilingTask for fusion_node:[fusion_node_name:%s(%s), "
                   "id:%ld, stream_id:%ld] task, but task num from %zu to %zu, check invalid when %s",
                   fusion_node_name.c_str(), fusion_node_type.c_str(),
                   op_id, stream_id, task_list_size_before, task_list_size_after, __FUNCTION__);
GELOGE(FAILED,
"Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), "
"id:%ld, stream_id:%ld] task. but task num from %zu to %zu.",
@@ -489,6 +535,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info

Status TaskGenerator::UpdateAnchorStatus(const NodePtr &node) {
if (NodeUtils::SetAllAnchorStatus(node) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "SetAllAnchorStatus fail for op:%s(%s) when %s",
node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "NodeUtils::SetAllAnchorStatus failed.");
return INTERNAL_ERROR;
}
@@ -496,6 +544,8 @@ Status TaskGenerator::UpdateAnchorStatus(const NodePtr &node) {
auto peer_anchor = anchor->GetPeerOutAnchor();
if (peer_anchor == nullptr) {
if (AnchorUtils::SetStatus(anchor, ANCHOR_SUSPEND) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Set in peer anchor status fail for op:%s(%s), anchor_index:%d, when %s",
node->GetName().c_str(), node->GetType().c_str(), anchor->GetIdx(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed.");
return INTERNAL_ERROR;
}
@@ -506,11 +556,15 @@ Status TaskGenerator::UpdateAnchorStatus(const NodePtr &node) {
bool is_const = NodeUtils::GetConstOpType(peer_anchor->GetOwnerNode(), const_type);
if (is_const && (const_type == CONSTANT)) {
if (AnchorUtils::SetStatus(anchor, ANCHOR_CONST) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Set in anchor CONST status fail for op:%s(%s), anchor_index:%d, when %s",
node->GetName().c_str(), node->GetType().c_str(), anchor->GetIdx(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed.");
return INTERNAL_ERROR;
}
} else {
if (AnchorUtils::SetStatus(anchor, ANCHOR_DATA) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Set in anchor DATA status fail for op:%s(%s), anchor_index:%d, when %s",
node->GetName().c_str(), node->GetType().c_str(), anchor->GetIdx(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed.");
return INTERNAL_ERROR;
}
@@ -523,12 +577,15 @@ Status TaskGenerator::UpdateAnchorStatus(const NodePtr &node) {
Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) {
auto ge_lib = GELib::GetInstance();
if ((ge_lib == nullptr) || !ge_lib->InitFlag()) {
REPORT_INNER_ERROR("E19999", "Check GELib instance not init before when %s", __FUNCTION__);
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized or is finalized.");
return GE_CLI_GE_NOT_INITIALIZED;
}

const auto all_nodes = graph->GetNodes(graph->GetGraphUnknownFlag());
if (all_nodes.empty()) {
REPORT_INNER_ERROR("E19999", "Check param all_nodes empty in graph:%s when %s",
graph->GetName().c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "Graph's node is empty");
return GE_GRAPH_GRAPH_NODE_NULL;
}
@@ -584,6 +641,9 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_
for (auto &op_desc : continuous_ops) {
string op_kernel_lib_name = op_desc->GetOpKernelLibName();
if (op_kernel_lib_name.empty()) {
REPORT_INNER_ERROR("E19999", "Get ops kernel info store failed for op:%s(%s), op_kernel_name:%s when %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(),
op_kernel_lib_name.c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "node:%s(%s) get op kernel lib failed.", op_desc->GetName().c_str(),
op_desc->GetType().c_str());
return INTERNAL_ERROR;
@@ -599,9 +659,17 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_

for (auto &it : first_and_last_ops) {
auto &op_pair = it.second;
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(op_pair.first, kIsFirstNode, true), GELOGE(FAILED, "SetBool failed.");
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(op_pair.first, kIsFirstNode, true),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s) when %s", kIsFirstNode,
op_pair.first->GetName().c_str(), op_pair.first->GetType().c_str(),
__FUNCTION__);
GELOGE(FAILED, "SetBool failed.");
return FAILED);
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(op_pair.second, kIsLastNode, true), GELOGE(FAILED, "SetBool failed.");
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(op_pair.second, kIsLastNode, true),
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s) when %s", kIsLastNode,
op_pair.second->GetName().c_str(), op_pair.second->GetType().c_str(),
__FUNCTION__);
GELOGE(FAILED, "SetBool failed.");
return FAILED);
}
}
@@ -906,6 +974,8 @@ Status TaskGenerator::InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std:
for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] == node_index) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
REPORT_INNER_ERROR("E19999", "Multiply result is out of range when calc profiling ar log id "
"for node:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
ar_log_id = i * kProfilingArStep + kProfilingArStartLogid;
@@ -998,6 +1068,8 @@ Status TaskGenerator::InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::
for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] == node_index) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
REPORT_INNER_ERROR("E19999", "Multiply result is out of range when calc profiling ar log id "
"for node:%s(%s)", op_desc->GetName().c_str(), op_desc->GetType().c_str());
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
ar_log_id = i * kProfilingArStep + kProfilingArEndLogid;
@@ -1107,6 +1179,7 @@ Status TaskGenerator::SetUnknownShapeStream(RunContext &run_context, rtStream_t
run_context.stream = stream;
rtError_t rt_ret = rtModelBindStream(run_context.model, stream, 0);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtModelBindStream fail, ret:0x%X when %s", rt_ret, __FUNCTION__);
GELOGE(FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
GE_CHK_RT_RET(rtStreamDestroy(stream));
return FAILED;


+ 3
- 0
inc/framework/common/debug/log.h View File

@@ -19,6 +19,7 @@

#include <string>
#include <sstream>
#include <securec.h>

#include "runtime/rt.h"
#include "common/string_util.h"
@@ -105,6 +106,7 @@
do { \
bool b = (expr); \
if (!b) { \
REPORT_INNER_ERROR("E19999", __VA_ARGS__); \
GELOGE(_status, __VA_ARGS__); \
return _status; \
} \
@@ -193,6 +195,7 @@
{ \
bool b = (expr); \
if (b) { \
REPORT_INNER_ERROR("E19999", __VA_ARGS__); \
DOMI_LOGE(__VA_ARGS__); \
exec_expr; \
return _status; \


Loading…
Cancel
Save