From dbd39095653ba39874ea22d24ca18f25de336387 Mon Sep 17 00:00:00 2001
From: zhengyuanhua
Date: Fri, 18 Dec 2020 15:34:53 +0800
Subject: [PATCH] profiling data add iter num

---
 ge/common/profiling/profiling_manager.cc      |  8 +-
 .../load/new_model_manager/davinci_model.cc   |  5 +-
 ge/hybrid/executor/worker/execution_engine.cc | 90 ++++++++++++++++---
 .../aicore/aicore_node_executor.cc            | 10 +++
 .../aicpu/aicpu_node_executor.cc              | 11 +++
 ge/hybrid/node_executor/task_context.cc       | 16 ++++
 ge/hybrid/node_executor/task_context.h        |  8 ++
 ge/single_op/single_op.cc                     | 10 ++-
 inc/framework/common/ge_types.h               |  2 +
 9 files changed, 142 insertions(+), 18 deletions(-)

diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index 214f58f4..4b3dfb26 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -214,12 +214,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
     uint32_t block_dim = task.block_dim;
     uint32_t task_id = task.task_id;
     uint32_t stream_id = task.stream_id;
+    std::string shape_type = task.shape_type;
+    uint64_t cur_iter_num = task.cur_iter_num;
     data = model_name.append(" ")
                .append(op_name).append(" ")
-               .append(std::to_string(block_dim).append(" ")
+               .append(std::to_string(block_dim)).append(" ")
               .append(std::to_string(task_id)).append(" ")
               .append(std::to_string(stream_id)).append(" ")
-               .append(std::to_string(model_id)).append("\n"));
+               .append(std::to_string(model_id)).append(" ")
+               .append(shape_type).append(" ")
+               .append(std::to_string(cur_iter_num)).append("\n");
 
     ReporterData reporter_data{};
     reporter_data.deviceId = device_id;
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index 720c3c28..7bb0af25 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -3161,8 +3161,7 @@ Status DavinciModel::DistributeTask() {
 
     auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
     bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL)
-                             && (task_type != RT_MODEL_TASK_KERNEL_EX)
-                             && (task_type != RT_MODEL_TASK_HCCL);
+                             && (task_type != RT_MODEL_TASK_KERNEL_EX);
     GE_IF_BOOL_EXEC(no_need_profiling, continue);
 
     SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
@@ -3177,6 +3176,8 @@ Status DavinciModel::DistributeTask() {
     task_desc_info.block_dim = task_def.kernel().block_dim();
     task_desc_info.task_id = task->GetTaskID();
     task_desc_info.stream_id = task->GetStreamId();
+    task_desc_info.shape_type = "static";
+    task_desc_info.cur_iter_num = 0;
     task_desc_info_.emplace_back(task_desc_info);
     if (flag) {
       if (task->GetSktTaskID() != 0xFFFFFFFF) {
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index 819454db..56461d31 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -74,6 +74,7 @@ class NodeDoneCallback {
                           std::vector<ComputeGraphDescInfo> &compute_graph_info);
   Status GetTaskDescInfo(const NodePtr node, const HybridModel *model,
                          std::vector<TaskDescInfo> &task_desc_info);
+  Status GetNodeCurIterNum(uint64_t &cur_iter_num);
   GraphExecutionContext *graph_context_;
   std::shared_ptr<TaskContext> context_;
   DumpOp dump_op_;
@@ -151,29 +152,42 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
   GE_CHECK_NOTNULL(node);
   GE_CHECK_NOTNULL(model);
 
+  // only report aicpu and aicore node
+  auto task_defs = model->GetTaskDefs(node);
+  if (task_defs == nullptr || (*task_defs).size() == 0) {
+    GELOGD("Node[%s] does not need to report data.", node->GetName().c_str());
+    return SUCCESS;
+  }
+  const auto &task_def = (*task_defs)[0];
+  auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
+  bool is_profiling_report = (task_type == RT_MODEL_TASK_KERNEL) || (task_type == RT_MODEL_TASK_KERNEL_EX);
+
+  if (!is_profiling_report) {
+    GELOGD("Task type[%d] of Node[%s] is not aicore or aicpu, and no need to report data.",
+           static_cast<int>(task_type), node->GetName().c_str());
+    return SUCCESS;
+  }
+
   GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
   auto op_desc = node->GetOpDesc();
   std::string op_name = op_desc->GetName();
   std::string dynamic_model_name = model->GetModelName();
-
-  uint32_t task_id = 0;
-  uint32_t stream_id = 0;
-  if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
-    GELOGE(PARAM_INVALID, "Get task_id and stream_id failed.");
+  uint32_t task_id = context_->GetTaskId();
+  uint32_t stream_id = context_->GetStreamId();
+  uint64_t cur_iter_num = 0;
+  if (GetNodeCurIterNum(cur_iter_num) != SUCCESS) {
+    GELOGE(PARAM_INVALID, "Get cur iter num failed.");
     return PARAM_INVALID;
   }
 
   TaskDescInfo tmp_task_desc_info;
   tmp_task_desc_info.model_name = dynamic_model_name;
   tmp_task_desc_info.op_name = op_name;
-  tmp_task_desc_info.block_dim = 0;
-  auto task_defs = model->GetTaskDefs(node);
-  if (task_defs != nullptr && (*task_defs).size() > 0) {
-    const auto &task_def = (*task_defs)[0];
-    tmp_task_desc_info.block_dim = task_def.kernel().block_dim();
-  }
+  tmp_task_desc_info.block_dim = task_def.kernel().block_dim();
   tmp_task_desc_info.task_id = task_id;
   tmp_task_desc_info.stream_id = stream_id;
+  tmp_task_desc_info.shape_type = "dynamic";
+  tmp_task_desc_info.cur_iter_num = cur_iter_num;
   GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]",
          node->GetName().c_str(), task_id, stream_id);
   task_desc_info.emplace_back(tmp_task_desc_info);
@@ -224,6 +238,60 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
   return SUCCESS;
 }
 
+Status NodeDoneCallback::GetNodeCurIterNum(uint64_t &cur_iter_num) {
+  GE_CHECK_NOTNULL(context_);
+
+  uint64_t global_step = 0;
+  TensorValue *varible_global_step = context_->GetVariable(NODE_NAME_GLOBAL_STEP);
+  if (varible_global_step != nullptr) {
+    size_t global_step_size = varible_global_step->GetSize();
+    if (global_step_size > 0) {
+      std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[global_step_size]);
+      GE_CHECK_NOTNULL(data_buf);
+      GE_CHK_RT_RET(rtMemcpy(data_buf.get(), global_step_size, varible_global_step->GetData(), global_step_size,
+                             RT_MEMCPY_DEVICE_TO_HOST));
+      global_step = *reinterpret_cast<uint64_t *>(data_buf.get());
+    }
+  }
+
+  uint64_t loop_per_iter = 0;
+  TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
+  if (varible_loop_per_iter != nullptr) {
+    size_t varible_loop_per_iter_size = varible_loop_per_iter->GetSize();
+    if (varible_loop_per_iter_size > 0) {
+      std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[varible_loop_per_iter_size]);
+      GE_CHECK_NOTNULL(data_buf);
+      GE_CHK_RT_RET(rtMemcpy(data_buf.get(), varible_loop_per_iter_size, varible_loop_per_iter->GetData(),
+                             varible_loop_per_iter_size, RT_MEMCPY_DEVICE_TO_HOST));
+      loop_per_iter = *reinterpret_cast<uint64_t *>(data_buf.get());
+    }
+  }
+
+  uint64_t loop_cond = 0;
+  TensorValue *varible_loop_cond = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND);
+  if (varible_loop_cond != nullptr) {
+    size_t varible_loop_cond_size = varible_loop_cond->GetSize();
+    if (varible_loop_cond_size > 0) {
+      std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[varible_loop_cond_size]);
+      GE_CHECK_NOTNULL(data_buf);
+      GE_CHK_RT_RET(rtMemcpy(data_buf.get(), varible_loop_cond_size, varible_loop_cond->GetData(),
+                             varible_loop_cond_size, RT_MEMCPY_DEVICE_TO_HOST));
+      loop_cond = *reinterpret_cast<uint64_t *>(data_buf.get());
+    }
+  }
+
+  auto node = context_->GetNodeItem().node;
+  if (node == nullptr) {
+    GELOGE(PARAM_INVALID, "Node is nullptr.");
+    return PARAM_INVALID;
+  }
+
+  GELOGD("Node[%s] has global_step: %lu, loop_per_iter:%lu, loop_cond: %lu",
+         node->GetName().c_str(), global_step, loop_per_iter, loop_cond);
+  cur_iter_num = global_step * (loop_per_iter + 1) + loop_cond + 1;
+  return SUCCESS;
+}
+
 Status NodeDoneCallback::ProfilingReport() {
   auto node = context_->GetNodeItem().node;
   if (node == nullptr) {
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index 407210cf..d2cfbece 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -165,6 +165,16 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function
     }
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
     GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
+    uint32_t task_id = 0;
+    uint32_t stream_id = 0;
+    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(rt_ret, "Get task_id and stream_id failed.");
+      return rt_ret;
+    }
+    context.SetTaskId(task_id);
+    context.SetStreamId(stream_id);
+    GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
   }
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
index 7330f616..43f4f6d2 100755
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -189,6 +189,17 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function
diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h
   std::vector<void *> workspaces_;
   uint64_t iteration_ = 0;
+  uint32_t task_id_ = 0;
+  uint32_t stream_id_ = 0;
 };
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc
index a2652b67..5c9480bd 100755
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -32,13 +32,15 @@ namespace ge {
 namespace {
 const size_t kDataMemAlignSize = 32;
 const size_t kDataMemAlignUnit = 2;
+const string kShapeTypeDynamic = "dynamic";
+const string kShapeTypeStatic = "static";
 
 size_t GetAlignedSize(size_t size) {
   size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize;
   return aligned_size;
 }
 
-Status ProfilingTaskInfo(OpTask *op_task) {
+Status ProfilingTaskInfo(OpTask *op_task, string shape_type) {
   if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
     return SUCCESS;
   }
@@ -66,6 +68,8 @@ Status ProfilingTaskInfo(OpTask *op_task) {
   tmp_task_desc_info.block_dim = block_dim;
   tmp_task_desc_info.task_id = task_id;
   tmp_task_desc_info.stream_id = stream_id;
+  tmp_task_desc_info.shape_type = shape_type;
+  tmp_task_desc_info.cur_iter_num = 0;
   GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]",
          op_name.c_str(), task_id, stream_id);
   task_desc_info.emplace_back(tmp_task_desc_info);
@@ -193,7 +197,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
     if (ret != SUCCESS) {
       return ret;
     }
-    GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task));
+    GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task, kShapeTypeStatic));
   }
 
   return ret;
@@ -255,7 +259,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc,
   std::lock_guard<std::mutex> lk(*stream_mutex_);
   GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc,
                                                  output_buffers, stream_));
-  GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get()));
+  GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
   return SUCCESS;
 }
 }  // namespace ge
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index fb1f0be1..b67cb7aa 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -245,6 +245,8 @@ struct TaskDescInfo {
   uint32_t block_dim;
   uint32_t task_id;
   uint32_t stream_id;
+  std::string shape_type;
+  uint64_t cur_iter_num;
 };
 
 // Profiling info of graph