From dbd39095653ba39874ea22d24ca18f25de336387 Mon Sep 17 00:00:00 2001
From: zhengyuanhua
Date: Fri, 18 Dec 2020 15:34:53 +0800
Subject: [PATCH] profiling data add iter num

---
 ge/common/profiling/profiling_manager.cc      |  8 +-
 .../load/new_model_manager/davinci_model.cc   |  5 +-
 ge/hybrid/executor/worker/execution_engine.cc | 90 ++++++++++++++++---
 .../aicore/aicore_node_executor.cc            | 10 +++
 .../aicpu/aicpu_node_executor.cc              | 11 +++
 ge/hybrid/node_executor/task_context.cc       | 16 ++++
 ge/hybrid/node_executor/task_context.h        |  8 ++
 ge/single_op/single_op.cc                     | 10 ++-
 inc/framework/common/ge_types.h               |  2 +
 9 files changed, 142 insertions(+), 18 deletions(-)

diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index 214f58f4..4b3dfb26 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -214,12 +214,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
     uint32_t block_dim = task.block_dim;
     uint32_t task_id = task.task_id;
     uint32_t stream_id = task.stream_id;
+    std::string shape_type = task.shape_type;
+    uint64_t cur_iter_num = task.cur_iter_num;
     data = model_name.append(" ")
                .append(op_name).append(" ")
-               .append(std::to_string(block_dim).append(" ")
+               .append(std::to_string(block_dim)).append(" ")
               .append(std::to_string(task_id)).append(" ")
               .append(std::to_string(stream_id)).append(" ")
-               .append(std::to_string(model_id)).append("\n"));
+               .append(std::to_string(model_id)).append(" ")
+               .append(shape_type).append(" ")
+               .append(std::to_string(cur_iter_num)).append("\n");
 
     ReporterData reporter_data{};
     reporter_data.deviceId = device_id;
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index 720c3c28..7bb0af25 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -3161,8 +3161,7 @@ Status DavinciModel::DistributeTask() {
 
     auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
     bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL)
-                             && (task_type != RT_MODEL_TASK_KERNEL_EX)
-                             && (task_type != RT_MODEL_TASK_HCCL);
+                             && (task_type != RT_MODEL_TASK_KERNEL_EX);
     GE_IF_BOOL_EXEC(no_need_profiling, continue);
 
     SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
@@ -3177,6 +3176,8 @@ Status DavinciModel::DistributeTask() {
     task_desc_info.block_dim = task_def.kernel().block_dim();
     task_desc_info.task_id = task->GetTaskID();
     task_desc_info.stream_id = task->GetStreamId();
+    task_desc_info.shape_type = "static";
+    task_desc_info.cur_iter_num = 0;
     task_desc_info_.emplace_back(task_desc_info);
     if (flag) {
       if (task->GetSktTaskID() != 0xFFFFFFFF) {
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index 819454db..56461d31 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -74,6 +74,7 @@ class NodeDoneCallback {
                           std::vector<ComputeGraphDescInfo> &compute_graph_info);
   Status GetTaskDescInfo(const NodePtr node, const HybridModel *model,
                          std::vector<TaskDescInfo> &task_desc_info);
+  Status GetNodeCurIterNum(uint64_t &cur_iter_num);
   GraphExecutionContext *graph_context_;
   std::shared_ptr<TaskContext> context_;
   DumpOp dump_op_;
@@ -151,29 +152,42 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
   GE_CHECK_NOTNULL(node);
   GE_CHECK_NOTNULL(model);
 
+  // only report aicpu and aicore node
+  auto task_defs = model->GetTaskDefs(node);
+  if (task_defs == nullptr || (*task_defs).size() == 0) {
+    GELOGD("Node[%s] does not need to report data.", node->GetName().c_str());
+    return SUCCESS;
+  }
+  const auto &task_def = (*task_defs)[0];
+  auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
+  bool is_profiling_report = (task_type == RT_MODEL_TASK_KERNEL) || (task_type == RT_MODEL_TASK_KERNEL_EX);
+
+  if (!is_profiling_report) {
+    GELOGD("Task type[%d] of Node[%s] is not aicore or aicpu, and no need to report data.",
+           static_cast<int>(task_type), node->GetName().c_str());
+    return SUCCESS;
+  }
+
   GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
   auto op_desc = node->GetOpDesc();
   std::string op_name = op_desc->GetName();
   std::string dynamic_model_name = model->GetModelName();
-
-  uint32_t task_id = 0;
-  uint32_t stream_id = 0;
-  if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
-    GELOGE(PARAM_INVALID, "Get task_id and stream_id failed.");
+  uint32_t task_id = context_->GetTaskId();
+  uint32_t stream_id = context_->GetStreamId();
+  uint64_t cur_iter_num = 0;
+  if (GetNodeCurIterNum(cur_iter_num) != SUCCESS) {
+    GELOGE(PARAM_INVALID, "Get cur iter num failed.");
     return PARAM_INVALID;
   }
 
   TaskDescInfo tmp_task_desc_info;
   tmp_task_desc_info.model_name = dynamic_model_name;
   tmp_task_desc_info.op_name = op_name;
-  tmp_task_desc_info.block_dim = 0;
-  auto task_defs = model->GetTaskDefs(node);
-  if (task_defs != nullptr && (*task_defs).size() > 0) {
-    const auto &task_def = (*task_defs)[0];
-    tmp_task_desc_info.block_dim = task_def.kernel().block_dim();
-  }
+  tmp_task_desc_info.block_dim = task_def.kernel().block_dim();
   tmp_task_desc_info.task_id = task_id;
   tmp_task_desc_info.stream_id = stream_id;
+  tmp_task_desc_info.shape_type = "dynamic";
+  tmp_task_desc_info.cur_iter_num = cur_iter_num;
   GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]",
          node->GetName().c_str(), task_id, stream_id);
   task_desc_info.emplace_back(tmp_task_desc_info);
@@ -224,6 +238,60 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
   return SUCCESS;
 }
 
+Status NodeDoneCallback::GetNodeCurIterNum(uint64_t &cur_iter_num) {
+  GE_CHECK_NOTNULL(context_);
+
+  uint64_t global_step = 0;
+  TensorValue *varible_global_step = context_->GetVariable(NODE_NAME_GLOBAL_STEP);
+  if (varible_global_step != nullptr) {
+    size_t global_step_size = varible_global_step->GetSize();
+    if (global_step_size > 0) {
+      std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[global_step_size]);
+      GE_CHECK_NOTNULL(data_buf);
+      GE_CHK_RT_RET(rtMemcpy(data_buf.get(), global_step_size, varible_global_step->GetData(), global_step_size,
+                             RT_MEMCPY_DEVICE_TO_HOST));
+      global_step = *reinterpret_cast<uint64_t *>(data_buf.get());
+    }
+  }
+
+  uint64_t loop_per_iter = 0;
+  TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
+  if (varible_loop_per_iter != nullptr) {
+    size_t varible_loop_per_iter_size = varible_loop_per_iter->GetSize();
+    if (varible_loop_per_iter_size > 0) {
+      std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[varible_loop_per_iter_size]);
+      GE_CHECK_NOTNULL(data_buf);
+      GE_CHK_RT_RET(rtMemcpy(data_buf.get(), varible_loop_per_iter_size, varible_loop_per_iter->GetData(),
+                             varible_loop_per_iter_size, RT_MEMCPY_DEVICE_TO_HOST));
+      loop_per_iter = *reinterpret_cast<uint64_t *>(data_buf.get());
+    }
+  }
+
+  uint64_t loop_cond = 0;
+  TensorValue *varible_loop_cond = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND);
+  if (varible_loop_cond != nullptr) {
+    size_t varible_loop_cond_size = varible_loop_cond->GetSize();
+    if (varible_loop_cond_size > 0) {
+      std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[varible_loop_cond_size]);
+      GE_CHECK_NOTNULL(data_buf);
+      GE_CHK_RT_RET(rtMemcpy(data_buf.get(), varible_loop_cond_size, varible_loop_cond->GetData(),
+                             varible_loop_cond_size, RT_MEMCPY_DEVICE_TO_HOST));
+      loop_cond = *reinterpret_cast<uint64_t *>(data_buf.get());
+    }
+  }
+
+  auto node = context_->GetNodeItem().node;
+  if (node == nullptr) {
+    GELOGE(PARAM_INVALID, "Node is nullptr.");
+    return PARAM_INVALID;
+  }
+
+  GELOGD("Node[%s] has global_step: %lu, loop_per_iter:%lu, loop_cond: %lu",
+         node->GetName().c_str(), global_step, loop_per_iter, loop_cond);
+  cur_iter_num = global_step * (loop_per_iter + 1) + loop_cond + 1;
+  return SUCCESS;
+}
+
 Status NodeDoneCallback::ProfilingReport() {
   auto node = context_->GetNodeItem().node;
   if (node == nullptr) {
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index 407210cf..d2cfbece 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -165,6 +165,16 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function
     }
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
     GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
+    uint32_t task_id = 0;
+    uint32_t stream_id = 0;
+    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(rt_ret, "Get task_id and stream_id failed.");
+      return rt_ret;
+    }
+    context.SetTaskId(task_id);
+    context.SetStreamId(stream_id);
+    GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
   }
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
index 7330f616..43f4f6d2 100755
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -189,6 +189,17 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function
diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h
   std::vector<void *> workspaces_;
   uint64_t iteration_ = 0;
+  uint32_t task_id_ = 0;
+  uint32_t stream_id_ = 0;
 };
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc
index a2652b67..5c9480bd 100755
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -32,13 +32,15 @@ namespace ge {
 namespace {
 const size_t kDataMemAlignSize = 32;
 const size_t kDataMemAlignUnit = 2;
+const string kShapeTypeDynamic = "dynamic";
+const string kShapeTypeStatic = "static";
 
 size_t GetAlignedSize(size_t size) {
   size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize;
   return aligned_size;
 }
 
-Status ProfilingTaskInfo(OpTask *op_task) {
+Status ProfilingTaskInfo(OpTask *op_task, string shape_type) {
   if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
     return SUCCESS;
   }
@@ -66,6 +68,8 @@ Status ProfilingTaskInfo(OpTask *op_task) {
   tmp_task_desc_info.block_dim = block_dim;
   tmp_task_desc_info.task_id = task_id;
   tmp_task_desc_info.stream_id = stream_id;
+  tmp_task_desc_info.shape_type = shape_type;
+  tmp_task_desc_info.cur_iter_num = 0;
   GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]",
          op_name.c_str(), task_id, stream_id);
   task_desc_info.emplace_back(tmp_task_desc_info);
@@ -193,7 +197,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
     if (ret != SUCCESS) {
       return ret;
     }
-    GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task));
+    GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task, kShapeTypeStatic));
   }
 
   return ret;
@@ -255,7 +259,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc,
   std::lock_guard<std::mutex> lk(*stream_mutex_);
   GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc,
                                                  output_buffers, stream_));
-  GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get()));
+  GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
   return SUCCESS;
 }
 }  // namespace ge
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index fb1f0be1..b67cb7aa 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -245,6 +245,8 @@ struct TaskDescInfo {
   uint32_t block_dim;
   uint32_t task_id;
   uint32_t stream_id;
+  std::string shape_type;
+  uint64_t cur_iter_num;
 };
 
 // Profiling info of graph