From 06e9b44c5941cbc16e9ff08a8280612a87dfcbf7 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 13 Mar 2021 17:47:42 +0800 Subject: [PATCH 1/2] For step profiling. --- ge/executor/ge_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 44b2dbfa..cbb65ba1 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -813,6 +813,7 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat */ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, ge::RunModelData &run_output_data, bool async_mode) { + std::vector<GeTensorDesc> input_desc = {}; std::vector<GeTensorDesc> output_desc = {}; return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode); From b4932e3f39333be1a648f7f2155509b2c5b2d495 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 15 Mar 2021 00:09:34 +0800 Subject: [PATCH 2/2] For step profiling. --- ge/graph/load/model_manager/davinci_model.cc | 63 ++++++++++++++++++++ ge/graph/load/model_manager/davinci_model.h | 1 + ge/hybrid/executor/hybrid_model_executor.cc | 60 +++++++++++++++++++ ge/hybrid/executor/hybrid_model_executor.h | 2 + ge/hybrid/node_executor/task_context.cc | 2 +- ge/hybrid/node_executor/task_context.h | 6 +- 6 files changed, 130 insertions(+), 4 deletions(-) diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 9d1ba0c2..15c1025a 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -118,6 +118,9 @@ const char* const kInferBeginTime = "infer_begin_time"; const char* const kInferEndTime = "infer_end_time"; const char* const kOutputBeginTime = "output_start_time"; const char* const kOutputEndTime = "output_end_time"; +const char *const kIndexId = "index_id"; +const char *const kTimeStamp = "time_stamp"; +const char *const kTagId = "tag_id"; const uint32_t 
kStringHeadElems = 2; inline bool IsDataOp(const std::string &node_type) { @@ -2266,6 +2269,57 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) { return SUCCESS; } +Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) { + auto &prof_mgr = ProfilingManager::Instance(); + if (prof_mgr.ProfilingModelExecuteOn()) { + mmTimespec timespec = mmGetTickCount(); + int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; + uint32_t task_id = 0; + uint32_t stream_id = 0; + // NOTE(review): stream_id/task_id may not be correct here; task_id is presumably only available after execution -- confirm + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + uint32_t model_id = model->Id(); + uint32_t device_id = model->GetDeviceId(); + uint64_t index_id = model->iterator_count_; + rtStream_t stream = model->rt_model_stream_; + + Json step_info; + step_info[kIndexId] = index_id; + step_info[kModeleId] = model_id; + step_info[kTimeStamp] = time; + step_info[kTagId] = tag_id; + step_info[kTaskId] = task_id; + step_info[kStreamId] = stream_id; + + std::string reported_data; + try { + reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch (std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + } catch (...) { + GELOGE(FAILED, "Failed to convert JSON to string."); + } + reported_data.append(",") + .append("\n"); + prof_mgr.ReportData(device_id, reported_data, "step_info"); + + GELOGD("Profiling Step Info TraceTask execute async start. 
index_id = %lu, model_id= %u, tag_id = %u", + index_id, model_id, tag_id); + rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id= %u, tag_id = %u", + index_id, model_id, tag_id); + } + return SUCCESS; +} + void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { int64_t time = endTime; @@ -2560,6 +2614,8 @@ void *DavinciModel::Run(DavinciModel *model) { GELOGI("data_wrapper is null!"); continue; } + // tag_id is 0 means step begin, 1 means step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(model, 0)); GELOGI("Getting the input data, model_id:%u", model_id); GE_IF_BOOL_EXEC(!model->RunFlag(), break); @@ -2639,6 +2695,8 @@ void *DavinciModel::Run(DavinciModel *model) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_AFTER_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data)); + // tag_id is 0 means step begin, 1 means step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(model, 1)); model->iterator_count_++; model->is_first_execute_ = false; @@ -3654,6 +3712,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GELOGD("current_data.index=%u", input_data.index); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); + // tag_id is 0 means step begin, 1 means step end. 
+ GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0)); if (!task_list_.empty()) { GELOGD("rtModelExecute do"); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START)); @@ -3661,7 +3721,10 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); GELOGD("rtModelExecute end"); + iterator_count_++; } + // tag_id is 0 means step begin, 1 means step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1)); if (!is_async_mode_) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 70c0f687..66d69c06 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -851,6 +851,7 @@ class DavinciModel { Status SinkModelProfile(); Status SinkTimeProfile(const InputData &current_data); + static Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id); Status InitOutputTensorInfo(const OpDescPtr &op_desc); Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 80b8983a..b796be75 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -15,6 +15,8 @@ */ #include "hybrid_model_executor.h" +#include + #include "graph/ge_context.h" #include "graph/runtime_inference_context.h" #include "common/dump/dump_manager.h" @@ -24,6 +26,13 @@ namespace hybrid { namespace { const int kIntBase = 10; const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; +const char *const kIndexId = "index_id"; +const char *const kModeleId = "model_id"; +const char *const kTimeStamp = 
"time_stamp"; +const char *const kStreamId = "stream_id"; +const char *const kTaskId = "task_id"; +const char *const kTagId = "tag_id"; +const uint32_t kInteval = 2; } // namespace HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) : model_(model), device_id_(device_id), stream_(stream) { @@ -48,7 +57,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { GE_CHECK_NOTNULL(root_graph_item); SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); + // tag_id is 0 means step begin, 1 means step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(0)); auto ret = ExecuteGraphInternal(executor, args); + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(1)); Cleanup(); RECORD_MODEL_EXECUTION_EVENT(&context_, "[Cleanup] End"); GELOGD("Model executed successfully."); @@ -66,6 +78,54 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { return SUCCESS; } +Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) { + auto &prof_mgr = ProfilingManager::Instance(); + if (prof_mgr.ProfilingModelExecuteOn()) { + mmTimespec timespec = mmGetTickCount(); + int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; + uint32_t task_id = 0; + uint32_t stream_id = 0; + // NOTE(review): stream_id/task_id may not be correct here; task_id is presumably only available after execution -- confirm + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + uint64_t index_id = context_.iteration; + uint32_t model_id = model_->GetModelId(); + + Json step_info; + step_info[kIndexId] = index_id; + step_info[kModeleId] = model_id; + step_info[kTimeStamp] = time; + step_info[kTagId] = tag_id; + step_info[kTaskId] = task_id; + step_info[kStreamId] = stream_id; + + std::string reported_data; + try { + reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch 
(std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + } catch (...) { + GELOGE(FAILED, "Failed to convert JSON to string."); + } + reported_data.append(",") + .append("\n"); + prof_mgr.ReportData(device_id_, reported_data, "step_info"); + + GELOGD("Profiling Step Info TraceTask execute async start. index_id = %lu, model_id= %u, tag_id = %u", + index_id, model_id, tag_id); + rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id= %u, tag_id = %u", + index_id, model_id, tag_id); + } + return SUCCESS; +} + Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, HybridModelExecutor::ExecuteArgs &args) { RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start"); diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 0b2cd1ed..af0bd536 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -51,6 +51,8 @@ class HybridModelExecutor { Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args); Status Cleanup(); Status InitExecutionContext(); + Status ProfileStepInfo(uint16_t tag_id); + static Status ResetExecutionContext(GraphExecutionContext &context); HybridModel *model_; diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index f4271551..f16e7912 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -544,7 +544,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = "dynamic"; tmp_task_desc_info.cur_iter_num = iteration_ + 1; - 
task_desc_info.emplace_back(tmp_task_desc_info); + task_desc_info_.emplace_back(tmp_task_desc_info); } return SUCCESS; diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index e00c5048..b0b8d9ec 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -116,10 +116,10 @@ class TaskContext { void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; - const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } + const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info_; } Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, uint32_t block_dim); - void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } + void ClearProfilingTaskDescInfo() { task_desc_info_.clear(); } private: TaskContext(GraphExecutionContext *execution_context, @@ -141,7 +141,7 @@ class TaskContext { uint64_t iteration_ = 0; uint32_t task_id_ = 0; uint32_t stream_id_ = 0; - std::vector<TaskDescInfo> task_desc_info; + std::vector<TaskDescInfo> task_desc_info_; bool is_over_flow_ = false; }; } // namespace hybrid