For step profiling.

5 years ago · ba86111ecf
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -110,6 +110,9 @@ const char *const kWorkSpaceSize = "workspace_size";
 const char *const kTotalSize = "total_size";
 const char *const kTaskCount = "task_count";
 const char *const kTaskId = "task_id";
 const char *const kIndexId = "index_id";
 const char *const kTimeStamp = "time_stamp"; 
 const char *const kTagId = "tag_id";
 const char* const kRequestId = "request_id";
 const char* const kThreadId = "thread_id";
 const char* const kInputBeginTime = "input_begin_time";
@@ -2266,6 +2269,59 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
  return SUCCESS;
 }

 Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) {
  auto &prof_mgr = ProfilingManager::Instance();
  if (prof_mgr.ProfilingModelExecuteOn()) {
    uint64_t index_id = model->iterator_count_ + 1;
    uint64_t model_id = static_cast<uint64_t>(model->Id());
    rtStream_t stream = model->rt_model_stream_;
    GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u",
           index_id, model_id, tag_id);
    rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u",
           index_id, model_id, tag_id);

    mmTimespec timespec = mmGetTickCount();
    // 1000 ^ 3 converts second to nanosecond
    int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec;
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    GELOGD("Get profiling args, task id[%u], stream id[%u]", task_id, stream_id);
    uint32_t device_id = model->GetDeviceId();

    Json step_info;
    step_info[kIndexId] = index_id;
    step_info[kModeleId] = model_id;
    step_info[kTimeStamp] = time;
    step_info[kTagId] = tag_id;
    step_info[kTaskId] = task_id;
    step_info[kStreamId] = stream_id;
    step_info[kThreadId] = mmGetTid();

    std::string reported_data;
    try {
      reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
    } catch (std::exception &e) {
      GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
    } catch (...) {
      GELOGE(FAILED, "Failed to convert JSON to string.");
    }
    reported_data.append(",")
                 .append("\n");
    prof_mgr.ReportData(device_id, reported_data, "step_info");
  }
  return SUCCESS;
 }

 void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) {
  int64_t time = endTime;

@@ -2560,6 +2616,8 @@ void *DavinciModel::Run(DavinciModel *model) {
      GELOGI("data_wrapper is null!");
      continue;
    }
    // tag_id 0 means step begin, 1 meas step end.
    (void)ProfileStepInfo(model, 0);
    GELOGI("Getting the input data, model_id:%u", model_id);
    GE_IF_BOOL_EXEC(!model->RunFlag(), break);

@@ -2639,6 +2697,8 @@ void *DavinciModel::Run(DavinciModel *model) {
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
                    model->SetProfileTime(MODEL_AFTER_PROC_END));
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data));
    // tag_id 0 means step begin, 1 meas step end.
    (void)ProfileStepInfo(model, 1);

    model->iterator_count_++;
    model->is_first_execute_ = false;
@@ -3654,6 +3714,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
  GELOGD("current_data.index=%u", input_data.index);
  GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END));

  // tag_id 0 means step begin, 1 meas step end.
  GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0));
  if (!task_list_.empty()) {
    GELOGD("rtModelExecute do");
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START));
@@ -3661,7 +3723,9 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
    GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret));
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END));
    GELOGD("rtModelExecute end");
    iterator_count_++;
  }
  GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1));

  if (!is_async_mode_) {
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START));
--- a/ge/graph/load/model_manager/davinci_model.h
+++ b/ge/graph/load/model_manager/davinci_model.h
@@ -876,6 +876,8 @@ class DavinciModel {
  Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node);
  Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node);

  static Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id);

  bool is_weight_mem_has_inited_;
  bool is_feature_map_mem_has_inited_;

--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -15,15 +15,27 @@
 */

 #include "hybrid_model_executor.h"
 #include <utility>

 #include "graph/ge_context.h"
 #include "graph/runtime_inference_context.h"
 #include "common/dump/dump_manager.h"
 #include "common/profiling/profiling_manager.h"
 #include "mmpa/mmpa_api.h"

 namespace ge {
 namespace hybrid {
 namespace {
 const int kIntBase = 10;
 const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL";
 const char *const kIndexId = "index_id";
 const char *const kModeleId = "model_id";
 const char *const kTimeStamp = "time_stamp"; 
 const char *const kStreamId = "stream_id";
 const char *const kTaskId = "task_id";
 const char *const kTagId = "tag_id";
 const char *const kThreadId = "thread_id";
 const uint32_t kInteval = 2;
 } // namespace
 HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream)
    : model_(model), device_id_(device_id), stream_(stream) {
@@ -48,7 +60,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
  GE_CHECK_NOTNULL(root_graph_item);

  SubgraphExecutor executor(model_->GetRootGraphItem(), &context_);
  // tag_id is 0 means step begin, 1 meas step end.
  GE_CHK_STATUS_RET_NOLOG(ProfileStepBegin(0));
  auto ret = ExecuteGraphInternal(executor, args);
  GE_CHK_STATUS_RET_NOLOG(ProfileStepEnd(1));
  Cleanup();
  RECORD_MODEL_EXECUTION_EVENT(&context_, "[Cleanup] End");
  GELOGD("Model executed successfully.");
@@ -66,6 +81,57 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
  return SUCCESS;
 }

 Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) {
  auto &prof_mgr = ProfilingManager::Instance();
  if (prof_mgr.ProfilingModelExecuteOn()) {
    uint64_t index_id = context_.iteration + 1;
    uint64_t model_id = static_cast<uint64_t>(model->Id());
    GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u",
           index_id, model_id, tag_id);
    rtError_t rt_ret = rtProfilerTraceEx(index_id,  model_id, tag_id, stream_);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id = %u, tag_id = %u",
           index_id, model_id, tag_id);

    mmTimespec timespec = mmGetTickCount();
    // 1000 ^ 3 converts second to nanosecond
    int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec;
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    GELOGD("Get profiling args, task id[%u], stream id[%u]", task_id, stream_id);

    Json step_info;
    step_info[kIndexId] = index_id;
    step_info[kModeleId] = model_id;
    step_info[kTimeStamp] = time;
    step_info[kTagId] = tag_id;
    step_info[kTaskId] = task_id;
    step_info[kStreamId] = stream_id;
    tep_info[kThreadId] = mmGetTid();

    std::string reported_data;
    try {
      reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
    } catch (std::exception &e) {
      GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
    } catch (...) {
      GELOGE(FAILED, "Failed to convert JSON to string.");
    }
    reported_data.append(",")
                 .append("\n");
    prof_mgr.ReportData(device_id_, reported_data, "step_info");
  }
  return SUCCESS;
 }

 Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
                                                 HybridModelExecutor::ExecuteArgs &args) {
  RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start");
--- a/ge/hybrid/executor/hybrid_model_executor.h
+++ b/ge/hybrid/executor/hybrid_model_executor.h
@@ -51,6 +51,8 @@ class HybridModelExecutor {
  Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args);
  Status Cleanup();
  Status InitExecutionContext();
  Status ProfileStepInfo(uint16_t tag_id);

  static Status ResetExecutionContext(GraphExecutionContext &context);

  HybridModel *model_;
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -544,7 +544,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t  stream
    tmp_task_desc_info.stream_id = stream_id;
    tmp_task_desc_info.shape_type = "dynamic";
    tmp_task_desc_info.cur_iter_num = iteration_ + 1;
    task_desc_info.emplace_back(tmp_task_desc_info);
    task_desc_info_.emplace_back(tmp_task_desc_info);
  }

  return SUCCESS;
--- a/ge/hybrid/node_executor/task_context.h
+++ b/ge/hybrid/node_executor/task_context.h
@@ -116,10 +116,10 @@ class TaskContext {
  void SetForceInferShape(bool force_infer_shape);
  void *handle_ = nullptr;

  const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; }
  const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info_; }
  Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
                                   const std::string &task_type, uint32_t block_dim);
  void ClearProfilingTaskDescInfo() { task_desc_info.clear(); }
  void ClearProfilingTaskDescInfo() { task_desc_info_.clear(); }

 private:
  TaskContext(GraphExecutionContext *execution_context,
@@ -141,7 +141,7 @@ class TaskContext {
  uint64_t iteration_ = 0;
  uint32_t task_id_ = 0;
  uint32_t stream_id_ = 0;
  std::vector<TaskDescInfo> task_desc_info;
  std::vector<TaskDescInfo> task_desc_info_;
  bool is_over_flow_ = false;
 };
 }  // namespace hybrid
--- a/tests/depends/runtime/src/runtime_stub.cc
+++ b/tests/depends/runtime/src/runtime_stub.cc
@@ -313,6 +313,8 @@ rtError_t rtFlushCache(uint64_t base, uint32_t len) { return RT_ERROR_NONE; }

 rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream_) { return RT_ERROR_NONE; }

 rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream_) { return RT_ERROR_NONE; }

 rtError_t rtMemSetRC(const void *dev_ptr, uint64_t size, uint32_t read_count) { return RT_ERROR_NONE; }

 rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t true_stream, rtStream_t stream) {
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -842,6 +842,7 @@ target_link_libraries(ge_ut_common PRIVATE
    ascend_protobuf
    json
    ge_ut_graph
    runtime_stub
 )

 # build common format
--- a/tests/ut/ge/graph/load/davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc
@@ -898,6 +898,12 @@ TEST_F(UtestDavinciModel, Sink_time_profile) {
  model.SinkTimeProfile(current_data);
 }

 TEST_F(UtestDavinciModel, Step_info_profile) {
  ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
  DavinciModel model(0, nullptr);
  EXPECT_EQ(DavinciModel::ProfileStepInfo(&model, 0), SUCCESS);
 }

 class ClassTest {
 public:
    virtual ~ClassTest() {}
--- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
+++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
@@ -22,6 +22,7 @@
 #define protected public
 #define private public
 #include "hybrid/model/hybrid_model_builder.h"
 #include "hybrid/executor/hybrid_model_executor.h"
 #include "hybrid/model/hybrid_model.h"
 #include "model/ge_model.h"
 #include "model/ge_root_model.h"
@@ -36,6 +37,7 @@
 #include "hybrid/common/npu_memory_allocator.h"
 #include "graph/types.h"
 #include "graph/utils/tensor_utils.h"
 #include "common/profiling/profiling_manager.h"

 #undef private
 #undef protected
@@ -45,6 +47,11 @@ using namespace testing;
 using namespace ge;
 using namespace hybrid;

 namespace {
 int32_t MsprofReport(uint32_t moduleId, uint32_t type, void *data, uint32_t len) {
  return 0;
 }
 }  // namespace

 class UtestGeHybrid : public testing::Test {
 protected:
@@ -242,4 +249,13 @@ TEST_F(UtestGeHybrid, init_weight_success) {
  ge_sub_model->SetWeight(weight_buffer);
  ret = hybrid_model_builder.InitWeights();
  ASSERT_EQ(ret,PARAM_INVALID);
 }

 TEST_F(UtestGeHybrid, Step_info_profile) {
  ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
  auto graph = make_shared<ComputeGraph>("graph");
  auto root_model = make_shared<GeRootModel>(graph);
  HybridModel model(model);
  HybridModelExecutor executor(&model, 0, nullptr);
  ASSERT_EQ(executor.ProfileStepInfo(), SUCCESS):
 }
--- a/third_party/fwkacllib/inc/runtime/base.h
+++ b/third_party/fwkacllib/inc/runtime/base.h
@@ -154,6 +154,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t
 */
 RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream);

 /**
 * @ingroup profiling_base
 * @brief ts send keypoint profiler log.
 */
 RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream_);

 /**
 * @ingroup profiling_base
 * @brief ts set profiling reporter callback.