From 06e9b44c5941cbc16e9ff08a8280612a87dfcbf7 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 13 Mar 2021 17:47:42 +0800 Subject: [PATCH 1/2] For step profiling. --- ge/executor/ge_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 44b2dbfa..cbb65ba1 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -813,6 +813,7 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat */ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, ge::RunModelData &run_output_data, bool async_mode) { + std::vector<GeTensorDesc> input_desc = {}; std::vector<GeTensorDesc> output_desc = {}; return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode); From b4932e3f39333be1a648f7f2155509b2c5b2d495 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 15 Mar 2021 00:09:34 +0800 Subject: [PATCH 2/2] For step profiling. --- ge/graph/load/model_manager/davinci_model.cc | 63 ++++++++++++++++++++ ge/graph/load/model_manager/davinci_model.h | 1 + ge/hybrid/executor/hybrid_model_executor.cc | 60 +++++++++++++++++++ ge/hybrid/executor/hybrid_model_executor.h | 2 + ge/hybrid/node_executor/task_context.cc | 2 +- ge/hybrid/node_executor/task_context.h | 6 +- 6 files changed, 130 insertions(+), 4 deletions(-) diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 9d1ba0c2..15c1025a 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -118,6 +118,9 @@ const char* const kInferBeginTime = "infer_begin_time"; const char* const kInferEndTime = "infer_end_time"; const char* const kOutputBeginTime = "output_start_time"; const char* const kOutputEndTime = "output_end_time"; +const char *const kIndexId = "index_id"; +const char *const kTimeStamp = "time_stamp"; +const char *const kTagId = "tag_id"; const uint32_t 
kStringHeadElems = 2; inline bool IsDataOp(const std::string &node_type) { @@ -2266,6 +2269,57 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) { return SUCCESS; } +Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) { + auto &prof_mgr = ProfilingManager::Instance(); + if (prof_mgr.ProfilingModelExecuteOn()) { + mmTimespec timespec = mmGetTickCount(); + int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; + uint32_t task_id = 0; + uint32_t stream_id = 0; + // NOTE(review): stream_id/task_id may not be correct here; task_id is presumably only available after execution -- confirm + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + uint32_t model_id = model->Id(); + uint32_t device_id = model->GetDeviceId(); + uint64_t index_id = model->iterator_count_; + rtStream_t stream = model->rt_model_stream_; + + Json step_info; + step_info[kIndexId] = index_id; + step_info[kModeleId] = model_id; + step_info[kTimeStamp] = time; + step_info[kTagId] = tag_id; + step_info[kTaskId] = task_id; + step_info[kStreamId] = stream_id; + + std::string reported_data; + try { + reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch (std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + } catch (...) { + GELOGE(FAILED, "Failed to convert JSON to string."); + } + reported_data.append(",") + .append("\n"); + prof_mgr.ReportData(device_id, reported_data, "step_info"); + + GELOGD("Profiling Step Info TraceTask execute async start. 
index_id = %lu, model_id= %u, tag_id = %u", + index_id, model_id, tag_id); + rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id= %u, tag_id = %u", + index_id, model_id, tag_id); + } + return SUCCESS; +} + void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { int64_t time = endTime; @@ -2560,6 +2614,8 @@ void *DavinciModel::Run(DavinciModel *model) { GELOGI("data_wrapper is null!"); continue; } + // tag_id is 0 means step begin, 1 means step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(model, 0)); GELOGI("Getting the input data, model_id:%u", model_id); GE_IF_BOOL_EXEC(!model->RunFlag(), break); @@ -2639,6 +2695,8 @@ void *DavinciModel::Run(DavinciModel *model) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_AFTER_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data)); + // tag_id is 0 means step begin, 1 means step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(model, 1)); model->iterator_count_++; model->is_first_execute_ = false; @@ -3654,6 +3712,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GELOGD("current_data.index=%u", input_data.index); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); + // tag_id is 0 means step begin, 1 means step end. 
+ GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0)); if (!task_list_.empty()) { GELOGD("rtModelExecute do"); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START)); @@ -3661,7 +3721,10 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); GELOGD("rtModelExecute end"); + iterator_count_++; } + // tag_id is 0 means step begin, 1 means step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1)); if (!is_async_mode_) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 70c0f687..66d69c06 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -851,6 +851,7 @@ class DavinciModel { Status SinkModelProfile(); Status SinkTimeProfile(const InputData &current_data); + static Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id); Status InitOutputTensorInfo(const OpDescPtr &op_desc); Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 80b8983a..b796be75 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -15,6 +15,8 @@ */ #include "hybrid_model_executor.h" +#include + #include "graph/ge_context.h" #include "graph/runtime_inference_context.h" #include "common/dump/dump_manager.h" @@ -24,6 +26,13 @@ namespace hybrid { namespace { const int kIntBase = 10; const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; +const char *const kIndexId = "index_id"; +const char *const kModeleId = "model_id"; +const char *const kTimeStamp = 
"time_stamp"; +const char *const kStreamId = "stream_id"; +const char *const kTaskId = "task_id"; +const char *const kTagId = "tag_id"; +const uint32_t kInteval = 2; } // namespace HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) : model_(model), device_id_(device_id), stream_(stream) { @@ -48,7 +57,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { GE_CHECK_NOTNULL(root_graph_item); SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); + // tag_id is 0 means step begin, 1 means step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(0)); auto ret = ExecuteGraphInternal(executor, args); + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(1)); Cleanup(); RECORD_MODEL_EXECUTION_EVENT(&context_, "[Cleanup] End"); GELOGD("Model executed successfully."); @@ -66,6 +78,54 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { return SUCCESS; } +Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) { + auto &prof_mgr = ProfilingManager::Instance(); + if (prof_mgr.ProfilingModelExecuteOn()) { + mmTimespec timespec = mmGetTickCount(); + int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; + uint32_t task_id = 0; + uint32_t stream_id = 0; + // NOTE(review): stream_id/task_id may not be correct here; task_id is presumably only available after execution -- confirm + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + uint64_t index_id = context_.iteration; + uint32_t model_id = model_->GetModelId(); + + Json step_info; + step_info[kIndexId] = index_id; + step_info[kModeleId] = model_id; + step_info[kTimeStamp] = time; + step_info[kTagId] = tag_id; + step_info[kTaskId] = task_id; + step_info[kStreamId] = stream_id; + + std::string reported_data; + try { + reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch 
(std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + } catch (...) { + GELOGE(FAILED, "Failed to convert JSON to string."); + } + reported_data.append(",") + .append("\n"); + prof_mgr.ReportData(device_id_, reported_data, "step_info"); + + GELOGD("Profiling Step Info TraceTask execute async start. index_id = %lu, model_id= %u, tag_id = %u", + index_id, model_id, tag_id); + rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id= %u, tag_id = %u", + index_id, model_id, tag_id); + } + return SUCCESS; +} + Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, HybridModelExecutor::ExecuteArgs &args) { RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start"); diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 0b2cd1ed..af0bd536 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -51,6 +51,8 @@ class HybridModelExecutor { Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args); Status Cleanup(); Status InitExecutionContext(); + Status ProfileStepInfo(uint16_t tag_id); + static Status ResetExecutionContext(GraphExecutionContext &context); HybridModel *model_; diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index f4271551..f16e7912 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -544,7 +544,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = "dynamic"; tmp_task_desc_info.cur_iter_num = iteration_ + 1; - 
task_desc_info.emplace_back(tmp_task_desc_info); + task_desc_info_.emplace_back(tmp_task_desc_info); } return SUCCESS; diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index e00c5048..b0b8d9ec 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -116,10 +116,10 @@ class TaskContext { void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; - const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } + const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info_; } Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, uint32_t block_dim); - void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } + void ClearProfilingTaskDescInfo() { task_desc_info_.clear(); } private: TaskContext(GraphExecutionContext *execution_context, @@ -141,7 +141,7 @@ class TaskContext { uint64_t iteration_ = 0; uint32_t task_id_ = 0; uint32_t stream_id_ = 0; - std::vector<TaskDescInfo> task_desc_info; + std::vector<TaskDescInfo> task_desc_info_; bool is_over_flow_ = false; }; } // namespace hybrid