@@ -852,6 +852,7 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
 */
Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
                             ge::RunModelData &run_output_data, bool async_mode) {
  std::vector<GeTensorDesc> input_desc = {};
  std::vector<GeTensorDesc> output_desc = {};
  return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode);
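This overload simply forwards to the descriptor-taking variant with empty input/output descriptor lists. A minimal, hedged caller sketch (buffer setup is illustrative; the blobs/data/length fields follow the public GE headers, everything else is an assumption):

// Hedged usage sketch of the forwarding overload above; model loading and
// device buffer allocation are elided.
ge::RunModelData input_data;
ge::RunModelData output_data;
ge::DataBuffer in_buf;
in_buf.data = dev_input_ptr;   // device buffer allocated elsewhere (assumption)
in_buf.length = input_size;    // byte size of the input (assumption)
input_data.blobs.emplace_back(in_buf);
GeExecutor executor;
Status ret = executor.ExecModel(model_id, stream, input_data, output_data, /*async_mode=*/false);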
@@ -119,6 +119,9 @@ const char* const kInferBeginTime = "infer_begin_time";
const char* const kInferEndTime = "infer_end_time";
const char* const kOutputBeginTime = "output_start_time";
const char* const kOutputEndTime = "output_end_time";
const char *const kIndexId = "index_id";
const char *const kTimeStamp = "time_stamp";
const char *const kTagId = "tag_id";
const uint32_t kStringHeadElems = 2;

inline bool IsDataOp(const std::string &node_type) {
@@ -2305,6 +2308,57 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
  return SUCCESS;
}
Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) {
  auto &prof_mgr = ProfilingManager::Instance();
  if (prof_mgr.ProfilingModelExecuteOn()) {
    mmTimespec timespec = mmGetTickCount();
    int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec;  // 1000^3 converts seconds to nanoseconds
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    // Note: the stream_id and task_id obtained here may be inaccurate; the
    // task_id should really be queried after execution.
    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    uint32_t model_id = model->Id();
    uint32_t device_id = model->GetDeviceId();
    uint64_t index_id = model->iterator_count_;
    rtStream_t stream = model->rt_model_stream_;

    Json step_info;
    step_info[kIndexId] = index_id;
    step_info[kModeleId] = model_id;
    step_info[kTimeStamp] = time;
    step_info[kTagId] = tag_id;
    step_info[kTaskId] = task_id;
    step_info[kStreamId] = stream_id;

    std::string reported_data;
    try {
      reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
    } catch (const std::exception &e) {
      GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
      return FAILED;
    } catch (...) {
      GELOGE(FAILED, "Failed to convert JSON to string.");
      return FAILED;
    }
    reported_data.append(",").append("\n");
    prof_mgr.ReportData(device_id, reported_data, "step_info");

    GELOGD("Profiling Step Info TraceTask execute async start. index_id = %lu, model_id = %u, tag_id = %u",
           index_id, model_id, tag_id);
    rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);  // rt_ret is already declared above
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id = %u, tag_id = %u",
           index_id, model_id, tag_id);
  }
  return SUCCESS;
}
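For reference, each reported "step_info" record is the pretty-printed JSON object produced by step_info.dump() with a trailing ",\n". Assuming Json is an nlohmann::json alias (which sorts keys alphabetically), with kInteval = 2 one record handed to ReportData looks roughly like this (all values illustrative):

{
  "index_id": 12,
  "model_id": 1,
  "stream_id": 3,
  "tag_id": 0,
  "task_id": 57,
  "time_stamp": 1612178400123456789
},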
void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) {
  int64_t time = endTime;
@@ -2599,6 +2653,8 @@ void *DavinciModel::Run(DavinciModel *model) {
      GELOGI("data_wrapper is null!");
      continue;
    }
    // A tag_id of 0 marks the step begin; 1 marks the step end (see the sketch after this hunk).
    GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(model, 0));

    GELOGI("Getting the input data, model_id:%u", model_id);
    GE_IF_BOOL_EXEC(!model->RunFlag(), break);
@@ -2678,6 +2734,8 @@ void *DavinciModel::Run(DavinciModel *model) {
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
                    model->SetProfileTime(MODEL_AFTER_PROC_END));
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data));
    // A tag_id of 0 marks the step begin; 1 marks the step end.
    GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(model, 1));

    model->iterator_count_++;
    model->is_first_execute_ = false;
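The 0/1 tag convention is only encoded in comments at each call site. A hedged sketch of how it could be named (this enum is not part of the PR):

// Hedged sketch, not part of this PR: naming the tag values passed to
// ProfileStepInfo()/rtProfilerTraceEx() instead of using bare literals.
enum class StepTag : uint16_t {
  kStepBegin = 0,  // emitted before the model iteration starts
  kStepEnd = 1     // emitted after the iteration finishes
};
// e.g. GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(model, static_cast<uint16_t>(StepTag::kStepBegin)));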
@@ -3691,6 +3749,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
  GELOGD("current_data.index=%u", input_data.index);
  GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END));
  // A tag_id of 0 marks the step begin; 1 marks the step end.
  GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0));

  if (!task_list_.empty()) {
    GELOGD("rtModelExecute do");
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START));
@@ -3698,7 +3758,10 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
    GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret));
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END));
    GELOGD("rtModelExecute end");
    iterator_count_++;
  }
  // A tag_id of 0 marks the step begin; 1 marks the step end.
  GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1));

  if (!is_async_mode_) {
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START));
@@ -853,6 +853,7 @@ class DavinciModel {
  Status SinkModelProfile();
  Status SinkTimeProfile(const InputData &current_data);
  // static: also called from the static thread entry Run().
  static Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id);

  Status InitOutputTensorInfo(const OpDescPtr &op_desc);
  Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs);
@@ -15,6 +15,8 @@
 */
#include "hybrid_model_executor.h"
#include <utility>
#include "graph/ge_context.h"
#include "graph/runtime_inference_context.h"
#include "common/dump/dump_manager.h"
@@ -24,6 +26,13 @@ namespace hybrid {
namespace {
const int kIntBase = 10;
const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL";
const char *const kIndexId = "index_id";
const char *const kModeleId = "model_id";
const char *const kTimeStamp = "time_stamp";
const char *const kStreamId = "stream_id";
const char *const kTaskId = "task_id";
const char *const kTagId = "tag_id";
const uint32_t kInteval = 2;
}  // namespace
HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream)
    : model_(model), device_id_(device_id), stream_(stream) {
@@ -53,7 +62,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
  GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
                              sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
  SubgraphExecutor executor(model_->GetRootGraphItem(), &context_);
  // A tag_id of 0 marks the step begin; 1 marks the step end.
  GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(0));
  auto ret = ExecuteGraphInternal(executor, args);
  GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(1));
  Cleanup();
  RECORD_MODEL_EXECUTION_EVENT(&context_, "[Cleanup] End");
  GELOGD("Model executed successfully.");
@@ -71,6 +83,54 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
  return SUCCESS;
}
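Each iteration therefore produces two step_info records sharing the same index_id (context_.iteration). A hedged consumer-side sketch of pairing them to compute per-step duration; the field names follow the JSON keys defined above, while the records container and everything else are assumptions:

// Hedged sketch, not part of this PR: pairing begin/end records by index_id.
std::map<uint64_t, int64_t> begin_ts;  // index_id -> time_stamp of the tag 0 record
for (const auto &rec : records) {      // records: parsed step_info JSON objects (assumption)
  uint64_t idx = rec["index_id"].get<uint64_t>();
  if (rec["tag_id"].get<int>() == 0) {
    begin_ts[idx] = rec["time_stamp"].get<int64_t>();
  } else if (begin_ts.count(idx) != 0) {
    int64_t step_ns = rec["time_stamp"].get<int64_t>() - begin_ts[idx];
    GELOGD("step %lu took %ld ns", idx, step_ns);
  }
}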
Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) {
  auto &prof_mgr = ProfilingManager::Instance();
  if (prof_mgr.ProfilingModelExecuteOn()) {
    mmTimespec timespec = mmGetTickCount();
    int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec;  // 1000^3 converts seconds to nanoseconds
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    // Note: the stream_id and task_id obtained here may be inaccurate; the
    // task_id should really be queried after execution.
    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    uint64_t index_id = context_.iteration;
    uint32_t model_id = model_->GetModelId();

    Json step_info;
    step_info[kIndexId] = index_id;
    step_info[kModeleId] = model_id;
    step_info[kTimeStamp] = time;
    step_info[kTagId] = tag_id;
    step_info[kTaskId] = task_id;
    step_info[kStreamId] = stream_id;

    std::string reported_data;
    try {
      reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
    } catch (const std::exception &e) {
      GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
      return FAILED;
    } catch (...) {
      GELOGE(FAILED, "Failed to convert JSON to string.");
      return FAILED;
    }
    reported_data.append(",").append("\n");
    prof_mgr.ReportData(device_id_, reported_data, "step_info");

    GELOGD("Profiling Step Info TraceTask execute async start. index_id = %lu, model_id = %u, tag_id = %u",
           index_id, model_id, tag_id);
    rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream_);  // rt_ret is already declared above
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id = %u, tag_id = %u",
           index_id, model_id, tag_id);
  }
  return SUCCESS;  // missing return added: all paths of a Status function must return
}
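Since each record is emitted as a JSON object followed by ",\n", a consumer can reassemble the stream into a single JSON array. A hedged sketch; aggregated_stream and the trimming step are assumptions about how the receiver collects ReportData payloads:

// Hedged consumer-side sketch, not part of this PR: reassembling the
// ",\n"-separated step_info records into a parseable JSON array.
std::string records = aggregated_stream;         // concatenated ReportData payloads (assumption)
while (!records.empty() && (records.back() == '\n' || records.back() == ',')) {
  records.pop_back();                            // drop the trailing ",\n" of the last record
}
Json parsed = Json::parse("[" + records + "]");  // each record becomes one array element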
Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
                                                 HybridModelExecutor::ExecuteArgs &args) {
  RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start");
@@ -51,6 +51,8 @@ class HybridModelExecutor {
  Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args);
  Status Cleanup();
  Status InitExecutionContext();
  Status ProfileStepInfo(uint16_t tag_id);

  static Status ResetExecutionContext(GraphExecutionContext &context);
  HybridModel *model_;
@@ -544,7 +544,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream
    tmp_task_desc_info.stream_id = stream_id;
    tmp_task_desc_info.shape_type = "dynamic";
    tmp_task_desc_info.cur_iter_num = iteration_ + 1;
-   task_desc_info.emplace_back(tmp_task_desc_info);
+   task_desc_info_.emplace_back(tmp_task_desc_info);
  }
  return SUCCESS;
@@ -116,10 +116,10 @@ class TaskContext {
  void SetForceInferShape(bool force_infer_shape);
  void *handle_ = nullptr;
-  const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; }
+  const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info_; }
  Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
                                   const std::string &task_type, uint32_t block_dim);
-  void ClearProfilingTaskDescInfo() { task_desc_info.clear(); }
+  void ClearProfilingTaskDescInfo() { task_desc_info_.clear(); }

 private:
  TaskContext(GraphExecutionContext *execution_context,
@@ -141,7 +141,7 @@ class TaskContext {
  uint64_t iteration_ = 0;
  uint32_t task_id_ = 0;
  uint32_t stream_id_ = 0;
-  std::vector<TaskDescInfo> task_desc_info;
+  std::vector<TaskDescInfo> task_desc_info_;
  bool is_over_flow_ = false;
};
}  // namespace hybrid
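These hunks rename uses of the private member to match its underscore-suffixed declaration, task_desc_info_. A hedged sketch of the typical save/read/clear cycle around that member (the task_type value and the reporting step are assumptions):

// Hedged usage sketch, not part of this PR.
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, "KERNEL", block_dim);  // task_type illustrative
for (const auto &info : context.GetProfilingTaskDescInfo()) {
  // hand each TaskDescInfo record to the profiling reporter (elided)
}
context.ClearProfilingTaskDescInfo();  // avoid re-reporting on the next iteration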