| @@ -111,6 +111,9 @@ const char *const kWorkSpaceSize = "workspace_size"; | |||
// String keys used by this file when building profiling/report records.
const char *const kTotalSize = "total_size";
const char *const kTaskCount = "task_count";
const char *const kTaskId = "task_id";
// Keys of the "step_info" record assembled in DavinciModel::ProfileStepInfo.
const char *const kIndexId = "index_id";
const char *const kTimeStamp = "time_stamp";
const char *const kTagId = "tag_id";
const char *const kRequestId = "request_id";
const char *const kThreadId = "thread_id";
const char *const kInputBeginTime = "input_begin_time";
| @@ -2305,6 +2308,59 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | |||
| return SUCCESS; | |||
| } | |||
| Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) { | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| if (prof_mgr.ProfilingModelExecuteOn()) { | |||
| uint64_t index_id = model->iterator_count_ + 1; | |||
| uint64_t model_id = static_cast<uint64_t>(model->Id()); | |||
| rtStream_t stream = model->rt_model_stream_; | |||
| GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", | |||
| index_id, model_id, tag_id); | |||
| rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u", | |||
| index_id, model_id, tag_id); | |||
| mmTimespec timespec = mmGetTickCount(); | |||
| // 1000 ^ 3 converts second to nanosecond | |||
| int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id); | |||
| uint32_t device_id = model->GetDeviceId(); | |||
| Json step_info; | |||
| step_info[kIndexId] = index_id; | |||
| step_info[kModeleId] = model_id; | |||
| step_info[kTimeStamp] = time; | |||
| step_info[kTagId] = tag_id; | |||
| step_info[kTaskId] = task_id; | |||
| step_info[kStreamId] = stream_id; | |||
| step_info[kThreadId] = mmGetTid(); | |||
| std::string reported_data; | |||
| try { | |||
| reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||
| } catch (std::exception &e) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||
| } catch (...) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||
| } | |||
| reported_data.append(",") | |||
| .append("\n"); | |||
| prof_mgr.ReportData(device_id, reported_data, "step_info"); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { | |||
| int64_t time = endTime; | |||
| @@ -2599,6 +2655,8 @@ void *DavinciModel::Run(DavinciModel *model) { | |||
| GELOGI("data_wrapper is null!"); | |||
| continue; | |||
| } | |||
| // tag_id 0 means step begin, 1 means step end. | |||
| (void)DavinciModel::ProfileStepInfo(model, 0); | |||
| GELOGI("Getting the input data, model_id:%u", model_id); | |||
| GE_IF_BOOL_EXEC(!model->RunFlag(), break); | |||
| @@ -2678,6 +2736,8 @@ void *DavinciModel::Run(DavinciModel *model) { | |||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | |||
| model->SetProfileTime(MODEL_AFTER_PROC_END)); | |||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data)); | |||
| // tag_id 0 means step begin, 1 means step end. | |||
| (void)DavinciModel::ProfileStepInfo(model, 1); | |||
| model->iterator_count_++; | |||
| model->is_first_execute_ = false; | |||
| @@ -3691,6 +3751,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
| GELOGD("current_data.index=%u", input_data.index); | |||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); | |||
| // tag_id 0 means step begin, 1 means step end. | |||
| GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0)); | |||
| if (!task_list_.empty()) { | |||
| GELOGD("rtModelExecute do"); | |||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START)); | |||
| @@ -3698,7 +3760,9 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
| GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); | |||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); | |||
| GELOGD("rtModelExecute end"); | |||
| iterator_count_++; | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1)); | |||
| if (!is_async_mode_) { | |||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); | |||
| @@ -878,6 +878,8 @@ class DavinciModel { | |||
| Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); | |||
| Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); | |||
| static Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id); | |||
| bool is_weight_mem_has_inited_; | |||
| bool is_feature_map_mem_has_inited_; | |||
| @@ -18,12 +18,22 @@ | |||
| #include "graph/ge_context.h" | |||
| #include "graph/runtime_inference_context.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "mmpa/mmpa_api.h" | |||
| namespace ge { | |||
| namespace hybrid { | |||
namespace {
// Keys of the "step_info" JSON record built by HybridModelExecutor::ProfileStepInfo.
const char *const kIndexId = "index_id";
const char *const kModeleId = "model_id";
const char *const kTagId = "tag_id";
const char *const kTaskId = "task_id";
const char *const kStreamId = "stream_id";
const char *const kThreadId = "thread_id";
const char *const kTimeStamp = "time_stamp";
// Passed as the first argument of Json::dump when serializing the step record.
const uint32_t kInteval = 2;

// Name of the environment variable HYBRID_PROFILING_LEVEL and an integer base of 10;
// their uses are outside the visible part of this file.
const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL";
const int kIntBase = 10;
}  // namespace
| HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) | |||
| : model_(model), device_id_(device_id), stream_(stream) { | |||
| @@ -71,15 +81,69 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { | |||
| return SUCCESS; | |||
| } | |||
| Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) { | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| if (prof_mgr.ProfilingModelExecuteOn()) { | |||
| uint64_t index_id = context_.iteration + 1; | |||
| uint64_t model_id = static_cast<uint64_t>(model_->GetModelId()); | |||
| GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", | |||
| index_id, model_id, tag_id); | |||
| rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id = %u, tag_id = %u", | |||
| index_id, model_id, tag_id); | |||
| mmTimespec timespec = mmGetTickCount(); | |||
| // 1000 ^ 3 converts second to nanosecond | |||
| int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id); | |||
| Json step_info; | |||
| step_info[kIndexId] = index_id; | |||
| step_info[kModeleId] = model_id; | |||
| step_info[kTimeStamp] = time; | |||
| step_info[kTagId] = tag_id; | |||
| step_info[kTaskId] = task_id; | |||
| step_info[kStreamId] = stream_id; | |||
| step_info[kThreadId] = mmGetTid(); | |||
| std::string reported_data; | |||
| try { | |||
| reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||
| } catch (std::exception &e) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||
| } catch (...) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||
| } | |||
| reported_data.append(",") | |||
| .append("\n"); | |||
| prof_mgr.ReportData(device_id_, reported_data, "step_info"); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||
| HybridModelExecutor::ExecuteArgs &args) { | |||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start"); | |||
| GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | |||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | |||
| // tag_id 0 means step begin, 1 means step end. | |||
| GE_CHK_STATUS_RET_NOLOG(ProfileStepBegin(0)); | |||
| HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), | |||
| "Failed to execute partitioned call."); | |||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | |||
| GE_CHK_STATUS_RET_NOLOG(ProfileStepEnd(1)); | |||
| HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | |||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | |||
| @@ -51,6 +51,8 @@ class HybridModelExecutor { | |||
| Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args); | |||
| Status Cleanup(); | |||
| Status InitExecutionContext(); | |||
| Status ProfileStepInfo(uint16_t tag_id); | |||
| static Status ResetExecutionContext(GraphExecutionContext &context); | |||
| HybridModel *model_; | |||
| @@ -313,6 +313,8 @@ rtError_t rtFlushCache(uint64_t base, uint32_t len) { return RT_ERROR_NONE; } | |||
| rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream_) { return RT_ERROR_NONE; } | |||
| rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream_) { return RT_ERROR_NONE; } | |||
| rtError_t rtMemSetRC(const void *dev_ptr, uint64_t size, uint32_t read_count) { return RT_ERROR_NONE; } | |||
| rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t true_stream, rtStream_t stream) { | |||
| @@ -845,6 +845,7 @@ target_link_libraries(ge_ut_common PRIVATE | |||
| ascend_protobuf | |||
| json | |||
| ge_ut_graph | |||
| runtime_stub | |||
| ) | |||
| # build common format | |||
| @@ -898,6 +898,12 @@ TEST_F(UtestDavinciModel, Sink_time_profile) { | |||
| model.SinkTimeProfile(current_data); | |||
| } | |||
| TEST_F(UtestDavinciModel, Step_info_profile) { | |||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
| DavinciModel model(0, nullptr); | |||
| EXPECT_EQ(DavinciModel::ProfileStepInfo(&model, 0), SUCCESS); | |||
| } | |||
| class ClassTest { | |||
| public: | |||
| virtual ~ClassTest() {} | |||
| @@ -37,6 +37,7 @@ | |||
| #include "hybrid/common/npu_memory_allocator.h" | |||
| #include "graph/types.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #undef private | |||
| #undef protected | |||
| @@ -46,6 +47,11 @@ using namespace testing; | |||
| using namespace ge; | |||
| using namespace hybrid; | |||
namespace {
// Stub profiling reporter callback for the tests in this file: every argument is
// ignored and 0 is returned.
int32_t MsprofReport(uint32_t moduleId, uint32_t type, void *data, uint32_t len) {
  (void)moduleId;
  (void)type;
  (void)data;
  (void)len;
  const int32_t result = 0;
  return result;
}
}  // namespace
| class UtestGeHybrid : public testing::Test { | |||
| protected: | |||
| @@ -245,7 +251,7 @@ TEST_F(UtestGeHybrid, init_weight_success) { | |||
| ASSERT_EQ(ret,PARAM_INVALID); | |||
| } | |||
| TEST_F(UtestGeHybrid, hybrid_model_executor) { | |||
| TEST_F(UtestGeHybrid, hybrid_model_executor) { | |||
| ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("abc"); | |||
| GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(compute_graph); | |||
| HybridModel model(root_model); | |||
| @@ -256,3 +262,12 @@ TEST_F(UtestGeHybrid, init_weight_success) { | |||
| HybridModelExecutor executor(model_ptr, device_id, stream); | |||
| executor.Init(); | |||
| } | |||
| TEST_F(UtestGeHybrid, Step_info_profile) { | |||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
| auto graph = make_shared<ComputeGraph>("graph"); | |||
| auto root_model = make_shared<GeRootModel>(graph); | |||
| HybridModel model(root_model); | |||
| HybridModelExecutor executor(&model, 0, nullptr); | |||
| ASSERT_EQ(executor.ProfileStepInfo(0), SUCCESS): | |||
| } | |||
| @@ -154,6 +154,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t | |||
| */ | |||
| RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); | |||
| /** | |||
| * @ingroup profiling_base | |||
| * @brief ts send keypoint profiler log. | |||
| */ | |||
| RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream_); | |||
| /** | |||
| * @ingroup profiling_base | |||
| * @brief ts set profiling reporter callback. | |||