diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 9d1ba0c2..8fd38324 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -110,6 +110,9 @@ const char *const kWorkSpaceSize = "workspace_size"; const char *const kTotalSize = "total_size"; const char *const kTaskCount = "task_count"; const char *const kTaskId = "task_id"; +const char *const kIndexId = "index_id"; +const char *const kTimeStamp = "time_stamp"; +const char *const kTagId = "tag_id"; const char* const kRequestId = "request_id"; const char* const kThreadId = "thread_id"; const char* const kInputBeginTime = "input_begin_time"; @@ -2266,6 +2269,59 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { return SUCCESS; } +Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) { + auto &prof_mgr = ProfilingManager::Instance(); + if (prof_mgr.ProfilingModelExecuteOn()) { + uint64_t index_id = model->iterator_count_ + 1; + uint64_t model_id = static_cast(model->Id()); + rtStream_t stream = model->rt_model_stream_; + GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", + index_id, model_id, tag_id); + rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u", + index_id, model_id, tag_id); + + mmTimespec timespec = mmGetTickCount(); + // 1000 ^ 3 converts second to nanosecond + int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; + uint32_t task_id = 0; + uint32_t stream_id = 0; + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Get profiling args, task id[%u], stream id[%u]", task_id, stream_id); + uint32_t device_id = model->GetDeviceId(); + + Json step_info; + step_info[kIndexId] = index_id; + step_info[kModeleId] = model_id; + step_info[kTimeStamp] = time; + step_info[kTagId] = tag_id; + step_info[kTaskId] = task_id; + step_info[kStreamId] = stream_id; + step_info[kThreadId] = mmGetTid(); + + std::string reported_data; + try { + reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch (std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + } catch (...) { + GELOGE(FAILED, "Failed to convert JSON to string."); + } + reported_data.append(",") + .append("\n"); + prof_mgr.ReportData(device_id, reported_data, "step_info"); + } + return SUCCESS; +} + void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { int64_t time = endTime; @@ -2560,6 +2616,8 @@ void *DavinciModel::Run(DavinciModel *model) { GELOGI("data_wrapper is null!"); continue; } + // tag_id 0 means step begin, 1 meas step end. + (void)ProfileStepInfo(model, 0); GELOGI("Getting the input data, model_id:%u", model_id); GE_IF_BOOL_EXEC(!model->RunFlag(), break); @@ -2639,6 +2697,8 @@ void *DavinciModel::Run(DavinciModel *model) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_AFTER_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data)); + // tag_id 0 means step begin, 1 meas step end. + (void)ProfileStepInfo(model, 1); model->iterator_count_++; model->is_first_execute_ = false; @@ -3654,6 +3714,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GELOGD("current_data.index=%u", input_data.index); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); + // tag_id 0 means step begin, 1 meas step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0)); if (!task_list_.empty()) { GELOGD("rtModelExecute do"); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START)); @@ -3661,7 +3723,9 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); GELOGD("rtModelExecute end"); + iterator_count_++; } + GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1)); if (!is_async_mode_) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 70c0f687..b6da6e01 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -876,6 +876,8 @@ class DavinciModel { Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); + static Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id); + bool is_weight_mem_has_inited_; bool is_feature_map_mem_has_inited_; diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 80b8983a..fb8f583f 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -15,15 +15,27 @@ */ #include "hybrid_model_executor.h" +#include + #include "graph/ge_context.h" #include "graph/runtime_inference_context.h" #include "common/dump/dump_manager.h" +#include "common/profiling/profiling_manager.h" +#include "mmpa/mmpa_api.h" namespace ge { namespace hybrid { namespace { const int kIntBase = 10; const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; +const char *const kIndexId = "index_id"; +const char *const kModeleId = "model_id"; +const char *const kTimeStamp = "time_stamp"; +const char *const kStreamId = "stream_id"; +const char *const kTaskId = "task_id"; +const char *const kTagId = "tag_id"; +const char *const kThreadId = "thread_id"; +const uint32_t kInteval = 2; } // namespace HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) : model_(model), device_id_(device_id), stream_(stream) { @@ -48,7 +60,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { GE_CHECK_NOTNULL(root_graph_item); SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); + // tag_id is 0 means step begin, 1 meas step end. + GE_CHK_STATUS_RET_NOLOG(ProfileStepBegin(0)); auto ret = ExecuteGraphInternal(executor, args); + GE_CHK_STATUS_RET_NOLOG(ProfileStepEnd(1)); Cleanup(); RECORD_MODEL_EXECUTION_EVENT(&context_, "[Cleanup] End"); GELOGD("Model executed successfully."); @@ -66,6 +81,57 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { return SUCCESS; } +Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) { + auto &prof_mgr = ProfilingManager::Instance(); + if (prof_mgr.ProfilingModelExecuteOn()) { + uint64_t index_id = context_.iteration + 1; + uint64_t model_id = static_cast(model->Id()); + GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", + index_id, model_id, tag_id); + rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id = %u, tag_id = %u", + index_id, model_id, tag_id); + + mmTimespec timespec = mmGetTickCount(); + // 1000 ^ 3 converts second to nanosecond + int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; + uint32_t task_id = 0; + uint32_t stream_id = 0; + rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Get profiling args, task id[%u], stream id[%u]", task_id, stream_id); + + Json step_info; + step_info[kIndexId] = index_id; + step_info[kModeleId] = model_id; + step_info[kTimeStamp] = time; + step_info[kTagId] = tag_id; + step_info[kTaskId] = task_id; + step_info[kStreamId] = stream_id; + tep_info[kThreadId] = mmGetTid(); + + std::string reported_data; + try { + reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch (std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + } catch (...) { + GELOGE(FAILED, "Failed to convert JSON to string."); + } + reported_data.append(",") + .append("\n"); + prof_mgr.ReportData(device_id_, reported_data, "step_info"); + } + return SUCCESS; +} + Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, HybridModelExecutor::ExecuteArgs &args) { RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start"); diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 0b2cd1ed..af0bd536 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -51,6 +51,8 @@ class HybridModelExecutor { Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args); Status Cleanup(); Status InitExecutionContext(); + Status ProfileStepInfo(uint16_t tag_id); + static Status ResetExecutionContext(GraphExecutionContext &context); HybridModel *model_; diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index f4271551..f16e7912 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -544,7 +544,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream tmp_task_desc_info.stream_id = stream_id; tmp_task_desc_info.shape_type = "dynamic"; tmp_task_desc_info.cur_iter_num = iteration_ + 1; - task_desc_info.emplace_back(tmp_task_desc_info); + task_desc_info_.emplace_back(tmp_task_desc_info); } return SUCCESS; diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index e00c5048..b0b8d9ec 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -116,10 +116,10 @@ class TaskContext { void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; - const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info; } + const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info_; } Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, uint32_t block_dim); - void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } + void ClearProfilingTaskDescInfo() { task_desc_info_.clear(); } private: TaskContext(GraphExecutionContext *execution_context, @@ -141,7 +141,7 @@ class TaskContext { uint64_t iteration_ = 0; uint32_t task_id_ = 0; uint32_t stream_id_ = 0; - std::vector task_desc_info; + std::vector task_desc_info_; bool is_over_flow_ = false; }; } // namespace hybrid diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 440b98e7..735ec29d 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -313,6 +313,8 @@ rtError_t rtFlushCache(uint64_t base, uint32_t len) { return RT_ERROR_NONE; } rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream_) { return RT_ERROR_NONE; } +rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream_) { return RT_ERROR_NONE; } + rtError_t rtMemSetRC(const void *dev_ptr, uint64_t size, uint32_t read_count) { return RT_ERROR_NONE; } rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t true_stream, rtStream_t stream) { diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 80636a20..f3bab951 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -842,6 +842,7 @@ target_link_libraries(ge_ut_common PRIVATE ascend_protobuf json ge_ut_graph + runtime_stub ) # build common format diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 18cc622b..457c3b2e 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -898,6 +898,12 @@ TEST_F(UtestDavinciModel, Sink_time_profile) { model.SinkTimeProfile(current_data); } +TEST_F(UtestDavinciModel, Step_info_profile) { + ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; + DavinciModel model(0, nullptr); + EXPECT_EQ(DavinciModel::ProfileStepInfo(&model, 0), SUCCESS); +} + class ClassTest { public: virtual ~ClassTest() {} diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index d7116dbc..48605dea 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -22,6 +22,7 @@ #define protected public #define private public #include "hybrid/model/hybrid_model_builder.h" +#include "hybrid/executor/hybrid_model_executor.h" #include "hybrid/model/hybrid_model.h" #include "model/ge_model.h" #include "model/ge_root_model.h" @@ -36,6 +37,7 @@ #include "hybrid/common/npu_memory_allocator.h" #include "graph/types.h" #include "graph/utils/tensor_utils.h" +#include "common/profiling/profiling_manager.h" #undef private #undef protected @@ -45,6 +47,11 @@ using namespace testing; using namespace ge; using namespace hybrid; +namespace { +int32_t MsprofReport(uint32_t moduleId, uint32_t type, void *data, uint32_t len) { + return 0; +} +} // namespace class UtestGeHybrid : public testing::Test { protected: @@ -242,4 +249,13 @@ TEST_F(UtestGeHybrid, init_weight_success) { ge_sub_model->SetWeight(weight_buffer); ret = hybrid_model_builder.InitWeights(); ASSERT_EQ(ret,PARAM_INVALID); +} + +TEST_F(UtestGeHybrid, Step_info_profile) { + ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; + auto graph = make_shared("graph"); + auto root_model = make_shared(graph); + HybridModel model(model); + HybridModelExecutor executor(&model, 0, nullptr); + ASSERT_EQ(executor.ProfileStepInfo(), SUCCESS): } \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 5b246eed..2c62b0dd 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -154,6 +154,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t */ RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); +/** + * @ingroup profiling_base + * @brief ts send keypoint profiler log. + */ +RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream_); + /** * @ingroup profiling_base * @brief ts set profiling reporter callback.