diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index f604aeb8..6c5782f8 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -24,6 +24,7 @@ #include "graph/types.h" #include "runtime/base.h" #include "graph/load/model_manager/davinci_model.h" +#include "mmpa/mmpa_api.h" namespace { const char *const kTrainingTrace = "training_trace"; @@ -47,6 +48,7 @@ const std::string kOptype = "op_type"; const std::string kBlockDim = "block_dims"; const std::string kTaskId = "task_id"; const std::string kStreamId = "stream_id"; +const std::string kThreadId = "thread_id"; const std::string kShapeType = "shape_type"; const std::string kCurIterNum = "cur_iter_num"; const std::string kTaskType = "task_type"; @@ -287,6 +289,56 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin #endif } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfileStepInfo( + uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id) { + rtError_t rt_ret = RT_ERROR_NONE; +#ifndef ONLY_COMPILE_OPEN_SRC + GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", + index_id, model_id, tag_id); + rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u", + index_id, model_id, tag_id); +#endif + + mmTimespec timespec = mmGetTickCount(); + // 1000 ^ 3 converts second to nanosecond + int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; + uint32_t task_id = 0; + uint32_t stream_id = 0; + rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id); + + Json step_info; + step_info[kIndexId] = index_id; + step_info[kModeleId] = model_id; + step_info[kTimeStamp] = time; + step_info[kTagId] = tag_id; + step_info[kTaskId] = task_id; + step_info[kStreamId] = stream_id; + step_info[kThreadId] = mmGetTid(); + + std::string reported_data; + try { + reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); + } catch (std::exception &e) { + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); + } catch (...) { + GELOGE(FAILED, "Failed to convert JSON to string."); + } + reported_data.append(",") + .append("\n"); + ProfilingManager::Instance().ReportData(device_id, reported_data, "step_info"); + return SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData( const int32_t &device_id, const string &data, const string &tag_name) { #ifdef DAVINCI_SUPPORT_PROFILING diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 9dcc5dab..c6dfe841 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -97,6 +97,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { void GetFpBpPoint(std::string &fp_point, std::string &bp_point); void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); + Status ProfileStepInfo(uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id); private: Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); Status ParseOptions(const std::string &options); diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 08d60b59..7244d64d 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -2308,61 +2308,6 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { return SUCCESS; } -Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) { - if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { - uint64_t index_id = model->iterator_count_ + 1; - uint64_t model_id = static_cast(model->Id()); - rtError_t rt_ret = RT_ERROR_NONE; -#ifndef ONLY_COMPILE_OPEN_SRC - rtStream_t stream = model->rt_model_stream_; - GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", - index_id, model_id, tag_id); - rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } - GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u", - index_id, model_id, tag_id); -#endif - - mmTimespec timespec = mmGetTickCount(); - // 1000 ^ 3 converts second to nanosecond - int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; - uint32_t task_id = 0; - uint32_t stream_id = 0; - rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } - GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id); - int32_t device_id = static_cast(model->GetDeviceId()); - - Json step_info; - step_info[kIndexId] = index_id; - step_info[kModeleId] = model_id; - step_info[kTimeStamp] = time; - step_info[kTagId] = tag_id; - step_info[kTaskId] = task_id; - step_info[kStreamId] = stream_id; - step_info[kThreadId] = mmGetTid(); - - std::string reported_data; - try { - reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); - } catch (std::exception &e) { - GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); - } catch (...) { - GELOGE(FAILED, "Failed to convert JSON to string."); - } - reported_data.append(",") - .append("\n"); - ProfilingManager::Instance().ReportData(device_id, reported_data, "step_info"); - } - return SUCCESS; -} - void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { int64_t time = endTime; @@ -2657,8 +2602,6 @@ void *DavinciModel::Run(DavinciModel *model) { GELOGI("data_wrapper is null!"); continue; } - // tag_id 0 means step begin, 1 meas step end. - (void)DavinciModel::ProfileStepInfo(model, 0); GELOGI("Getting the input data, model_id:%u", model_id); GE_IF_BOOL_EXEC(!model->RunFlag(), break); @@ -2738,8 +2681,6 @@ void *DavinciModel::Run(DavinciModel *model) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_AFTER_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data)); - // tag_id 0 means step begin, 1 meas step end. - (void)DavinciModel::ProfileStepInfo(model, 1); model->iterator_count_++; model->is_first_execute_ = false; @@ -3755,15 +3696,24 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_END)); if (!task_list_.empty()) { + uint64_t index_id = iterator_count_ + 1; + uint64_t model_id = static_cast(model_id_); + int32_t device_id = static_cast(device_id_); // tag_id 0 means step begin, 1 meas step end. - GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0)); + if (profiling_model_execute_on) { + GE_CHK_STATUS_RET_NOLOG( + ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id)); + } GELOGD("rtModelExecute do"); GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START)); rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END)); GELOGD("rtModelExecute end"); - GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1)); + if (profiling_model_execute_on) { + GE_CHK_STATUS_RET_NOLOG( + ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id)); + } iterator_count_++; } diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 1adeb1cc..58478b0f 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -878,8 +878,6 @@ class DavinciModel { Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); - static Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id); - bool is_weight_mem_has_inited_; bool is_feature_map_mem_has_inited_; diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 3e65b5c1..80dc4184 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -19,21 +19,12 @@ #include "graph/runtime_inference_context.h" #include "common/dump/dump_manager.h" #include "common/profiling/profiling_manager.h" -#include "mmpa/mmpa_api.h" namespace ge { namespace hybrid { namespace { const int kIntBase = 10; const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; -const char *const kIndexId = "index_id"; -const char *const kModeleId = "model_id"; -const char *const kTimeStamp = "time_stamp"; -const char *const kStreamId = "stream_id"; -const char *const kTaskId = "task_id"; -const char *const kTagId = "tag_id"; -const char *const kThreadId = "thread_id"; -const uint32_t kInteval = 2; } // namespace HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) : model_(model), device_id_(device_id), stream_(stream) { @@ -81,60 +72,6 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { return SUCCESS; } -Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) { - auto &prof_mgr = ProfilingManager::Instance(); - if (prof_mgr.ProfilingModelExecuteOn()) { - uint64_t index_id = context_.iteration + 1; - uint64_t model_id = static_cast(model_->GetModelId()); - rtError_t rt_ret = RT_ERROR_NONE; -#ifndef ONLY_COMPILE_OPEN_SRC - GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", - index_id, model_id, tag_id); - rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } - GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u", - index_id, model_id, tag_id); -#endif - - mmTimespec timespec = mmGetTickCount(); - // 1000 ^ 3 converts second to nanosecond - int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; - uint32_t task_id = 0; - uint32_t stream_id = 0; - rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } - GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id); - - Json step_info; - step_info[kIndexId] = index_id; - step_info[kModeleId] = model_id; - step_info[kTimeStamp] = time; - step_info[kTagId] = tag_id; - step_info[kTaskId] = task_id; - step_info[kStreamId] = stream_id; - step_info[kThreadId] = mmGetTid(); - - std::string reported_data; - try { - reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); - } catch (std::exception &e) { - GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); - } catch (...) { - GELOGE(FAILED, "Failed to convert JSON to string."); - } - reported_data.append(",") - .append("\n"); - prof_mgr.ReportData(device_id_, reported_data, "step_info"); - } - return SUCCESS; -} - Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, HybridModelExecutor::ExecuteArgs &args) { RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start"); @@ -142,11 +79,19 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); // tag_id 0 means step begin, 1 meas step end. - GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(0)); + uint64_t index_id = context_.iteration + 1; + uint64_t model_id = static_cast(model_->GetModelId()); + int32_t device_id = static_cast(device_id_); + auto &prof_mgr = ProfilingManager::Instance(); + if (prof_mgr.ProfilingModelExecuteOn()) { + GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 0, stream_, device_id)); + } HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), "Failed to execute partitioned call."); RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); - GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(1)); + if (prof_mgr.ProfilingModelExecuteOn()) { + GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id)); + } HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index af0bd536..d2bd224f 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -51,7 +51,6 @@ class HybridModelExecutor { Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args); Status Cleanup(); Status InitExecutionContext(); - Status ProfileStepInfo(uint16_t tag_id); static Status ResetExecutionContext(GraphExecutionContext &context); diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index ae6afcc5..3487f8ed 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -898,13 +898,6 @@ TEST_F(UtestDavinciModel, Sink_time_profile) { model.SinkTimeProfile(current_data); } -TEST_F(UtestDavinciModel, Step_info_profile) { - ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; - ProfilingManager::Instance().device_id_.emplace_back(0); - DavinciModel model(0, nullptr); - EXPECT_EQ(DavinciModel::ProfileStepInfo(&model, 0), SUCCESS); -} - class ClassTest { public: virtual ~ClassTest() {} diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index d9ad102a..069cc73c 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -37,7 +37,6 @@ #include "hybrid/common/npu_memory_allocator.h" #include "graph/types.h" #include "graph/utils/tensor_utils.h" -#include "common/profiling/profiling_manager.h" #undef private #undef protected @@ -47,12 +46,6 @@ using namespace testing; using namespace ge; using namespace hybrid; -namespace { -int32_t MsprofReport(uint32_t moduleId, uint32_t type, void *data, uint32_t len) { - return 0; -} -} // namespace - class UtestGeHybrid : public testing::Test { protected: void SetUp() {} @@ -251,7 +244,7 @@ TEST_F(UtestGeHybrid, init_weight_success) { ASSERT_EQ(ret,PARAM_INVALID); } -TEST_F(UtestGeHybrid, hybrid_model_executor) { + TEST_F(UtestGeHybrid, hybrid_model_executor) { ComputeGraphPtr compute_graph = MakeShared("abc"); GeRootModelPtr root_model = MakeShared(compute_graph); HybridModel model(root_model); @@ -261,14 +254,4 @@ TEST_F(UtestGeHybrid, hybrid_model_executor) { rtStream_t stream; HybridModelExecutor executor(model_ptr, device_id, stream); executor.Init(); -} - -TEST_F(UtestGeHybrid, Step_info_profile) { - ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; - ProfilingManager::Instance().device_id_.emplace_back(0); - auto graph = make_shared("graph"); - auto root_model = make_shared(graph); - HybridModel model(root_model); - HybridModelExecutor executor(&model, 0, nullptr); - EXPECT_EQ(executor.ProfileStepInfo(0), SUCCESS); } \ No newline at end of file diff --git a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc index 3dfbff41..6bcb23d2 100644 --- a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc +++ b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc @@ -78,3 +78,9 @@ TEST_F(UtestGeProfilinganager, plungin_init_) { EXPECT_EQ(ret, INTERNAL_ERROR); ProfilingManager::Instance().prof_cb_.msprofReporterCallback = nullptr; } + +TEST_F(UtestGeProfilinganager, test_step_info) { + ProfilingManager::Instance().prof_cb_.msprofReporterCallback = ReporterCallback; + EXPECT_EQ(ProfilingManager::Instance().ProfileStepInfo(0, 0, 0, nullptr, 0), SUCCESS); + ProfilingManager::Instance().prof_cb_.msprofReporterCallback = nullptr; +} diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h index 18550157..3fad74bc 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_callback.h +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -74,7 +74,6 @@ enum MsprofReporterCallbackType { MSPROF_REPORTER_REPORT = 0, // report data MSPROF_REPORTER_INIT, // init reporter MSPROF_REPORTER_UNINIT, // uninit reporter - MSPROF_REPORTER_DATA_MAX_LEN, // data max length for calling report callback }; /**