diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index f604aeb8..6c5782f8 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -24,6 +24,7 @@
 #include "graph/types.h"
 #include "runtime/base.h"
 #include "graph/load/model_manager/davinci_model.h"
+#include "mmpa/mmpa_api.h"
 
 namespace {
 const char *const kTrainingTrace = "training_trace";
@@ -47,6 +48,7 @@ const std::string kOptype = "op_type";
 const std::string kBlockDim = "block_dims";
 const std::string kTaskId = "task_id";
 const std::string kStreamId = "stream_id";
+const std::string kThreadId = "thread_id";
 const std::string kShapeType = "shape_type";
 const std::string kCurIterNum = "cur_iter_num";
 const std::string kTaskType = "task_type";
@@ -287,6 +289,56 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
 #endif
 }
 
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfileStepInfo(
+  uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id) {
+  rtError_t rt_ret = RT_ERROR_NONE;
+#ifndef ONLY_COMPILE_OPEN_SRC
+  GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u",
+         index_id, model_id, tag_id);
+  rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u",
+         index_id, model_id, tag_id);
+#endif
+
+  mmTimespec timespec = mmGetTickCount();
+  // 1000 ^ 3 converts second to nanosecond
+  int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec;
+  uint32_t task_id = 0;
+  uint32_t stream_id = 0;
+  rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id);
+
+  Json step_info;
+  step_info[kIndexId] = index_id;
+  step_info[kModeleId] = model_id;
+  step_info[kTimeStamp] = time;
+  step_info[kTagId] = tag_id;
+  step_info[kTaskId] = task_id;
+  step_info[kStreamId] = stream_id;
+  step_info[kThreadId] = mmGetTid();
+
+  std::string reported_data;
+  try {
+    reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
+  } catch (std::exception &e) {
+    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
+  } catch (...) {
+    GELOGE(FAILED, "Failed to convert JSON to string.");
+  }
+  reported_data.append(",")
+               .append("\n");
+  ProfilingManager::Instance().ReportData(device_id, reported_data, "step_info");
+  return SUCCESS;
+}
+
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData(
     const int32_t &device_id, const string &data, const string &tag_name) {
 #ifdef DAVINCI_SUPPORT_PROFILING
diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h
index 9dcc5dab..c6dfe841 100755
--- a/ge/common/profiling/profiling_manager.h
+++ b/ge/common/profiling/profiling_manager.h
@@ -97,6 +97,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   void GetFpBpPoint(std::string &fp_point, std::string &bp_point);
   void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const;
   void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name);
+  Status ProfileStepInfo(uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id);
  private:
   Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf);
   Status ParseOptions(const std::string &options);
diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index 08d60b59..7244d64d 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -2308,61 +2308,6 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
   return SUCCESS;
 }
 
-Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) {
-  if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
-    uint64_t index_id = model->iterator_count_ + 1;
-    uint64_t model_id = static_cast<uint64_t>(model->Id());
-    rtError_t rt_ret = RT_ERROR_NONE;
-#ifndef ONLY_COMPILE_OPEN_SRC
-    rtStream_t stream = model->rt_model_stream_;
-    GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u",
-           index_id, model_id, tag_id);
-    rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret);
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
-    }
-    GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u",
-           index_id, model_id, tag_id);
-#endif
-
-    mmTimespec timespec = mmGetTickCount();
-    // 1000 ^ 3 converts second to nanosecond
-    int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec;
-    uint32_t task_id = 0;
-    uint32_t stream_id = 0;
-    rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret);
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
-    }
-    GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id);
-    int32_t device_id = static_cast<int32_t>(model->GetDeviceId());
-
-    Json step_info;
-    step_info[kIndexId] = index_id;
-    step_info[kModeleId] = model_id;
-    step_info[kTimeStamp] = time;
-    step_info[kTagId] = tag_id;
-    step_info[kTaskId] = task_id;
-    step_info[kStreamId] = stream_id;
-    step_info[kThreadId] = mmGetTid();
-
-    std::string reported_data;
-    try {
-      reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
-    } catch (std::exception &e) {
-      GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
-    } catch (...) {
-      GELOGE(FAILED, "Failed to convert JSON to string.");
-    }
-    reported_data.append(",")
-                 .append("\n");
-    ProfilingManager::Instance().ReportData(device_id, reported_data, "step_info");
-  }
-  return SUCCESS;
-}
-
 void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) {
   int64_t time = endTime;
 
@@ -2657,8 +2602,6 @@ void *DavinciModel::Run(DavinciModel *model) {
       GELOGI("data_wrapper is null!");
       continue;
     }
-    // tag_id 0 means step begin, 1 meas step end.
-    (void)DavinciModel::ProfileStepInfo(model, 0);
     GELOGI("Getting the input data, model_id:%u", model_id);
     GE_IF_BOOL_EXEC(!model->RunFlag(), break);
 
@@ -2738,8 +2681,6 @@ void *DavinciModel::Run(DavinciModel *model) {
     GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
                     model->SetProfileTime(MODEL_AFTER_PROC_END));
     GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data));
-    // tag_id 0 means step begin, 1 meas step end.
-    (void)DavinciModel::ProfileStepInfo(model, 1);
 
     model->iterator_count_++;
     model->is_first_execute_ = false;
@@ -3755,15 +3696,24 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
   GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_END));
 
   if (!task_list_.empty()) {
+    uint64_t index_id = iterator_count_ + 1;
+    uint64_t model_id = static_cast<uint64_t>(model_id_);
+    int32_t device_id = static_cast<int32_t>(device_id_);
     // tag_id 0 means step begin, 1 meas step end.
-    GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0));
+    if (profiling_model_execute_on) {
+      GE_CHK_STATUS_RET_NOLOG(
+        ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id));
+    }             
     GELOGD("rtModelExecute do");
     GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START));
     rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0);
     GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret));
     GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END));
     GELOGD("rtModelExecute end");
-    GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1));
+    if (profiling_model_execute_on) {
+      GE_CHK_STATUS_RET_NOLOG(
+        ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id));
+    }   
     iterator_count_++;
   }
 
diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h
index 1adeb1cc..58478b0f 100755
--- a/ge/graph/load/model_manager/davinci_model.h
+++ b/ge/graph/load/model_manager/davinci_model.h
@@ -878,8 +878,6 @@ class DavinciModel {
   Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node);
   Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node);
 
-  static Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id);
-
   bool is_weight_mem_has_inited_;
   bool is_feature_map_mem_has_inited_;
 
diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc
index 3e65b5c1..80dc4184 100755
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -19,21 +19,12 @@
 #include "graph/runtime_inference_context.h"
 #include "common/dump/dump_manager.h"
 #include "common/profiling/profiling_manager.h"
-#include "mmpa/mmpa_api.h"
 
 namespace ge {
 namespace hybrid {
 namespace {
 const int kIntBase = 10;
 const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL";
-const char *const kIndexId = "index_id";
-const char *const kModeleId = "model_id";
-const char *const kTimeStamp = "time_stamp";
-const char *const kStreamId = "stream_id";
-const char *const kTaskId = "task_id";
-const char *const kTagId = "tag_id";
-const char *const kThreadId = "thread_id";
-const uint32_t kInteval = 2;
 } // namespace
 HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream)
     : model_(model), device_id_(device_id), stream_(stream) {
@@ -81,60 +72,6 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
   return SUCCESS;
 }
 
-Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) {
-  auto &prof_mgr = ProfilingManager::Instance();
-  if (prof_mgr.ProfilingModelExecuteOn()) {
-    uint64_t index_id = context_.iteration + 1;
-    uint64_t model_id = static_cast<uint64_t>(model_->GetModelId());
-    rtError_t rt_ret = RT_ERROR_NONE;
-#ifndef ONLY_COMPILE_OPEN_SRC
-    GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u",
-           index_id, model_id, tag_id);
-    rt_ret = rtProfilerTraceEx(index_id,  model_id, tag_id, stream_);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret);
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
-    }
-    GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u",
-           index_id, model_id, tag_id);
-#endif
-
-    mmTimespec timespec = mmGetTickCount();
-    // 1000 ^ 3 converts second to nanosecond
-    int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec;
-    uint32_t task_id = 0;
-    uint32_t stream_id = 0;
-    rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret);
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
-    }
-    GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id);
-
-    Json step_info;
-    step_info[kIndexId] = index_id;
-    step_info[kModeleId] = model_id;
-    step_info[kTimeStamp] = time;
-    step_info[kTagId] = tag_id;
-    step_info[kTaskId] = task_id;
-    step_info[kStreamId] = stream_id;
-    step_info[kThreadId] = mmGetTid();
-
-    std::string reported_data;
-    try {
-      reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
-    } catch (std::exception &e) {
-      GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
-    } catch (...) {
-      GELOGE(FAILED, "Failed to convert JSON to string.");
-    }
-    reported_data.append(",")
-                 .append("\n");
-    prof_mgr.ReportData(device_id_, reported_data, "step_info");
-  }
-  return SUCCESS;
-}
-
 Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
                                                  HybridModelExecutor::ExecuteArgs &args) {
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start");
@@ -142,11 +79,19 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End");
 
   // tag_id 0 means step begin, 1 meas step end.
-  GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(0));
+  uint64_t index_id = context_.iteration + 1;
+  uint64_t model_id = static_cast<uint64_t>(model_->GetModelId());
+  int32_t device_id = static_cast<int32_t>(device_id_);
+  auto &prof_mgr = ProfilingManager::Instance();
+  if (prof_mgr.ProfilingModelExecuteOn()) {
+    GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 0, stream_, device_id));
+  }
   HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs),
                         "Failed to execute partitioned call.");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");
-  GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(1));
+  if (prof_mgr.ProfilingModelExecuteOn()) {
+    GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id));
+  }
 
   HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h
index af0bd536..d2bd224f 100644
--- a/ge/hybrid/executor/hybrid_model_executor.h
+++ b/ge/hybrid/executor/hybrid_model_executor.h
@@ -51,7 +51,6 @@ class HybridModelExecutor {
   Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args);
   Status Cleanup();
   Status InitExecutionContext();
-  Status ProfileStepInfo(uint16_t tag_id);
 
   static Status ResetExecutionContext(GraphExecutionContext &context);
 
diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc
index ae6afcc5..3487f8ed 100644
--- a/tests/ut/ge/graph/load/davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc
@@ -898,13 +898,6 @@ TEST_F(UtestDavinciModel, Sink_time_profile) {
   model.SinkTimeProfile(current_data);
 }
 
-TEST_F(UtestDavinciModel, Step_info_profile) {
-  ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
-  ProfilingManager::Instance().device_id_.emplace_back(0);
-  DavinciModel model(0, nullptr);
-  EXPECT_EQ(DavinciModel::ProfileStepInfo(&model, 0), SUCCESS);
-}
-
 class ClassTest {
 public:
     virtual ~ClassTest() {}
diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
index d9ad102a..069cc73c 100644
--- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
+++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
@@ -37,7 +37,6 @@
 #include "hybrid/common/npu_memory_allocator.h"
 #include "graph/types.h"
 #include "graph/utils/tensor_utils.h"
-#include "common/profiling/profiling_manager.h"
 
 #undef private
 #undef protected
@@ -47,12 +46,6 @@ using namespace testing;
 using namespace ge;
 using namespace hybrid;
 
-namespace {
-int32_t MsprofReport(uint32_t moduleId, uint32_t type, void *data, uint32_t len) {
-  return 0;
-}
-}  // namespace
-
 class UtestGeHybrid : public testing::Test {
  protected:
   void SetUp() {}
@@ -251,7 +244,7 @@ TEST_F(UtestGeHybrid, init_weight_success) {
   ASSERT_EQ(ret,PARAM_INVALID);
 }
 
-TEST_F(UtestGeHybrid, hybrid_model_executor) {
+  TEST_F(UtestGeHybrid, hybrid_model_executor) {
   ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("abc");
   GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(compute_graph);
   HybridModel model(root_model);
@@ -261,14 +254,4 @@ TEST_F(UtestGeHybrid, hybrid_model_executor) {
   rtStream_t stream;
   HybridModelExecutor executor(model_ptr, device_id, stream);
   executor.Init();
-}
-
-TEST_F(UtestGeHybrid, Step_info_profile) {
-  ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
-  ProfilingManager::Instance().device_id_.emplace_back(0);
-  auto graph = make_shared<ComputeGraph>("graph");
-  auto root_model = make_shared<GeRootModel>(graph);
-  HybridModel model(root_model);
-  HybridModelExecutor executor(&model, 0, nullptr);
-  EXPECT_EQ(executor.ProfileStepInfo(0), SUCCESS);
 }
\ No newline at end of file
diff --git a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc
index 3dfbff41..6bcb23d2 100644
--- a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc
+++ b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc
@@ -78,3 +78,9 @@ TEST_F(UtestGeProfilinganager, plungin_init_) {
   EXPECT_EQ(ret, INTERNAL_ERROR);
   ProfilingManager::Instance().prof_cb_.msprofReporterCallback = nullptr;
 }
+
+TEST_F(UtestGeProfilinganager, test_step_info) {
+  ProfilingManager::Instance().prof_cb_.msprofReporterCallback = ReporterCallback;
+  EXPECT_EQ(ProfilingManager::Instance().ProfileStepInfo(0, 0, 0, nullptr, 0), SUCCESS);
+  ProfilingManager::Instance().prof_cb_.msprofReporterCallback = nullptr;
+}
diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h
index 18550157..3fad74bc 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_callback.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h
@@ -74,7 +74,6 @@ enum MsprofReporterCallbackType {
     MSPROF_REPORTER_REPORT = 0,           // report data
     MSPROF_REPORTER_INIT,                 // init reporter
     MSPROF_REPORTER_UNINIT,               // uninit reporter
-    MSPROF_REPORTER_DATA_MAX_LEN,         // data max length for calling report callback
 };
 
 /**