From 0a1b1d3a7038950ddc7acaa3e6bc77aa99d50310 Mon Sep 17 00:00:00 2001
From: zhengyuanhua <zhengyuanhua1@huawei.com>
Date: Mon, 12 Apr 2021 15:28:03 +0800
Subject: [PATCH] aic error: move exception dump into ExceptionDumper

---
 ge/CMakeLists.txt                             |   2 +
 ge/common/debug/memory_dumper.cc              |   2 +-
 ge/common/dump/exception_dumper.cc            | 241 ++++++++++++++++++
 ge/common/dump/exception_dumper.h             |  48 ++++
 ge/executor/CMakeLists.txt                    |   1 +
 ge/graph/load/model_manager/data_dumper.cc    | 171 -------------
 ge/graph/load/model_manager/data_dumper.h     |   8 -
 ge/graph/load/model_manager/davinci_model.cc  |  38 ++-
 ge/graph/load/model_manager/davinci_model.h   |  11 +-
 ge/graph/load/model_manager/model_manager.cc  |  18 +-
 ge/graph/load/model_manager/model_manager.h   |   2 +
 .../task_info/kernel_ex_task_info.cc          |   1 +
 .../task_info/kernel_task_info.cc             |   1 +
 .../executor/hybrid_execution_context.cc      |  22 ++
 ge/hybrid/executor/hybrid_execution_context.h |   4 +
 .../executor/hybrid_model_async_executor.h    |   2 +
 ge/hybrid/executor/hybrid_model_executor.cc   |  14 +-
 .../hybrid_model_pipeline_executor.cc         |   8 +
 ge/hybrid/executor/worker/execution_engine.cc |  42 +++
 ge/hybrid/hybrid_davinci_model.cc             |  28 ++
 ge/hybrid/hybrid_davinci_model.h              |   4 +
 ge/hybrid/hybrid_davinci_model_stub.cc        |   8 +
 .../aicore/aicore_node_executor.cc            |   2 +
 .../aicpu/aicpu_node_executor.cc              |   2 +
 .../compiledsubgraph/known_node_executor.cc   |   7 +
 .../compiledsubgraph/known_node_executor.h    |   1 +
 tests/ut/ge/CMakeLists.txt                    |   2 +
 tests/ut/ge/common/dump_exception_unittest.cc |  54 ++++
 .../ge/graph/load/davinci_model_unittest.cc   |  13 +
 29 files changed, 568 insertions(+), 189 deletions(-)
 create mode 100644 ge/common/dump/exception_dumper.cc
 create mode 100644 ge/common/dump/exception_dumper.h
 create mode 100644 tests/ut/ge/common/dump_exception_unittest.cc

diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 9b979200..a081419b 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -108,6 +108,7 @@ set(TRAIN_SRC_LIST
     "common/helper/model_cache_helper.cc"
     "common/profiling/profiling_manager.cc"
     "common/dump/dump_manager.cc"
+    "common/dump/exception_dumper.cc"
     "common/dump/dump_properties.cc"
     "common/dump/opdebug_register.cc"
     "common/dump/dump_op.cc"
@@ -432,6 +433,7 @@ set(INFER_SRC_LIST
     "common/formats/formats.cc"
     "common/profiling/profiling_manager.cc"
     "common/dump/dump_properties.cc"
+    "common/dump/exception_dumper.cc"
     "common/dump/dump_manager.cc"
     "common/dump/dump_op.cc"
     "common/dump/opdebug_register.cc"
diff --git a/ge/common/debug/memory_dumper.cc b/ge/common/debug/memory_dumper.cc
index 527f0bb2..e6a090f2 100644
--- a/ge/common/debug/memory_dumper.cc
+++ b/ge/common/debug/memory_dumper.cc
@@ -155,7 +155,7 @@ int MemoryDumper::OpenFile(const char *filename) {
   // Using the O_EXCL, if the file already exists,return failed to avoid privilege escalation vulnerability.
   mmMode_t mode = M_IRUSR | M_IWUSR;
 
-  int32_t fd = mmOpen2(real_path.c_str(), M_RDWR | M_CREAT | O_TRUNC, mode);
+  int32_t fd = mmOpen2(real_path.c_str(), M_RDWR | M_CREAT | M_APPEND, mode);
   if (fd == EN_ERROR || fd == EN_INVALID_PARAM) {
     GELOGE(kInvalidFd, "open file failed. errno = %d, %s", fd, strerror(errno));
     return kInvalidFd;
diff --git a/ge/common/dump/exception_dumper.cc b/ge/common/dump/exception_dumper.cc
new file mode 100644
index 00000000..bed389a7
--- /dev/null
+++ b/ge/common/dump/exception_dumper.cc
@@ -0,0 +1,241 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common/dump/exception_dumper.h"
+
+#include "common/ge/datatype_util.h"
+#include "common/debug/memory_dumper.h"
+#include "framework/common/debug/log.h"
+#include "graph/manager/util/debug.h"
+#include "graph/utils/tensor_utils.h"
+#include "graph/load/model_manager/model_utils.h"
+#include "proto/dump_task.pb.h"
+
+namespace {
+static uint64_t GetNowTime() {
+  // Timestamp in microseconds built from mmGetTimeOfDay; 0 is returned when that call fails.
+  mmTimeval now;
+  if (mmGetTimeOfDay(&now, nullptr) != 0) {
+    return 0;
+  }
+  const uint64_t micros = now.tv_sec * 1000000ULL + now.tv_usec;
+  return micros;
+}
+
+static void ReplaceStringElem(std::string &str) {
+  // Rewrite blanks, dots, slashes and backslashes to '_' in place; every other character is kept.
+  for (char &ch : str) {
+    const bool replace = (ch == ' ') || (ch == '.') || (ch == '/') || (ch == '\\');
+    ch = replace ? '_' : ch;
+  }
+}
+
+static void SetDumpData(const ge::OpDescInfo &op_desc_info, toolkit::dumpdata::DumpData &dump_data) {
+  // Fill the dump header, then append one OpInput / OpOutput message per recorded tensor description.
+  dump_data.set_version("2.0");
+  dump_data.set_dump_time(GetNowTime());
+  dump_data.set_op_name(op_desc_info.op_name);
+  for (size_t in_idx = 0; in_idx < op_desc_info.input_format.size(); ++in_idx) {
+    const auto ir_type = ge::DataTypeUtil::GetIrDataType(op_desc_info.input_data_type[in_idx]);
+    toolkit::dumpdata::OpInput input;
+    input.set_data_type(toolkit::dumpdata::OutputDataType(ir_type));
+    input.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.input_format[in_idx]));
+    for (auto extent : op_desc_info.input_shape[in_idx]) {
+      input.mutable_shape()->add_dim(extent);
+    }
+    input.set_size(op_desc_info.input_size[in_idx]);
+    GELOGI("[Set][DumpData] The input size int exception is %ld", op_desc_info.input_size[in_idx]);
+    dump_data.mutable_input()->Add(std::move(input));
+  }
+  for (size_t out_idx = 0; out_idx < op_desc_info.output_format.size(); ++out_idx) {
+    const auto ir_type = ge::DataTypeUtil::GetIrDataType(op_desc_info.output_data_type[out_idx]);
+    toolkit::dumpdata::OpOutput output;
+    output.set_data_type(toolkit::dumpdata::OutputDataType(ir_type));
+    output.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.output_format[out_idx]));
+    for (auto extent : op_desc_info.output_shape[out_idx]) {
+      output.mutable_shape()->add_dim(extent);
+    }
+    output.set_size(op_desc_info.output_size[out_idx]);
+    GELOGI("[Set][DumpData] The output size int exception is %ld", op_desc_info.output_size[out_idx]);
+    dump_data.mutable_output()->Add(std::move(output));
+  }
+}
+}  // namespace
+
+namespace ge {
+ExceptionDumper::~ExceptionDumper() = default;
+
+void ExceptionDumper::SaveDumpOpInfo(const OpDescPtr &op, uint32_t task_id, uint32_t stream_id,
+                                     vector<void *> &input_addrs, vector<void *> &output_addrs) {
+  OpDescInfo record;  // tensor metadata plus the caller-supplied addresses of one launched task
+  record.input_addrs = input_addrs;
+  record.output_addrs = output_addrs;
+  SaveOpDescInfo(op, task_id, stream_id, record);
+  op_desc_info_.emplace_back(std::move(record));
+}
+
+void ExceptionDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op,
+                                     uint32_t task_id, uint32_t stream_id) {
+  OpDescInfo record;  // addresses are looked up through ModelUtils instead of being passed in
+  SaveOpDescInfo(op, task_id, stream_id, record);
+  record.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op);
+  record.output_addrs = ModelUtils::GetOutputDataAddrs(model_param, op);
+  op_desc_info_.emplace_back(std::move(record));
+}
+
+void ExceptionDumper::SaveOpDescInfo(const OpDescPtr &op, uint32_t task_id, uint32_t stream_id,
+                                     OpDescInfo &op_desc_info) {
+  if (op == nullptr) {
+    GELOGW("[Save][OpExceptionInfo] op desc ptr is null.");
+    return;
+  }
+  GELOGD("[Save][OpExceptionInfo] Start to save dump op [%s] info of task_id: %u, stream_id: %u",
+         op->GetName().c_str(), task_id, stream_id);
+  op_desc_info.op_name = op->GetName();
+  op_desc_info.op_type = op->GetType();
+  op_desc_info.task_id = task_id;
+  op_desc_info.stream_id = stream_id;
+  for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
+    GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
+    if (input_tensor_desc == nullptr) {
+      continue;
+    }
+    // Resolve the size before recording anything, so a failure cannot leave the parallel vectors unequal.
+    int64_t input_size = 0;
+    if (TensorUtils::GetTensorSizeInBytes(*input_tensor_desc, input_size) != SUCCESS) {
+      GELOGW("[Save][OpExceptionInfo] Op [%s] get input size failed.", op->GetName().c_str());
+      return;
+    }
+    GELOGD("[Save][OpExceptionInfo] Save dump op info, the input size is %ld", input_size);
+    op_desc_info.input_format.emplace_back(input_tensor_desc->GetFormat());
+    op_desc_info.input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
+    op_desc_info.input_data_type.emplace_back(input_tensor_desc->GetDataType());
+    op_desc_info.input_size.emplace_back(input_size);
+  }
+  for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
+    GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
+    if (output_tensor_desc == nullptr) {
+      continue;
+    }
+    int64_t output_size = 0;  // same ordering as for the inputs: size first, then the four records
+    if (TensorUtils::GetTensorSizeInBytes(*output_tensor_desc, output_size) != SUCCESS) {
+      GELOGW("[Save][OpExceptionInfo] Op [%s] get output size failed.", op->GetName().c_str());
+      return;
+    }
+    GELOGD("[Save][OpExceptionInfo] Save dump op info, the output size is %ld.", output_size);
+    op_desc_info.output_format.emplace_back(output_tensor_desc->GetFormat());
+    op_desc_info.output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
+    op_desc_info.output_data_type.emplace_back(output_tensor_desc->GetDataType());
+    op_desc_info.output_size.emplace_back(output_size);
+  }
+}
+
+Status ExceptionDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) const {
+  GELOGI("[Dump][Exception] Start to dump exception info");
+  for (const rtExceptionInfo &iter : exception_infos) {
+    OpDescInfo op_desc_info;
+    if (GetOpDescInfo(iter.streamid, iter.taskid, op_desc_info)) {
+      toolkit::dumpdata::DumpData dump_data;
+      SetDumpData(op_desc_info, dump_data);
+      uint64_t now_time = GetNowTime();
+      std::string op_name = op_desc_info.op_name;
+      std::string op_type = op_desc_info.op_type;
+      ReplaceStringElem(op_name);
+      ReplaceStringElem(op_type);
+      string dump_file_path =
+        "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time);
+      GELOGI("[Dump][Exception] The exception dump file path is %s", dump_file_path.c_str());
+      // Passed to dump_file_path in this order: proto size, serialized DumpData, input data, output data.
+      uint64_t proto_size = dump_data.ByteSizeLong();
+      std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
+      // new (std::nothrow) yields nullptr on failure, so check the buffer before protobuf writes into it.
+      if (proto_size == 0 || proto_msg == nullptr || !dump_data.SerializeToArray(proto_msg.get(), proto_size)) {
+        REPORT_INNER_ERROR("E19999", "Serialize proto to string fail");
+        GELOGE(PARAM_INVALID, "[Dump][Exception] Dump data proto serialize failed");
+        return PARAM_INVALID;
+      }
+
+      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), &proto_size, sizeof(uint64_t)),
+                        "Failed to dump proto size");
+      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), proto_msg.get(), proto_size),
+                        "Failed to dump proto msg");
+      if (DumpExceptionInput(op_desc_info, dump_file_path) != SUCCESS) {
+        GELOGE(PARAM_INVALID, "[Dump][Exception] Dump exception input failed");
+        return PARAM_INVALID;
+      }
+
+      if (DumpExceptionOutput(op_desc_info, dump_file_path) != SUCCESS) {
+        GELOGE(PARAM_INVALID, "[Dump][Exception] Dump exception output failed");
+        return PARAM_INVALID;
+      }
+      GELOGI("[Dump][Exception] Dump exception info SUCCESS");
+    } else {
+      GELOGE(PARAM_INVALID, "[Dump][Exception] Get op desc info failed,task id:%u,stream id:%u",
+             iter.taskid, iter.streamid);
+      return PARAM_INVALID;
+    }
+  }
+  return SUCCESS;
+}
+
+bool ExceptionDumper::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
+  GELOGI("[Get][OpDescInfo] There are %zu op need to dump.", op_desc_info_.size());
+  for (const OpDescInfo &dump_op_info : op_desc_info_) {
+    // Iterate by const reference; only the matching record is copied, into the caller's op_desc_info.
+    if (dump_op_info.task_id == task_id && dump_op_info.stream_id == stream_id) {
+      GELOGI("[Get][OpDescInfo] Find exception op [%s] of task_id: %u, stream_id: %u.",
+             dump_op_info.op_name.c_str(), task_id, stream_id);
+      op_desc_info = dump_op_info;
+      return true;
+    }
+  }
+  return false;
+}
+
+Status ExceptionDumper::DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file) const {
+  GELOGI("[Dump][ExceptionInput] Start to dump exception input");  // input_addrs[k] pairs with input_size[k]
+  for (size_t k = 0; k < op_desc_info.input_addrs.size(); ++k) {
+    if (SUCCESS != Debug::DumpDevMem(dump_file.data(), op_desc_info.input_addrs.at(k), op_desc_info.input_size.at(k))) {
+      GELOGE(PARAM_INVALID, "[Dump][ExceptionInput] Dump the %zu input data of op [%s] failed",
+             k, op_desc_info.op_name.c_str());
+      return PARAM_INVALID;
+    }
+  }
+  return SUCCESS;
+}
+
+Status ExceptionDumper::DumpExceptionOutput(const OpDescInfo &op_desc_info, const string &dump_file) const {
+  GELOGI("[Dump][ExceptionOutput] Start to dump exception output");  // output_addrs[i] pairs with output_size[i]
+  for (size_t i = 0; i < op_desc_info.output_addrs.size(); i++) {
+    if (Debug::DumpDevMem(dump_file.data(), op_desc_info.output_addrs.at(i), op_desc_info.output_size.at(i)) !=
+        SUCCESS) {
+      GELOGE(PARAM_INVALID, "[Dump][ExceptionOutput] Dump the %zu output data of op [%s] failed",
+             i, op_desc_info.op_name.c_str());
+      return PARAM_INVALID;
+    }
+  }
+  return SUCCESS;
+}
+
+OpDescInfo *ExceptionDumper::MutableOpDescInfo(uint32_t task_id, uint32_t stream_id) {
+  for (auto it = op_desc_info_.begin(); it != op_desc_info_.end(); ++it) {
+    if ((it->task_id == task_id) && (it->stream_id == stream_id)) {
+      return &(*it);  // points into op_desc_info_; growing that vector may invalidate it
+    }
+  }
+  return nullptr;
+}
+}  // namespace ge
\ No newline at end of file
diff --git a/ge/common/dump/exception_dumper.h b/ge/common/dump/exception_dumper.h
new file mode 100644
index 00000000..38a3f26e
--- /dev/null
+++ b/ge/common/dump/exception_dumper.h
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GE_COMMON_DUMP_EXCEPTION_DUMPER_H_
+#define GE_COMMON_DUMP_EXCEPTION_DUMPER_H_
+
+#include <vector>
+
+#include "graph/op_desc.h"
+#include "framework/common/ge_types.h"
+#include "graph/load/model_manager/task_info/task_info.h"
+
+namespace ge {
+class ExceptionDumper {
+ public:
+  ExceptionDumper() = default;
+  ~ExceptionDumper();
+  // Both overloads append one OpDescInfo record for the op launched as (task_id, stream_id).
+  void SaveDumpOpInfo(const OpDescPtr &op, uint32_t task_id, uint32_t stream_id,
+                      std::vector<void *> &input_addrs, std::vector<void *> &output_addrs);
+  void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id);
+  Status DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) const;
+  bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;  // copies the match out
+  OpDescInfo *MutableOpDescInfo(uint32_t task_id, uint32_t stream_id);  // nullptr when nothing matches
+
+ private:
+  void SaveOpDescInfo(const OpDescPtr &op, uint32_t task_id, uint32_t stream_id, OpDescInfo &op_desc_info);
+  Status DumpExceptionInput(const OpDescInfo &op_desc_info, const std::string &dump_file) const;
+  Status DumpExceptionOutput(const OpDescInfo &op_desc_info, const std::string &dump_file) const;
+  // Records added by SaveDumpOpInfo; lookups match on both task id and stream id.
+  std::vector<OpDescInfo> op_desc_info_;
+};
+}  // namespace ge
+
+#endif // GE_COMMON_DUMP_EXCEPTION_DUMPER_H_
diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt
index 363900d0..d78e8ed3 100644
--- a/ge/executor/CMakeLists.txt
+++ b/ge/executor/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRC_LIST
     "../common/ge/plugin_manager.cc"
     "../common/ge/op_tiling_manager.cc"
     "../common/dump/dump_properties.cc"
+    "../common/dump/exception_dumper.cc"
     "../common/dump/dump_manager.cc"
     "../common/dump/dump_op.cc"
     "../common/dump/opdebug_register.cc"
diff --git a/ge/graph/load/model_manager/data_dumper.cc b/ge/graph/load/model_manager/data_dumper.cc
index 5f48fe8e..1baebb00 100644
--- a/ge/graph/load/model_manager/data_dumper.cc
+++ b/ge/graph/load/model_manager/data_dumper.cc
@@ -72,24 +72,6 @@ static bool ParseNameIndex(const std::string &node_name_index, std::string &node
 static bool IsTensorDescWithSkipDumpAddrType(bool has_mem_type_attr, vector<int64_t> v_memory_type, size_t i) {
   return has_mem_type_attr && (v_memory_type[i] == RT_MEMORY_L1);
 }
-
-static uint64_t GetNowTime() {
-  uint64_t ret = 0;
-  mmTimeval tv;
-  if (mmGetTimeOfDay(&tv, nullptr) == 0) {
-    ret = tv.tv_sec * 1000000ULL + tv.tv_usec;
-  }
-
-  return ret;
-}
-
-static void ReplaceStringElem(std::string &str) {
-  for_each(str.begin(), str.end(), [](char &ch) {
-    if ((ch == ' ') || (ch == '.') || (ch == '/') || (ch == '\\')) {
-      ch = '_';
-    }
-  });
-}
 }  // namespace
 
 static int32_t GetIrDataType(ge::DataType data_type) {
@@ -194,66 +176,6 @@ void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_de
   is_op_debug_ = is_op_debug;
 }
 
-void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id,
-                                uint32_t stream_id) {
-  GELOGD("Start SaveDumpOpInfo of task_id: %u, stream_id: %u", task_id, stream_id);
-  OpDescInfo op_desc_info;
-  op_desc_info.op_name = op->GetName();
-  op_desc_info.op_type = op->GetType();
-  op_desc_info.task_id = task_id;
-  op_desc_info.stream_id = stream_id;
-  for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
-    GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
-    if (input_tensor_desc == nullptr) {
-      continue;
-    }
-    op_desc_info.input_format.emplace_back(input_tensor_desc->GetFormat());
-    op_desc_info.input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
-    op_desc_info.input_data_type.emplace_back(input_tensor_desc->GetDataType());
-    int64_t input_size = 0;
-
-    if (TensorUtils::GetTensorSizeInBytes(*input_tensor_desc, input_size) != SUCCESS) {
-      GELOGW("Get input size failed");
-      return;
-    }
-    GELOGD("Save dump op info, the input size is %ld", input_size);
-    op_desc_info.input_size.emplace_back(input_size);
-  }
-  for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
-    GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
-    if (output_tensor_desc == nullptr) {
-      continue;
-    }
-    op_desc_info.output_format.emplace_back(output_tensor_desc->GetFormat());
-    op_desc_info.output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
-    op_desc_info.output_data_type.emplace_back(output_tensor_desc->GetDataType());
-    int64_t output_size = 0;
-    if (TensorUtils::GetTensorSizeInBytes(*output_tensor_desc, output_size) != SUCCESS) {
-      GELOGW("Get input size failed");
-      return;
-    }
-    GELOGD("Save dump op info, the output size is %ld", output_size);
-    op_desc_info.output_size.emplace_back(output_size);
-  }
-  op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op);
-  op_desc_info.output_addrs = ModelUtils::GetOutputDataAddrs(model_param, op);
-
-  op_desc_info_.emplace_back(op_desc_info);
-}
-
-bool DataDumper::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
-  GELOGI("There are %zu op need to dump.", op_desc_info_.size());
-  for (size_t index = 0; index < op_desc_info_.size(); ++index) {
-    OpDescInfo dump_op_info = op_desc_info_.at(index);
-    if (dump_op_info.task_id == task_id && dump_op_info.stream_id == stream_id) {
-      GELOGI("find exception op of task_id: %u, stream_id: %u.", task_id, stream_id);
-      op_desc_info = dump_op_info;
-      return true;
-    }
-  }
-  return false;
-}
-
 void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc,
                               uintptr_t args) {
   if (op_desc == nullptr) {
@@ -873,97 +795,4 @@ void DataDumper::PrintCheckLog(string &dump_list_key) {
     }
   }
 }
-
-Status DataDumper::DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file) {
-  GELOGI("Start to dump exception input");
-  for (size_t i = 0; i < op_desc_info.input_addrs.size(); i++) {
-    if (Debug::DumpDevMem(dump_file.data(), op_desc_info.input_addrs.at(i), op_desc_info.input_size.at(i)) != SUCCESS) {
-      GELOGE(PARAM_INVALID, "Dump the %zu input data failed", i);
-      return PARAM_INVALID;
-    }
-  }
-  return SUCCESS;
-}
-
-Status DataDumper::DumpExceptionOutput(const OpDescInfo &op_desc_info, const string &dump_file) {
-  GELOGI("Start to dump exception output");
-  for (size_t i = 0; i < op_desc_info.output_addrs.size(); i++) {
-    if (Debug::DumpDevMem(dump_file.data(), op_desc_info.output_addrs.at(i), op_desc_info.output_size.at(i)) !=
-        SUCCESS) {
-      GELOGE(PARAM_INVALID, "Dump the %zu input data failed", i);
-      return PARAM_INVALID;
-    }
-  }
-  return SUCCESS;
-}
-
-Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exception_infos) {
-  GELOGI("Start to dump exception info");
-  for (const rtExceptionInfo &iter : exception_infos) {
-    OpDescInfo op_desc_info;
-    if (GetOpDescInfo(iter.streamid, iter.taskid, op_desc_info)) {
-      toolkit::dumpdata::DumpData dump_data;
-      dump_data.set_version("2.0");
-      dump_data.set_dump_time(GetNowTime());
-      dump_data.set_op_name(op_desc_info.op_name);
-      for (size_t i = 0; i < op_desc_info.input_format.size(); ++i) {
-        toolkit::dumpdata::OpInput input;
-        input.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.input_data_type[i])));
-        input.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.input_format[i]));
-        for (auto dim : op_desc_info.input_shape[i]) {
-          input.mutable_shape()->add_dim(dim);
-        }
-        input.set_size(op_desc_info.input_size[i]);
-        GELOGI("The input size int exception is %ld", op_desc_info.input_size[i]);
-        dump_data.mutable_input()->Add(std::move(input));
-      }
-      for (size_t j = 0; j < op_desc_info.output_format.size(); ++j) {
-        toolkit::dumpdata::OpOutput output;
-        output.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.output_data_type[j])));
-        output.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.output_format[j]));
-        for (auto dim : op_desc_info.output_shape[j]) {
-          output.mutable_shape()->add_dim(dim);
-        }
-        output.set_size(op_desc_info.output_size[j]);
-        GELOGI("The output size int exception is %ld", op_desc_info.output_size[j]);
-        dump_data.mutable_output()->Add(std::move(output));
-      }
-      uint64_t now_time = GetNowTime();
-      std::string op_name = op_desc_info.op_name;
-      std::string op_type = op_desc_info.op_type;
-      ReplaceStringElem(op_name);
-      ReplaceStringElem(op_type);
-      string dump_file_path =
-          "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time);
-      GELOGI("The exception dump file path is %s", dump_file_path.c_str());
-
-      uint64_t proto_size = dump_data.ByteSizeLong();
-      std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
-      bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size);
-      if (!ret || proto_size == 0) {
-        GELOGE(PARAM_INVALID, "Dump data proto serialize failed");
-        return PARAM_INVALID;
-      }
-
-      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), &proto_size, sizeof(uint64_t)),
-                        "Failed to dump proto size");
-      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), proto_msg.get(), proto_size),
-                        "Failed to dump proto msg");
-      if (DumpExceptionInput(op_desc_info, dump_file_path) != SUCCESS) {
-        GELOGE(PARAM_INVALID, "Dump exception input failed");
-        return PARAM_INVALID;
-      }
-
-      if (DumpExceptionOutput(op_desc_info, dump_file_path) != SUCCESS) {
-        GELOGE(PARAM_INVALID, "Dump exception output failed");
-        return PARAM_INVALID;
-      }
-      GELOGI("Dump exception info SUCCESS");
-    } else {
-      GELOGE(PARAM_INVALID, "Get op desc info failed,task id:%u,stream id:%u", iter.taskid, iter.streamid);
-      return PARAM_INVALID;
-    }
-  }
-  return SUCCESS;
-}
 }  // namespace ge
diff --git a/ge/graph/load/model_manager/data_dumper.h b/ge/graph/load/model_manager/data_dumper.h
index 06b42afd..8af07d86 100755
--- a/ge/graph/load/model_manager/data_dumper.h
+++ b/ge/graph/load/model_manager/data_dumper.h
@@ -70,8 +70,6 @@ class DataDumper {
 
   void SaveDumpInput(const std::shared_ptr<Node> &node);
 
-  void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id);
-
   // args is device memory stored first output addr
   void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args);
   void SaveEndGraphId(uint32_t task_id, uint32_t stream_id);
@@ -87,14 +85,8 @@ class DataDumper {
 
   void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
   const DumpProperties &GetDumpProperties() const { return dump_properties_; }
-  bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
   const std::vector<OpDescInfo> &GetAllOpDescInfo() const { return op_desc_info_; }
 
-  // Dump exception info
-  Status DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file);
-  Status DumpExceptionOutput(const OpDescInfo &op_desc_info, const string &dump_file);
-  Status DumpExceptionInfo(const std::vector<rtExceptionInfo> exception_infos);
-
  private:
   void ReleaseDevMem(void **ptr) noexcept;
 
diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index 08ff3cc3..b2618716 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -2506,9 +2506,9 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b
     GE_CHECK_NOTNULL(model_manager);
     auto exception_infos = model_manager->GetExceptionInfos();
     if (exception_infos.size() > 0) {
-      GE_CHK_STATUS_RET(data_dumper_.DumpExceptionInfo(exception_infos), "Dump exception info failed");
+      GE_CHK_STATUS_RET(DumpExceptionInfo(exception_infos), "[Dump][Exception] Dump exception info failed.");
     } else {
-      GELOGI("Exception info is null");
+      GELOGI("[Dump][Exception] Exception info is null.");
     }
     GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed.");
     return INTERNAL_ERROR;
@@ -4086,6 +4086,39 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
   }
 }
 
+void DavinciModel::UpdateOpIOAddrs(uint32_t task_id, uint32_t stream_id, const std::vector<void *> &io_addrs) {
+  // Refresh the addresses stored for exception dumps; skipped when the fixed base equals the memory base.
+  if (fixed_mem_base_ == reinterpret_cast<uintptr_t>(mem_base_)) {
+    GELOGD("[Update][OpIOAddrs] No need to update op input output addr.");
+    return;
+  }
+
+  OpDescInfo *op_desc_info = exception_dumper_.MutableOpDescInfo(task_id, stream_id);
+  if (op_desc_info == nullptr) {
+    GELOGD("[Update][OpIOAddrs] Find op desc failed, task_id: %u, stream_id: %u.", task_id, stream_id);
+    return;
+  }
+  const size_t input_size = op_desc_info->input_addrs.size();
+  const size_t output_size = op_desc_info->output_addrs.size();
+  if (io_addrs.size() != input_size + output_size) {
+    GELOGD("[Update][OpIOAddrs] Op[%s] input size[%zu] and output size[%zu] is not equal to io addr size[%zu]",
+           op_desc_info->op_name.c_str(), input_size, output_size, io_addrs.size());
+    return;
+  }
+
+  // The first input_size entries of io_addrs are treated as inputs, the remaining ones as outputs.
+  vector<void *> input_addrs;
+  vector<void *> output_addrs;
+  for (size_t idx = 0; idx < io_addrs.size(); ++idx) {
+    vector<void *> &bucket = (idx < input_size) ? input_addrs : output_addrs;
+    bucket.emplace_back(GetRunAddress(io_addrs[idx]));
+  }
+  // Replace the stored addresses only after the whole list has been translated.
+  op_desc_info->input_addrs = input_addrs;
+  op_desc_info->output_addrs = output_addrs;
+  GELOGD("[Update][OpIOAddrs] Op [%s] update input output addr success.", op_desc_info->op_name.c_str());
+}
+
 Status DavinciModel::InitL1DataDumperArgs() {
   auto all_dump_model = GetDumpProperties().GetAllDumpModel();
   bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end();
@@ -4109,5 +4142,4 @@ Status DavinciModel::InitL1DataDumperArgs() {
   }
   return SUCCESS;
 }
-
 }  // namespace ge
diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h
index 00baab9e..7a36d6e8 100755
--- a/ge/graph/load/model_manager/davinci_model.h
+++ b/ge/graph/load/model_manager/davinci_model.h
@@ -29,6 +29,7 @@
 #include "common/helper/om_file_helper.h"
 #include "common/opskernel/ge_task_info.h"
 #include "common/properties_manager.h"
+#include "common/dump/exception_dumper.h"
 #include "common/dump/opdebug_register.h"
 #include "common/types.h"
 #include "framework/common/util.h"
@@ -471,13 +472,17 @@ class DavinciModel {
   Status ReportProfilingData();
 
   void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
-    data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
+    exception_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);  // forwarded to the ExceptionDumper
   }
 
   void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr<OpDesc> &op_desc, uintptr_t args) {
     data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
   }
 
+  Status DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) const {
+    return exception_dumper_.DumpExceptionInfo(exception_infos);  // delegates; the status is returned unchanged
+  }
+
   void SetKnownShapeGlobalStep(void *global_step) {
     known_shape_global_step_ = global_step;
   }
@@ -556,8 +561,9 @@ class DavinciModel {
   void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }
   const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
 
+  void UpdateOpIOAddrs(uint32_t task_id, uint32_t stream_id, const std::vector<void *> &io_addrs);
   bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
-    return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
+    return exception_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);  // lookup moved to ExceptionDumper
   }
 
  private:
@@ -1001,6 +1007,7 @@ class DavinciModel {
   int64_t maxDumpOpNum_;
   // for data dump
   DataDumper data_dumper_;
+  ExceptionDumper exception_dumper_;
   OpdebugRegister opdebug_register_;
   uint64_t iterator_count_;
   bool is_l1_fusion_enable_;
diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc
index 794d8dea..614e47b1 100755
--- a/ge/graph/load/model_manager/model_manager.cc
+++ b/ge/graph/load/model_manager/model_manager.cc
@@ -259,6 +259,7 @@ ModelManager::~ModelManager() {
   model_map_.clear();
   model_aicpu_kernel_.clear();
   cust_aicpu_so_.clear();
+  dump_exception_flag_ = false;
 
   GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0)));
 }
@@ -1492,9 +1493,21 @@ Status ModelManager::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint3
   for (const auto &model : model_map_) {
     auto davinci_model = model.second;
     if (davinci_model->GetDeviceId() == device_id) {
-      GELOGI("Start to GetOpDescInfo of device_id: %u.", device_id);
+      GELOGI("[Get][OpDescInfo] Start to GetOpDescInfo of device_id: %u in davinci model.", device_id);
       if (davinci_model->GetOpDescInfo(stream_id, task_id, op_desc_info)) {
-        GELOGI("Find specific node of stream_id: %u, task_id: %u.", stream_id, task_id);
+        GELOGI("[Get][OpDescInfo] Find specific node of stream_id: %u, task_id: %u in davinci model.",
+               stream_id, task_id);
+        return SUCCESS;
+      }
+    }
+  }
+  for (const auto &model : hybrid_model_map_) {
+    auto hybrid_model = model.second;
+    if (hybrid_model->GetDeviceId() == device_id) {
+      GELOGI("[Get][OpDescInfo] Start to GetOpDescInfo of device_id: %u in hybrid model.", device_id);
+      if (hybrid_model->GetOpDescInfo(stream_id, task_id, op_desc_info)) {
+        GELOGI("[Get][OpDescInfo] Find specific node of stream_id: %u, task_id: %u in hybrid model.",
+               stream_id, task_id);
         return SUCCESS;
       }
     }
@@ -1507,6 +1520,7 @@ Status ModelManager::EnableExceptionDump(const std::map<string, string> &options
   if (iter != options.end()) {
     GELOGI("Find option enable_exeception_dump is %s", iter->second.c_str());
     if (iter->second == "1") {
+      dump_exception_flag_ = true;
       rtError_t rt_ret = rtSetTaskFailCallback(reinterpret_cast<rtTaskFailCallback>(ExceptionCallback));
       if (rt_ret != RT_ERROR_NONE) {
         GELOGE(RT_FAILED, "rtSetTaskFailCallback failed");
diff --git a/ge/graph/load/model_manager/model_manager.h b/ge/graph/load/model_manager/model_manager.h
index 489320f4..6e63d037 100755
--- a/ge/graph/load/model_manager/model_manager.h
+++ b/ge/graph/load/model_manager/model_manager.h
@@ -313,6 +313,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
     instance->AddExceptionInfo(*rt_exception_info);
   }
 
+  bool IsDumpExceptionOpen() const { return dump_exception_flag_; }  // flag is set by EnableExceptionDump
  private:
   ///
   /// @ingroup domi_ome
@@ -355,6 +356,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
   std::map<uintptr_t, std::map<std::string, CustAICPUKernelPtr>> cust_aicpu_so_;
 
   static DumpProperties dump_properties_;
+  bool dump_exception_flag_ = false;
 };
 }  // namespace ge
 
diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc
index 108fe78e..6c65187b 100644
--- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc
@@ -312,6 +312,7 @@ void KernelExTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) {
 Status KernelExTaskInfo::UpdateArgs() {
   GELOGI("KernelExTaskInfo::UpdateArgs in.");
   davinci_model_->SetTotalIOAddrs(io_addrs_);
+  davinci_model_->UpdateOpIOAddrs(task_id_, stream_id_, io_addrs_);
   GELOGI("KernelExTaskInfo::UpdateArgs success.");
   return SUCCESS;
 }
diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc
index 31ed7889..db1f6c30 100755
--- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc
@@ -480,6 +480,7 @@ Status KernelTaskInfo::UpdateArgs() {
       return CopyNoncontinuousArgs(io_addr_offset_);
     }
     davinci_model_->SetTotalIOAddrs(io_addrs_);
+    davinci_model_->UpdateOpIOAddrs(task_id_, stream_id_, io_addrs_);
   } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
     return CopyNoncontinuousArgs(sizeof(aicpu::AicpuParamHead));
   }
diff --git a/ge/hybrid/executor/hybrid_execution_context.cc b/ge/hybrid/executor/hybrid_execution_context.cc
index 50f6287c..005f38bc 100644
--- a/ge/hybrid/executor/hybrid_execution_context.cc
+++ b/ge/hybrid/executor/hybrid_execution_context.cc
@@ -62,5 +62,27 @@ Status GraphExecutionContext::Synchronize(rtStream_t rt_stream) {
   GELOGE(RT_FAILED, "Failed to invoke rtStreamSynchronize, ret = %d", rt_ret);
   return RT_FAILED;
 }
+
+// Dumps the given exception infos through exception_dumper first and then through every non-null model held in
+// davinci_model; an empty list is a no-op returning SUCCESS, and the first failing dump returns FAILED.
+Status GraphExecutionContext::DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) {
+  if (exception_infos.empty()) {
+    GELOGI("[Dump][ExceptionInfo] Exception info is null.");
+    return SUCCESS;
+  }
+  GELOGI("[Dump][ExceptionInfo] Start to search dynamic op info and to dump.");
+  if (exception_dumper.DumpExceptionInfo(exception_infos) != SUCCESS) {
+    GELOGE(FAILED, "[Dump][Exception] Dump dynamic op exception info failed.");
+    return FAILED;
+  }
+  GELOGI("[Dump][ExceptionInfo] Start to search static op info and to dump.");
+  for (const auto &known_model : davinci_model) {
+    if ((known_model != nullptr) && (known_model->DumpExceptionInfo(exception_infos) != SUCCESS)) {
+      GELOGE(FAILED, "[Dump][ExceptionInfo] Dump static op exception info failed.");
+      return FAILED;
+    }
+  }
+  return SUCCESS;
+}
 }  // namespace hybrid
 }  // namespace ge
\ No newline at end of file
diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h
index 54840c6a..67a96e98 100644
--- a/ge/hybrid/executor/hybrid_execution_context.h
+++ b/ge/hybrid/executor/hybrid_execution_context.h
@@ -23,6 +23,7 @@
 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
 #include "graph/ge_local_context.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "hybrid/common/npu_memory_allocator.h"
 #include "hybrid/common/tensor_value.h"
 #include "hybrid/executor/hybrid_profiler.h"
@@ -54,6 +55,7 @@ struct GraphExecutionContext {
   void SetErrorCode(Status error_code);
   Status GetStatus() const;
   Status Synchronize(rtStream_t rt_stream);
+  Status DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos);
 
   uint64_t session_id = 0;
   uint64_t context_id = 0;
@@ -68,6 +70,8 @@ struct GraphExecutionContext {
   DumpProperties dump_properties;
   bool trace_enabled = false;
   bool dump_enabled = false;
+  ExceptionDumper exception_dumper;
+  std::vector<std::shared_ptr<ge::DavinciModel>> davinci_model;
   std::atomic_bool is_eos_{false};
   long profiling_level = 0;
   long iteration = 0;
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index b6942b10..1a058388 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -53,6 +53,8 @@ class HybridModelAsyncExecutor {
 
   Status Stop();
 
+  const GraphExecutionContext * GeContext() { return executor_->GetContext(); }
+
   Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data);
 
  private:
diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc
index ceffa203..4cd2e4c0 100644
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -17,6 +17,7 @@
 #include "hybrid_model_executor.h"
 #include "graph/ge_context.h"
 #include "graph/runtime_inference_context.h"
+#include "graph/load/model_manager/model_manager.h"
 #include "common/dump/dump_manager.h"
 
 namespace ge {
@@ -79,8 +80,21 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
                         "Failed to execute partitioned call.");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");
 
-  HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
-  RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
+  Status ret = executor.Synchronize();
+  if (ret != ge::SUCCESS) {
+    // On a failed synchronize, dump the exception infos currently held by ModelManager before bailing out.
+    auto model_manager = ModelManager::GetInstance();
+    GE_CHECK_NOTNULL(model_manager);
+    auto exception_infos = model_manager->GetExceptionInfos();
+    if (!exception_infos.empty()) {
+      HYBRID_CHK_STATUS_RET(context_.DumpExceptionInfo(exception_infos),
+                            "[Execute][GraphInternal] Dump exception info failed.");
+    }
+    GELOGE(ret, "[Execute][GraphInternal] Synchronize failed.");
+    // Keep the early return of the replaced HYBRID_CHK_STATUS_RET: outputs must not be fetched after a failure.
+    return ret;
+  }
+  RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
 
   args.outputs.clear();
   HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs");
diff --git a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
index 97b4e1aa..a839ff65 100644
--- a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
@@ -4,6 +4,7 @@
 #include "common/dump/dump_manager.h"
 #include "graph/ge_context.h"
 #include "graph/runtime_inference_context.h"
+#include "graph/load/model_manager/model_manager.h"
 
 namespace ge {
 namespace hybrid {
@@ -260,6 +261,13 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar
     ret = stage_executors_[i]->Synchronize();
 
     if (ret != SUCCESS) {
+      // Best-effort dump: an early return here would leave the remaining stage executors unsynchronized.
+      auto model_manager = ModelManager::GetInstance();
+      GE_CHECK_NOTNULL(model_manager);
+      auto exception_infos = model_manager->GetExceptionInfos();
+      if (!exception_infos.empty() && (context_.DumpExceptionInfo(exception_infos) != SUCCESS)) {
+        GELOGE(FAILED, "[Execute][GraphInternal] Dump exception info failed.");
+      }
       GELOGE(ret, "[Executor: %zu] Failed to synchronize result.", i);
       has_error = true;
       continue;
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index f83d71ff..00aa64c9 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -19,6 +19,7 @@
 #include "graph/utils/tensor_utils.h"
 #include "graph/utils/tensor_adapter.h"
 #include "graph/debug/ge_attr_define.h"
+#include "graph/load/model_manager/model_manager.h"
 #include "hybrid/node_executor/node_executor.h"
 #include "hybrid/executor//worker//shape_inference_engine.h"
 #include "common/dump/dump_op.h"
@@ -70,6 +71,7 @@ class NodeDoneCallback {
   Status PrepareConstInputs(const NodeItem &node_item);
   Status DumpDynamicNode();
   Status ProfilingReport();
+  Status SaveDumpOpInfo();
   Status GetTaskDescInfo(const NodePtr node, const HybridModel *model,
                          std::vector<TaskDescInfo> &task_desc_info);
   GraphExecutionContext *graph_context_;
@@ -259,6 +261,40 @@ Status NodeDoneCallback::DumpDynamicNode() {
   return SUCCESS;
 }
 
+// Collects the data address of every input and output tensor of the finished node and stores them, together
+// with the node's op desc, task id and stream id, in the graph context's exception_dumper.
+Status NodeDoneCallback::SaveDumpOpInfo() {
+  GE_CHECK_NOTNULL(graph_context_);
+  GE_CHECK_NOTNULL(graph_context_->model);
+
+  auto node = context_->GetNodeItem().node;
+  if (node == nullptr) {
+    GELOGE(PARAM_INVALID, "[Save][DumpOpInfo] Get node is nullptr.");
+    return PARAM_INVALID;
+  }
+  auto op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+
+  vector<void *> input_addrs;
+  vector<void *> output_addrs;
+  for (int in_idx = 0; in_idx < context_->NumInputs(); ++in_idx) {
+    auto input_tensor = context_->GetInput(in_idx);
+    GE_CHK_BOOL_RET_STATUS(input_tensor != nullptr, PARAM_INVALID, "[Save][DumpOpInfo] Tensor value is nullptr.");
+    input_addrs.emplace_back(const_cast<void *>(input_tensor->GetData()));
+  }
+  for (int out_idx = 0; out_idx < context_->NumOutputs(); ++out_idx) {
+    auto output_tensor = context_->GetOutput(out_idx);
+    GE_CHK_BOOL_RET_STATUS(output_tensor != nullptr, PARAM_INVALID, "[Save][DumpOpInfo] Tensor value is nullptr.");
+    output_addrs.emplace_back(const_cast<void *>(output_tensor->GetData()));
+  }
+
+  uint32_t stream_id = context_->GetStreamId();
+  uint32_t task_id = context_->GetTaskId();
+  graph_context_->exception_dumper.SaveDumpOpInfo(op_desc, task_id, stream_id, input_addrs, output_addrs);
+
+  return SUCCESS;
+}
+
 Status NodeDoneCallback::OnNodeDone() {
   auto &node_item = context_->GetNodeItem();
   GELOGI("[%s] Start callback process.", node_item.NodeName().c_str());
@@ -271,6 +307,12 @@ Status NodeDoneCallback::OnNodeDone() {
     GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node");
   }
 
+  auto model_manager = ModelManager::GetInstance();
+  GE_CHECK_NOTNULL(model_manager);
+  if (model_manager->IsDumpExceptionOpen()) {
+    GE_CHK_STATUS_RET(SaveDumpOpInfo(), "[Save][DumpOpInfo] Failed to dump op info.");
+  }
+
   if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
     GE_CHK_STATUS_RET(ProfilingReport(), "Report node[%s] to profiling failed.",
                       node_item.NodeName().c_str());
diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc
index c741fe7e..9d339b73 100755
--- a/ge/hybrid/hybrid_davinci_model.cc
+++ b/ge/hybrid/hybrid_davinci_model.cc
@@ -80,6 +80,12 @@ class HybridDavinciModel::Impl {
     model_.SetOmName(model_name);
   }
 
+  uint32_t GetDeviceId() {
+    return model_.GetDeviceId();
+  }
+
+  const GraphExecutionContext * GeContext() { return executor_.GeContext(); }
+
   uint64_t GetSessionId() {
     return model_.GetSessionId();
   }
@@ -186,6 +192,11 @@ void HybridDavinciModel::SetOmName(const string &om_name) {
   }
 }
 
+uint32_t HybridDavinciModel::GetDeviceId() const {
+  GE_CHECK_NOTNULL(impl_);
+  return impl_->GetDeviceId();
+}
+
 Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
   GE_CHECK_NOTNULL(impl_);
   return impl_->GetDynamicBatchInfo(batch_info, dynamic_type);
@@ -217,6 +228,23 @@ void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
   }
 }
 
+// Searches the context's exception_dumper, then each non-null model in its davinci_model list, for the given task.
+bool HybridDavinciModel::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
+  auto context = (impl_ == nullptr) ? nullptr : impl_->GeContext();
+  if (context == nullptr) {
+    return false;  // Not GE_CHECK_NOTNULL: the status it returns would convert to `true` in a bool function.
+  }
+  if (context->exception_dumper.GetOpDescInfo(stream_id, task_id, op_desc_info)) {
+    return true;
+  }
+  for (const auto &iter : context->davinci_model) {
+    if ((iter != nullptr) && iter->GetOpDescInfo(stream_id, task_id, op_desc_info)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 uint64_t HybridDavinciModel::GetSessionId() {
   GE_CHECK_NOTNULL(impl_);
   return impl_->GetSessionId();
diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h
index 3b3473ff..7ba6a009 100644
--- a/ge/hybrid/hybrid_davinci_model.h
+++ b/ge/hybrid/hybrid_davinci_model.h
@@ -61,6 +61,8 @@ class HybridDavinciModel {
 
   uint64_t GetSessionId();
 
+  uint32_t GetDeviceId() const;
+
   Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);
 
   void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order);
@@ -72,6 +74,8 @@ class HybridDavinciModel {
                                 std::vector<uint32_t> &input_formats,
                                 std::vector<uint32_t> &output_formats);
 
+  bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
+
   void SetModelDescVersion(bool is_new_model_desc);
 
  private:
diff --git a/ge/hybrid/hybrid_davinci_model_stub.cc b/ge/hybrid/hybrid_davinci_model_stub.cc
index 67a7a101..ebfd3ad9 100644
--- a/ge/hybrid/hybrid_davinci_model_stub.cc
+++ b/ge/hybrid/hybrid_davinci_model_stub.cc
@@ -64,6 +64,10 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
 void HybridDavinciModel::SetOmName(const string &om_name) {
 }
 
+uint32_t HybridDavinciModel::GetDeviceId() const {
+  return 0;
+}
+
 uint64_t HybridDavinciModel::GetSessionId() {
   return 0;
 }
@@ -85,6 +89,10 @@ Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &i
   return UNSUPPORTED;
 }
 
+bool HybridDavinciModel::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
+  return false;  // The stub records no op info and leaves op_desc_info untouched, so it never reports a hit.
+}
+
 void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
 }
 }  // namespace hybrid
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index 119db0af..34ffa6ee 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -199,6 +199,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
       GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
       return RT_ERROR_TO_GE_STATUS(rt_ret);
     }
+    context.SetTaskId(task_id);
+    context.SetStreamId(stream_id);
     GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
     (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim());
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
index 1e2fbfe8..ccd95e5d 100755
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -206,6 +206,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
     GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
+  context.SetTaskId(task_id);
+  context.SetStreamId(stream_id);
   GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
   (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0);
   auto callback = [=, &context]() {
diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
index ae2f8bfe..f05763b5 100644
--- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
+++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
@@ -114,6 +114,13 @@ Status KnownNodeTask::Init(TaskContext &context) {
   GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
                                                                     davinci_model_->Id(), davinci_model_->SubModelId()),
                     "KnownNodeTask::Init destroy aicpu kernel failed.");
+  // Hand davinci_model_ to the execution context's davinci_model list once; load_flag_ blocks repeats on later Init.
+  if (!load_flag_) {
+    auto execution_context = const_cast<GraphExecutionContext *>(context.GetExecutionContext());
+    GE_CHECK_NOTNULL(execution_context);
+    execution_context->davinci_model.emplace_back(davinci_model_);
+    load_flag_ = true;
+  }
   GELOGI("[%s] KnownNodeExecutor::Init success.", context.GetNodeName());
   return SUCCESS;
 }
diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h
index 26141b5a..629cb543 100644
--- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h
+++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h
@@ -42,6 +42,7 @@ class KnownNodeTask : public NodeTask {
   virtual Status DoInitDavinciModel(void *weight, size_t weight_size);
  private:
   std::shared_ptr<DavinciModel> davinci_model_ = nullptr;
+  bool load_flag_ = false;
 };
 
 class KnownNodeExecutor : public NodeExecutor {
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index 9e9a6108..fddb355a 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -165,6 +165,7 @@ set(COMMON_SRC_FILES
     "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc"
     "${GE_CODE_DIR}/ge/common/helper/model_helper.cc"
     "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc"
+    "${GE_CODE_DIR}/ge/common/dump/exception_dumper.cc"
     "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc"
     "${GE_CODE_DIR}/ge/common/dump/dump_op.cc"
     "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
@@ -744,6 +745,7 @@ set(MULTI_PARTS_TEST_FILES
     "common/datatype_transfer_unittest.cc"
     "common/dump_manager_unittest.cc"
     "common/dump_op_unittest.cc"
+    "common/dump_exception_unittest.cc"
     "common/opdebug_register_unittest.cc"
     "common/format_transfer_unittest.cc"
     "common/format_transfer_transpose_unittest.cc"
diff --git a/tests/ut/ge/common/dump_exception_unittest.cc b/tests/ut/ge/common/dump_exception_unittest.cc
new file mode 100644
index 00000000..339d532e
--- /dev/null
+++ b/tests/ut/ge/common/dump_exception_unittest.cc
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#define protected public
+#define private public
+#include "common/dump/exception_dumper.h"
+#include "common/debug/log.h"
+#include "common/ge_inner_error_codes.h"
+#undef private
+#undef protected
+
+namespace ge {
+class UTEST_dump_exception : public testing::Test {
+ protected:
+  void SetUp() {}
+  void TearDown() {}
+};
+
+TEST_F(UTEST_dump_exception, save_dump_op_info_success) {
+  OpDescPtr op_desc = std::make_shared<OpDesc>("GatherV2", "GatherV2");
+  uint32_t task_id = 1;
+  uint32_t stream_id = 233;
+  vector<void *> input_addr;
+  vector<void *> output_addr;
+  ExceptionDumper exception_dumper;
+  exception_dumper.SaveDumpOpInfo(op_desc, task_id, stream_id, input_addr, output_addr);
+}
+
+TEST_F(UTEST_dump_exception, dump_exception_info) {
+  rtExceptionInfo exception_info = {1, 2, 3, 4, 5};
+  std::vector<rtExceptionInfo> exception_infos = { exception_info };
+  OpDescInfo op_desc_info = {"Save", "Save", 1, 2, {FORMAT_NCHW}, {{1}}, {DT_FLOAT}, {}, {2},
+                             {FORMAT_NCHW}, {{1}}, {DT_FLOAT}, {}, {2}};
+
+  ExceptionDumper exception_dumper;
+  exception_dumper.op_desc_info_ = { op_desc_info };
+  exception_dumper.DumpExceptionInfo(exception_infos);
+}
+}  // namespace ge
\ No newline at end of file
diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc
index 55f418d6..47145cdc 100644
--- a/tests/ut/ge/graph/load/davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc
@@ -948,4 +948,17 @@ TEST_F(UtestDavinciModel, simple_test_gmock) {
     EXPECT_EQ(mock_stub.func2(2, 5), 1023);
     EXPECT_EQ(mock_stub.func2(3, 5), 1023);
 }
+
+TEST_F(UtestDavinciModel, update_io_addr_success) {
+  DavinciModel model(0, nullptr);
+  uint32_t task_id = 1;
+  uint32_t stream_id = 2;
+  model.fixed_mem_base_ = 0x22;
+  model.mem_base_ = reinterpret_cast<uint8_t *>(&task_id);
+  OpDescInfo op_desc_info = {"Save", "Save", 1, 2, {FORMAT_NCHW}, {{1}}, {DT_FLOAT}, {nullptr}, {2},
+                             {FORMAT_NCHW}, {{1}}, {DT_FLOAT}, {nullptr}, {2}};
+  model.exception_dumper_.op_desc_info_ = { op_desc_info };
+  vector<void *> io_addr = {nullptr, nullptr};
+  model.UpdateOpIOAddrs(task_id, stream_id, io_addr);
+}
 }  // namespace ge