| @@ -24,7 +24,7 @@ usage() | |||
| { | |||
| echo "Usage:" | |||
| echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" | |||
| echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||
| echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||
| echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" | |||
| echo "" | |||
| echo "Options:" | |||
| @@ -48,6 +48,7 @@ usage() | |||
| echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" | |||
| echo " -Q Enable dump memory, default off" | |||
| echo " -D Enable dumping of function graph ir, default on" | |||
| echo " -S Enable async data dump, default off" | |||
| echo " -z Compile dataset & mindrecord, default on" | |||
| echo " -M Enable MPI and NCCL for GPU training, gpu default on" | |||
| echo " -V Specify the minimum required cuda version, default CUDA 10.1" | |||
| @@ -88,6 +89,7 @@ checkopts() | |||
| ENABLE_TIMELINE="off" | |||
| ENABLE_DUMP2PROTO="on" | |||
| ENABLE_DUMPE2E="off" | |||
| ENABLE_DATA_DUMP="off" | |||
| ENABLE_DUMP_IR="on" | |||
| COMPILE_MINDDATA="on" | |||
| ENABLE_MPI="off" | |||
| @@ -102,7 +104,7 @@ checkopts() | |||
| ENABLE_PYTHON="on" | |||
| # Process the options | |||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt | |||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt | |||
| do | |||
| OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') | |||
| case "${opt}" in | |||
| @@ -218,6 +220,11 @@ checkopts() | |||
| ENABLE_DUMPE2E="$OPTARG" | |||
| echo "enable dump end to end" | |||
| ;; | |||
| S) | |||
| check_on_off $OPTARG S | |||
| ENABLE_DATA_DUMP="$OPTARG" | |||
| echo "enable data dump" | |||
| ;; | |||
| D) | |||
| check_on_off $OPTARG D | |||
| ENABLE_DUMP_IR="$OPTARG" | |||
| @@ -321,6 +328,9 @@ build_mindspore() | |||
| if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" | |||
| fi | |||
| if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON" | |||
| fi | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" | |||
| if [[ "X$ENABLE_MPI" = "Xon" ]]; then | |||
| @@ -116,6 +116,10 @@ if(ENABLE_DUMP_E2E) | |||
| add_compile_definitions(ENABLE_DUMP_E2E) | |||
| endif() | |||
| if(ENABLE_DATA_DUMP) | |||
| add_compile_definitions(ENABLE_DATA_DUMP) | |||
| endif() | |||
| if(ENABLE_DEBUGGER) | |||
| add_compile_definitions(ENABLE_DEBUGGER) | |||
| endif() | |||
| @@ -0,0 +1,15 @@ | |||
| { | |||
| "DumpSettings": { | |||
| "net_name": "ResNet50", | |||
| "mode": 1, | |||
| "iteration": 0, | |||
| "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] | |||
| }, | |||
| "DumpSettingsSpec": { | |||
| "net_name": "net name eg:ResNet50", | |||
| "mode": "0: dump all kernels, 1: dump kernels in kernels list", | |||
| "iteration": "specified iteration", | |||
| "kernels": "op's full scope name which needs to be dumped" | |||
| } | |||
| } | |||
| @@ -1 +1 @@ | |||
| Subproject commit 1c2672868fda8b1d012c99e5aca73725ac869ba9 | |||
| Subproject commit 18cf690152add623ffbddfbbb4674d1b34484ca7 | |||
| @@ -109,8 +109,12 @@ if (ENABLE_D) | |||
| file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") | |||
| ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) | |||
| file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/dump/proto/*.proto") | |||
| ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS}) | |||
| add_compile_definitions(ENABLE_D) | |||
| endif () | |||
| @@ -19,6 +19,15 @@ if (ENABLE_DEBUGGER) | |||
| ) | |||
| endif (ENABLE_DEBUGGER) | |||
| if (ENABLE_D) | |||
| list(APPEND _DEBUG_SRC_LIST | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/common.cc" | |||
| ) | |||
| if (ENABLE_DATA_DUMP) | |||
| list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc") | |||
| endif(ENABLE_DATA_DUMP) | |||
| endif() | |||
| if (ENABLE_DUMP_E2E) | |||
| list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc") | |||
| endif (ENABLE_DUMP_E2E) | |||
| @@ -0,0 +1,125 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debug/common.h" | |||
| #include <memory> | |||
| #include <optional> | |||
| #include "utils/system/env.h" | |||
| #include "utils/system/file_system.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/context/ms_context.h" | |||
| namespace mindspore { | |||
| std::optional<std::string> Common::GetRealPath(const std::string &input_path) { | |||
| std::string out_path; | |||
| auto path_split_pos = input_path.find_last_of('/'); | |||
| if (path_split_pos == std::string::npos) { | |||
| path_split_pos = input_path.find_last_of('\\'); | |||
| } | |||
| // get real path | |||
| char real_path[PATH_MAX] = {0}; | |||
| if (path_split_pos != std::string::npos) { | |||
| std::string prefix_path = input_path.substr(0, path_split_pos); | |||
| if (prefix_path.length() >= PATH_MAX) { | |||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||
| return std::nullopt; | |||
| } | |||
| std::string last_path = input_path.substr(path_split_pos, input_path.length() - path_split_pos); | |||
| auto ret = CreateNotExistDirs(prefix_path); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; | |||
| return std::nullopt; | |||
| } | |||
| if (nullptr == realpath(prefix_path.c_str(), real_path)) { | |||
| MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; | |||
| return std::nullopt; | |||
| } | |||
| out_path = std::string(real_path) + last_path; | |||
| } | |||
| if (path_split_pos == std::string::npos) { | |||
| if (input_path.length() >= PATH_MAX) { | |||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||
| return std::nullopt; | |||
| } | |||
| if (nullptr == realpath(input_path.c_str(), real_path)) { | |||
| MS_LOG(ERROR) << "File " << input_path << " does not exit, it will be created."; | |||
| } | |||
| out_path = std::string(real_path); | |||
| } | |||
| return out_path; | |||
| } | |||
| bool Common::CreateNotExistDirs(const std::string &path) { | |||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||
| MS_EXCEPTION_IF_NULL(fs); | |||
| char temp_path[PATH_MAX] = {0}; | |||
| if (path.length() > PATH_MAX) { | |||
| MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; | |||
| return false; | |||
| } | |||
| for (uint32_t i = 0; i < path.length(); i++) { | |||
| temp_path[i] = path[i]; | |||
| if (temp_path[i] == '\\' || temp_path[i] == '/') { | |||
| if (i != 0) { | |||
| char tmp_char = temp_path[i]; | |||
| temp_path[i] = '\0'; | |||
| std::string path_handle(temp_path); | |||
| if (!fs->FileExist(temp_path)) { | |||
| MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; | |||
| if (!fs->CreateDir(temp_path)) { | |||
| MS_LOG(ERROR) << "Create " << path_handle << " dir error"; | |||
| return false; | |||
| } | |||
| } | |||
| temp_path[i] = tmp_char; | |||
| } | |||
| } | |||
| } | |||
| if (!fs->FileExist(path)) { | |||
| MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; | |||
| if (!fs->CreateDir(path)) { | |||
| MS_LOG(ERROR) << "Create " << path << " dir error"; | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| std::optional<std::string> Common::GetConfigFile(const std::string &env) { | |||
| if (env.empty()) { | |||
| MS_LOG(EXCEPTION) << "Invalid env"; | |||
| } | |||
| auto config_path_str = std::getenv(env.c_str()); | |||
| if (config_path_str == nullptr) { | |||
| MS_LOG(ERROR) << "Please export env:" << env; | |||
| return {}; | |||
| } | |||
| MS_LOG(INFO) << "Async Dump Getenv env:" << env << "=" << config_path_str; | |||
| std::string dump_config_file(config_path_str); | |||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||
| MS_EXCEPTION_IF_NULL(fs); | |||
| if (!fs->FileExist(dump_config_file)) { | |||
| MS_LOG(ERROR) << dump_config_file << " not exist."; | |||
| return {}; | |||
| } | |||
| return dump_config_file; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ | |||
| #include <string> | |||
| #include <optional> | |||
| #include "utils/contract.h" | |||
| namespace mindspore { | |||
// Static filesystem helpers shared by the debug/dump code: path resolution,
// recursive directory creation, and env-var-based config-file lookup.
class Common {
 public:
  Common() = default;
  ~Common() = default;
  // Resolve input_path to an absolute path, creating missing parent
  // directories along the way; returns std::nullopt on failure.
  static std::optional<std::string> GetRealPath(const std::string &input_path);
  // Return the file path stored in environment variable `env` after checking
  // the file exists; returns an empty optional otherwise.
  static std::optional<std::string> GetConfigFile(const std::string &env);
 private:
  // Create every directory component of `path` that does not exist yet.
  static bool CreateNotExistDirs(const std::string &path);
};
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ | |||
| @@ -0,0 +1,152 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debug/data_dump_parser.h" | |||
| #include <fstream> | |||
| #include "utils/context/ms_context.h" | |||
| #include "debug/common.h" | |||
// Names of the environment variables controlling async data dump.
// NOTE(review): "Ptah" in kDataDumpConfigPtah looks like a typo for "Path";
// the identifier is internal only and the env-var string itself is correct —
// renaming would require touching every use site.
constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
| namespace mindspore { | |||
// Restore every cached dump setting to its default (disabled/empty) state.
// Called from ParseDumpConfig while lock_ is held.
void DataDumpParser::ResetParam() {
  enable_ = false;
  net_name_.clear();
  dump_mode_ = 0;
  dump_step_ = 0;
  kernel_set_.clear();
}
| bool DataDumpParser::DumpEnabled() const { | |||
| auto enable_dump = std::getenv(kEnableDataDump); | |||
| if (!enable_dump) { | |||
| MS_LOG(WARNING) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP"; | |||
| return false; | |||
| } | |||
| auto enabled = std::atoi(enable_dump); | |||
| if (enabled != 1) { | |||
| MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1"; | |||
| return false; | |||
| } | |||
| auto context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| if (context->execution_mode() == kPynativeMode) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump"; | |||
| } | |||
| return true; | |||
| } | |||
| std::optional<std::string> DataDumpParser::GetDumpPath() const { | |||
| auto dump_path = std::getenv(kDataDumpPath); | |||
| if (!dump_path) { | |||
| MS_LOG(ERROR) << "[DataDump] dump path is null. Please export DATA_DUMP_PATH"; | |||
| return {}; | |||
| } | |||
| std::string dump_path_str(dump_path); | |||
| return dump_path_str; | |||
| } | |||
| void DataDumpParser::ParseDumpConfig() { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| MS_LOG(INFO) << "[DataDump] parse start"; | |||
| if (!DumpEnabled()) { | |||
| MS_LOG(INFO) << "[DataDump] dump not enable"; | |||
| return; | |||
| } | |||
| ResetParam(); | |||
| auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah); | |||
| if (!dump_config_file.has_value()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Get config file failed"; | |||
| } | |||
| std::ifstream json_file(dump_config_file.value()); | |||
| if (!json_file.is_open()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed."; | |||
| } | |||
| nlohmann::json j; | |||
| json_file >> j; | |||
| if (j.find("DumpSettings") == j.end()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist."; | |||
| } | |||
| nlohmann::json dump_settings = j.at("DumpSettings"); | |||
| // convert json to string | |||
| std::stringstream ss; | |||
| ss << dump_settings; | |||
| std::string cfg = ss.str(); | |||
| MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg; | |||
| if (!IsConfigExist(dump_settings)) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid"; | |||
| } | |||
| if (!ParseDumpSetting(dump_settings)) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed"; | |||
| } | |||
| } | |||
| bool DataDumpParser::NeedDump(const std::string &op_full_name) const { | |||
| if (!DumpEnabled()) { | |||
| return false; | |||
| } | |||
| if (dump_mode_ == 0) { | |||
| return true; | |||
| } | |||
| auto iter = kernel_set_.find(op_full_name); | |||
| return iter != kernel_set_.end(); | |||
| } | |||
| bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const { | |||
| if (dump_settings.find("mode") == dump_settings.end() || dump_settings.find("net_name") == dump_settings.end() || | |||
| dump_settings.find("iteration") == dump_settings.end() || dump_settings.find("kernels") == dump_settings.end()) { | |||
| MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) { | |||
| auto mode = dump_settings.at("mode"); | |||
| auto net_name = dump_settings.at("net_name"); | |||
| auto iteration = dump_settings.at("iteration"); | |||
| auto kernels = dump_settings.at("kernels"); | |||
| if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) { | |||
| MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid."; | |||
| enable_ = false; | |||
| return false; | |||
| } | |||
| enable_ = true; | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| dump_mode_ = mode; | |||
| net_name_ = net_name; | |||
| dump_step_ = iteration; | |||
| for (const auto &kernel : kernels) { | |||
| auto kernel_str = kernel.dump(); | |||
| kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end()); | |||
| MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str; | |||
| kernel_set_.insert(kernel_str); | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,61 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||
| #include <string> | |||
| #include <set> | |||
| #include <mutex> | |||
| #include <optional> | |||
| #include "nlohmann/json.hpp" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
// Singleton that parses the async data-dump JSON configuration and answers
// per-kernel "should this op be dumped?" queries.
class DataDumpParser {
 public:
  // Meyers-singleton accessor.
  static DataDumpParser &GetInstance() {
    static DataDumpParser instance;
    return instance;
  }
  // Load and validate the JSON config (thread-safe; no-op when dump disabled).
  void ParseDumpConfig();
  // True when `op_full_name` should be dumped under the current settings.
  bool NeedDump(const std::string &op_full_name) const;
  // True when ENABLE_DATA_DUMP=1 is exported; throws in PyNative mode.
  bool DumpEnabled() const;
  // Dump output directory from DATA_DUMP_PATH, or an empty optional if unset.
  std::optional<std::string> GetDumpPath() const;
  bool enable() const { return enable_; }
  const std::string &net_name() const { return net_name_; }
  uint32_t dump_mode() const { return dump_mode_; }
  uint32_t dump_step() const { return dump_step_; }
  const std::set<std::string> &kernel_set() const { return kernel_set_; }
 private:
  DataDumpParser() = default;
  virtual ~DataDumpParser() = default;
  DISABLE_COPY_AND_ASSIGN(DataDumpParser);
  // Restore all cached settings to their defaults before re-parsing.
  void ResetParam();
  // Check that all required keys exist in the DumpSettings object.
  bool IsConfigExist(const nlohmann::json &dump_settings) const;
  // Extract and cache the settings fields; false on a type mismatch.
  bool ParseDumpSetting(const nlohmann::json &dump_settings);
  std::mutex lock_;  // guards ParseDumpConfig
  bool enable_{false};  // config parsed successfully and dump requested
  std::string net_name_;  // network name from the config
  uint32_t dump_mode_{0};  // 0: dump all kernels, 1: only kernels in kernel_set_
  uint32_t dump_step_{0};  // iteration to dump
  std::set<std::string> kernel_set_;  // full-scope names of kernels to dump
};
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||
| @@ -17,12 +17,14 @@ | |||
| #include <limits.h> | |||
| #include <fstream> | |||
| #include <string> | |||
| #include <optional> | |||
| #include <nlohmann/json.hpp> | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/system/file_system.h" | |||
| #include "utils/system/env.h" | |||
| #include "utils/convert_utils.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "debug/common.h" | |||
| using json = nlohmann::json; | |||
| @@ -158,100 +160,19 @@ bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len) | |||
| return false; | |||
| } | |||
| std::string realpath; | |||
| bool ret = GetRealPath(filename, &realpath); | |||
| if (!ret) { | |||
| auto realpath = Common::GetRealPath(filename); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed."; | |||
| return false; | |||
| } | |||
| std::ofstream fd; | |||
| fd.open(realpath, std::ios::binary | std::ios::out); | |||
| fd.open(realpath.value(), std::ios::binary | std::ios::out); | |||
| if (!fd.is_open()) { | |||
| MS_LOG(ERROR) << "Open file " << realpath << " fail."; | |||
| MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; | |||
| return false; | |||
| } | |||
| (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len)); | |||
| fd.close(); | |||
| return true; | |||
| } | |||
| bool Dump::GetRealPath(const std::string &inpath, std::string *outpath) { | |||
| MS_EXCEPTION_IF_NULL(outpath); | |||
| auto path_split_pos = inpath.find_last_of('/'); | |||
| if (path_split_pos == std::string::npos) { | |||
| path_split_pos = inpath.find_last_of('\\'); | |||
| } | |||
| // get real path | |||
| char real_path[PATH_MAX] = {0}; | |||
| if (path_split_pos != std::string::npos) { | |||
| std::string prefix_path = inpath.substr(0, path_split_pos); | |||
| if (prefix_path.length() >= PATH_MAX) { | |||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||
| return false; | |||
| } | |||
| std::string last_path = inpath.substr(path_split_pos, inpath.length() - path_split_pos); | |||
| auto ret = CreateNotExistDirs(prefix_path); | |||
| if (ret == false) { | |||
| MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; | |||
| return false; | |||
| } | |||
| if (nullptr == realpath(prefix_path.c_str(), real_path)) { | |||
| MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; | |||
| return false; | |||
| } | |||
| *outpath = std::string(real_path) + last_path; | |||
| } | |||
| if (path_split_pos == std::string::npos) { | |||
| if (inpath.length() >= PATH_MAX) { | |||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||
| return false; | |||
| } | |||
| if (nullptr == realpath(inpath.c_str(), real_path)) { | |||
| MS_LOG(ERROR) << "File " << inpath << " does not exit, it will be created."; | |||
| } | |||
| *outpath = std::string(real_path); | |||
| } | |||
| return true; | |||
| } | |||
| bool Dump::CreateNotExistDirs(const std::string &path) { | |||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||
| MS_EXCEPTION_IF_NULL(fs); | |||
| char temp_path[PATH_MAX] = {0}; | |||
| if (path.length() > PATH_MAX) { | |||
| MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; | |||
| return false; | |||
| } | |||
| for (uint32_t i = 0; i < path.length(); i++) { | |||
| temp_path[i] = path[i]; | |||
| if (temp_path[i] == '\\' || temp_path[i] == '/') { | |||
| if (i != 0) { | |||
| char tmp_char = temp_path[i]; | |||
| temp_path[i] = '\0'; | |||
| std::string path_handle(temp_path); | |||
| if (!fs->FileExist(temp_path)) { | |||
| MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; | |||
| if (!fs->CreateDir(temp_path)) { | |||
| MS_LOG(ERROR) << "Create " << path_handle << " dir error"; | |||
| return false; | |||
| } | |||
| } | |||
| temp_path[i] = tmp_char; | |||
| } | |||
| } | |||
| } | |||
| if (!fs->FileExist(path)) { | |||
| MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; | |||
| if (!fs->CreateDir(path)) { | |||
| MS_LOG(ERROR) << "Create " << path << " dir error"; | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -59,10 +59,6 @@ class Dump { | |||
| uint32_t cur_iter_; | |||
| std::vector<std::string> dump_kernels_; | |||
| static bool GetRealPath(const std::string &inpath, std::string *outpath); | |||
| static bool CreateNotExistDirs(const std::string &path); | |||
| private: | |||
| bool ParseDumpConfig(const std::string &dump_config_file); | |||
| bool IsConfigExist(const nlohmann::json &dumpSettings); | |||
| @@ -42,6 +42,7 @@ | |||
| #include "device/ascend/ascend_memory_manager.h" | |||
| #include "debug/tensor_load.h" | |||
| using ge::model_runner::ModelRunner; | |||
| using mindspore::device::ascend::ProfilingManager; | |||
| using mindspore::device::ascend::ProfilingUtils; | |||
| using mindspore::device::ascend::tasksink::TaskGenerator; | |||
| @@ -90,9 +91,16 @@ std::string GetRankId() { | |||
| AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } | |||
| void AscendKernelRuntime::ClearGraphModelMap() { | |||
| #ifdef ENABLE_DATA_DUMP | |||
| for (auto &iter : graph_data_dumper_) { | |||
| MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first; | |||
| iter.second->UnloadDumpInfo(); | |||
| } | |||
| graph_data_dumper_.clear(); | |||
| #endif | |||
| for (auto &iter : graph_model_map_) { | |||
| MS_LOG(INFO) << "Ge UnloadModel " << iter.first; | |||
| auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter.first); | |||
| auto ret = ModelRunner::Instance().UnloadModel(iter.first); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "UnloadModel failed"; | |||
| } | |||
| @@ -107,7 +115,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { | |||
| return; | |||
| } | |||
| MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first; | |||
| auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first); | |||
| auto ret = ModelRunner::Instance().UnloadModel(iter->first); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "UnloadModel failed"; | |||
| } | |||
| @@ -159,6 +167,10 @@ bool AscendKernelRuntime::Init() { | |||
| } | |||
| #endif | |||
| #ifdef ENABLE_DATA_DUMP | |||
| DataDumpParser::GetInstance().ParseDumpConfig(); | |||
| #endif | |||
| // Start up profiling before rtSetDevice | |||
| ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); | |||
| if (!ret) { | |||
| @@ -440,7 +452,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { | |||
| << ", wait_active_stream_list size:" << wait_active_stream_list.size() | |||
| << ", force_copy_stream_list size:" << force_copy_stream_list.size(); | |||
| std::vector<std::shared_ptr<ge::model_runner::OpInfo>> empty_list; | |||
| std::shared_ptr<ge::model_runner::DavinciModel> model = std::make_shared<ge::model_runner::DavinciModel>( | |||
| auto model = std::make_shared<ge::model_runner::DavinciModel>( | |||
| task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0, | |||
| 0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)), | |||
| resource_manager.get_cur_event_num(), 0); | |||
| @@ -477,21 +489,45 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { | |||
| std::shared_ptr<ge::ModelListener> listener; | |||
| MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first; | |||
| bool status = ge::model_runner::ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, | |||
| model_iter->second, listener); | |||
| bool status = | |||
| ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener); | |||
| if (!status) { | |||
| MS_LOG(EXCEPTION) << "Load Task Failed"; | |||
| } | |||
| if (ProfilingManager::GetInstance().IsProfiling()) { | |||
| auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first); | |||
| auto stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first); | |||
| auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first); | |||
| auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first); | |||
| ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph)); | |||
| } | |||
| #ifdef ENABLE_DATA_DUMP | |||
| LaunchDataDump(NOT_NULL(graph)); | |||
| #endif | |||
| if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) { | |||
| MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed"; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| #ifdef ENABLE_DATA_DUMP | |||
| void AscendKernelRuntime::LaunchDataDump(NotNull<const session::KernelGraph *> graph) { | |||
| if (!DataDumpParser::GetInstance().DumpEnabled()) { | |||
| return; | |||
| } | |||
| auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id()); | |||
| auto data_dumper = std::make_shared<DataDumper>(graph.get(), runtime_info_map); | |||
| MS_EXCEPTION_IF_NULL(data_dumper); | |||
| data_dumper->LoadDumpInfo(); | |||
| auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper); | |||
| if (!ret.second) { | |||
| MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed"; | |||
| } | |||
| } | |||
| #endif | |||
| void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) { | |||
| auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id); | |||
| auto task_ids = ModelRunner::Instance().GetTaskIdList(graph_id); | |||
| auto graph_task_names = ProfilingUtils::graph_kernel_name(); | |||
| auto iter = graph_task_names.find(graph_id); | |||
| if (iter != graph_task_names.end()) { | |||
| @@ -524,7 +560,7 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) { | |||
| return false; | |||
| } | |||
| bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); | |||
| bool status = ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); | |||
| if (!status) { | |||
| MS_LOG(ERROR) << "Run task failed"; | |||
| DebugTaskIdName(graph->graph_id()); | |||
| @@ -24,6 +24,10 @@ | |||
| #include "framework/ge_runtime/davinci_model.h" | |||
| #include "device/kernel_runtime_manager.h" | |||
| #include "session/session_basic.h" | |||
| #ifdef ENABLE_DATA_DUMP | |||
| #include "debug/data_dump_parser.h" | |||
| #include "device/ascend/dump/data_dumper.h" | |||
| #endif | |||
| using ge::model_runner::TaskInfo; | |||
| using std::unordered_map; | |||
| @@ -66,6 +70,10 @@ class AscendKernelRuntime : public KernelRuntime { | |||
| bool initialized_{false}; | |||
| unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_; | |||
| unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_; | |||
| #ifdef ENABLE_DATA_DUMP | |||
| void LaunchDataDump(NotNull<const session::KernelGraph *> graph); | |||
| unordered_map<GraphId, std::shared_ptr<DataDumper>> graph_data_dumper_; | |||
| #endif | |||
| }; | |||
| MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime); | |||
| @@ -0,0 +1,282 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifdef ENABLE_DATA_DUMP | |||
| #include "device/ascend/dump/data_dumper.h" | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "utility" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "runtime/mem.h" | |||
| #include "runtime/kernel.h" | |||
| #include "device/ascend/dump/ge_dump.h" | |||
| #include "proto/op_mapping_info.pb.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "debug/data_dump_parser.h" | |||
| constexpr uint32_t kAicpuLoadFlag = 1; | |||
| constexpr uint32_t kAicpuUnloadFlag = 0; | |||
| constexpr uint32_t kTupleTaskId = 0; | |||
| constexpr uint32_t kTupleStreamId = 1; | |||
| constexpr uint32_t kTupleArgs = 2; | |||
| constexpr uint32_t kCurrentStepTensorIndex = 0; | |||
| constexpr uint32_t kCurrentEpochTensorIndex = 1; | |||
| constexpr uint32_t kStepsPerEpochTensorIndex = 2; | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task); | |||
| void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task); | |||
| void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr); | |||
// Release the device memory that held the load/unload op-mapping blobs.
DataDumper::~DataDumper() {
  ReleaseDevMem(&dev_load_mem_);
  ReleaseDevMem(&dev_unload_mem_);
}
// Build an aicpu OpMappingInfo describing every kernel of the graph that
// should be dumped, then push it to the device via RtLoadDumpData and mark
// the dumper as loaded.
void DataDumper::LoadDumpInfo() {
  MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  aicpu::dump::OpMappingInfo dump_info;
  SetOpMappingInfo(NOT_NULL(&dump_info));
  auto kernels = kernel_graph_->execution_order();
  for (const auto &kernel : kernels) {
    MS_EXCEPTION_IF_NULL(kernel);
    if (!KernelNeedDump(kernel)) {
      continue;
    }
    MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope();
    // Record the registered kernel names for later use by the dumper.
    dump_kernel_names_.emplace_back(kernel->fullname_with_scope());
    // One dump task per kernel, appended to the op-mapping message.
    aicpu::dump::Task task;
    ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task));
    MS_EXCEPTION_IF_NULL(dump_info.mutable_task());
    dump_info.mutable_task()->Add(std::move(task));
  }
  RtLoadDumpData(dump_info, &dev_load_mem_);
  load_flag_ = true;
  MS_LOG(INFO) << "[DataDump] LoadDumpInfo end";
}
| void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||
| auto dump_path = DataDumpParser::GetInstance().GetDumpPath(); | |||
| if (!dump_path.has_value()) { | |||
| MS_LOG(EXCEPTION) << "Dump path invalid"; | |||
| } | |||
| auto device_id = context_ptr->device_id(); | |||
| dump_info->set_dump_path(dump_path.value() + "_" + std::to_string(device_id) + "/"); | |||
| MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value(); | |||
| dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id())); | |||
| dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step())); | |||
| dump_info->set_model_id(kernel_graph_->graph_id()); | |||
| dump_info->set_flag(kAicpuLoadFlag); | |||
| const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors(); | |||
| if (input_ctrl_tensors == nullptr || input_ctrl_tensors->size() < 3) { | |||
| MS_LOG(INFO) << "[DataDump] Not data sink mode, input_ctrl_tensor"; | |||
| return; | |||
| } | |||
| const auto ¤t_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex); | |||
| const auto &currnet_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex); | |||
| const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex); | |||
| MS_EXCEPTION_IF_NULL(current_step_tensor); | |||
| MS_EXCEPTION_IF_NULL(currnet_epoch_tensor); | |||
| MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor); | |||
| MS_EXCEPTION_IF_NULL(current_step_tensor->device_address()); | |||
| MS_EXCEPTION_IF_NULL(currnet_epoch_tensor->device_address()); | |||
| MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address()); | |||
| void *current_step = current_step_tensor->device_address()->ptr_; | |||
| void *current_epoch = currnet_epoch_tensor->device_address()->ptr_; | |||
| void *steps_per_epoch = steps_per_epoch_tensor->device_address()->ptr_; | |||
| if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) { | |||
| dump_info->set_step_id_addr(reinterpret_cast<uint64_t>(current_epoch)); | |||
| dump_info->set_loop_cond_addr(reinterpret_cast<uint64_t>(current_step)); | |||
| dump_info->set_iterations_per_loop_addr(reinterpret_cast<uint64_t>(steps_per_epoch)); | |||
| } else { | |||
| MS_LOG(INFO) << "Invalid ctrl tensor device address"; | |||
| } | |||
| } | |||
| bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const { | |||
| if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL && | |||
| AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) { | |||
| return false; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| const auto &kernel_set = DataDumpParser::GetInstance().kernel_set(); | |||
| return kernel_set.find(kernel->fullname_with_scope()) != kernel_set.end(); | |||
| } | |||
| void DataDumper::UnloadDumpInfo() { | |||
| if (!load_flag_) { | |||
| MS_LOG(WARNING) << "Load not success, no need to unload"; | |||
| return; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||
| MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << kernel_graph_->graph_id(); | |||
| aicpu::dump::OpMappingInfo op_mapping_info; | |||
| op_mapping_info.set_model_id(kernel_graph_->graph_id()); | |||
| op_mapping_info.set_flag(kAicpuUnloadFlag); | |||
| for (const auto &kernel_name : dump_kernel_names_) { | |||
| aicpu::dump::Task task; | |||
| auto iter = runtime_info_map_.find(kernel_name); | |||
| if (iter == runtime_info_map_.end()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(iter->second); | |||
| auto task_id = std::get<kTupleTaskId>(*iter->second); | |||
| task.set_task_id(task_id); | |||
| MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task()); | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| RtLoadDumpData(op_mapping_info, &dev_unload_mem_); | |||
| } | |||
| void DataDumper::ReleaseDevMem(void **ptr) const { | |||
| if (ptr == nullptr) { | |||
| return; | |||
| } | |||
| if (*ptr != nullptr) { | |||
| rtError_t rt_error = rtFree(*ptr); | |||
| if (rt_error != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error; | |||
| } | |||
| *ptr = nullptr; | |||
| } | |||
| } | |||
| void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const { | |||
| dump_task->set_end_graph(false); | |||
| auto iter = runtime_info_map_.find(kernel->fullname_with_scope()); | |||
| if (iter == runtime_info_map_.end()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(iter->second); | |||
| auto task_id = std::get<kTupleTaskId>(*iter->second); | |||
| auto stream_id = std::get<kTupleStreamId>(*iter->second); | |||
| auto args = std::get<kTupleArgs>(*iter->second); | |||
| MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id; | |||
| dump_task->set_task_id(task_id); | |||
| dump_task->set_stream_id(stream_id); | |||
| MS_EXCEPTION_IF_NULL(dump_task->mutable_op()); | |||
| dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope()); | |||
| dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get())); | |||
| DumpKernelOutput(kernel, args, dump_task); | |||
| DumpKernelInput(kernel, args, dump_task); | |||
| } | |||
| void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) { | |||
| std::string proto_str; | |||
| size_t proto_size = dump_info.ByteSizeLong(); | |||
| bool ret = dump_info.SerializeToString(&proto_str); | |||
| if (!ret || proto_size == 0) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size %zu."; | |||
| } | |||
| rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed"; | |||
| } | |||
| if (ptr == nullptr) { | |||
| MS_LOG(ERROR) << "[DataDump] rtMalloc failed, ptr is nullptr"; | |||
| return; | |||
| } | |||
| rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed"; | |||
| } | |||
| MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start"; | |||
| rt_ret = rtDatadumpInfoLoad(*ptr, proto_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed"; | |||
| } | |||
| } | |||
| void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { | |||
| MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope(); | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(kernel); | |||
| uint64_t offset = sizeof(void *) * input_size; | |||
| for (size_t i = 0; i < output_size; ++i) { | |||
| auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); | |||
| auto output_format = AnfAlgo::GetOutputFormat(kernel, i); | |||
| auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i); | |||
| aicpu::dump::Output output; | |||
| output.set_data_type(GetGeDataType(data_type)); | |||
| output.set_format(GetGeFormat(output_format, output_shape.size())); | |||
| MS_EXCEPTION_IF_NULL(output.mutable_shape()); | |||
| for (auto dim : output_shape) { | |||
| output.mutable_shape()->add_dim(dim); | |||
| } | |||
| output.set_original_output_format(GetGeFormat(output_format, output_shape.size())); | |||
| output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset); | |||
| MS_EXCEPTION_IF_NULL(task->mutable_output()); | |||
| task->mutable_output()->Add(std::move(output)); | |||
| offset += sizeof(void *); | |||
| } | |||
| } | |||
| void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { | |||
| MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope(); | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| uint64_t offset = 0; | |||
| for (size_t i = 0; i < input_size; ++i) { | |||
| aicpu::dump::Input input; | |||
| auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); | |||
| auto input_node = input_node_with_index.first; | |||
| auto input_index = input_node_with_index.second; | |||
| std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index); | |||
| auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index); | |||
| if (output_type == kTypeUnknown) { | |||
| MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph"; | |||
| output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index); | |||
| } | |||
| auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index); | |||
| input.set_data_type(GetGeDataType(output_type)); | |||
| input.set_format(GetGeFormat(output_format, output_shape.size())); | |||
| MS_EXCEPTION_IF_NULL(input.mutable_shape()); | |||
| for (auto dim : output_shape) { | |||
| input.mutable_shape()->add_dim(dim); | |||
| } | |||
| input.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset); | |||
| MS_EXCEPTION_IF_NULL(task->mutable_input()); | |||
| task->mutable_input()->Add(std::move(input)); | |||
| offset += sizeof(void *); | |||
| } | |||
| } | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,69 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||
| #ifdef ENABLE_DATA_DUMP | |||
| #include <tuple> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "session/kernel_graph.h" | |||
| namespace aicpu { | |||
| namespace dump { | |||
| class OpMappingInfo; | |||
| class Task; | |||
| } // namespace dump | |||
| } // namespace aicpu | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
// Per-kernel runtime info: tuple(task_id, stream_id, args).
// (The previous comment also listed op_name, but the tuple has only three
// elements; the op name is the key of runtime_info_map_.)
using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>;
// Loads/unloads the async data-dump configuration (aicpu OpMappingInfo) for a
// single kernel graph onto the Ascend device.
class DataDumper {
 public:
  // |kernel_graph| is borrowed and must outlive this object; the runtime info
  // map is copied (its RuntimeInfo entries remain shared via shared_ptr).
  DataDumper(const session::KernelGraph *kernel_graph,
             const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info_map)
      : load_flag_(false),
        dev_load_mem_(nullptr),
        dev_unload_mem_(nullptr),
        kernel_graph_(kernel_graph),
        runtime_info_map_(runtime_info_map) {}
  // Frees any device buffers still held for the load/unload protos.
  ~DataDumper();
  // Builds dump info for every kernel selected by the dump config and loads it
  // onto the device; sets load_flag_ afterwards.
  void LoadDumpInfo();
  // Sends an unload request for the previously loaded tasks; warns and returns
  // if LoadDumpInfo never ran.
  void UnloadDumpInfo();

 private:
  void ReleaseDevMem(void **ptr) const;
  bool KernelNeedDump(const CNodePtr &kernel) const;
  void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
  void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const;

  bool load_flag_;       // true once LoadDumpInfo has succeeded
  void *dev_load_mem_;   // device buffer holding the serialized load proto
  void *dev_unload_mem_; // device buffer holding the serialized unload proto
  std::vector<std::string> dump_kernel_names_;  // kernels included in the last load
  const session::KernelGraph *kernel_graph_;    // not owned
  std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map_;
};
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||
| @@ -0,0 +1,120 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ | |||
| #include <map> | |||
| #include <string> | |||
| #include "proto/ge_dtype.pb.h" | |||
| #include "ir/dtype/type_id.h" | |||
| #include "utils/utils.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| static ge::proto::DataType GetGeDataType(TypeId type_id) { | |||
| static const std::map<TypeId, ge::proto::DataType> data_type_map = { | |||
| {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED}, {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT}, | |||
| {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16}, {TypeId::kNumberTypeInt8, ge::proto::DT_INT8}, | |||
| {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8}, {TypeId::kNumberTypeInt16, ge::proto::DT_INT16}, | |||
| {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16}, {TypeId::kNumberTypeInt32, ge::proto::DT_INT32}, | |||
| {TypeId::kNumberTypeInt64, ge::proto::DT_INT64}, {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32}, | |||
| {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64}, {TypeId::kNumberTypeBool, ge::proto::DT_BOOL}, | |||
| {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE}, | |||
| }; | |||
| MS_LOG(INFO) << "Vm origin type_id:" << type_id; | |||
| auto iter = data_type_map.find(type_id); | |||
| if (iter == data_type_map.end()) { | |||
| MS_LOG(EXCEPTION) << "Invalid data type:" << type_id; | |||
| } | |||
| return iter->second; | |||
| } | |||
// Device tensor formats. NOTE(review): the explicit values (=20, =24) and the
// ordering appear to mirror GE's own Format enum so the raw numeric values can
// be written straight into the dump proto — do not reorder or renumber;
// confirm against the GE headers before extending.
enum GeFormat {
  kFormat_NCHW = 0,  // NCHW
  kFormat_NHWC,      // NHWC
  kFormat_ND,        // Nd Tensor
  kFormat_NC1HWC0,   // NC1HWC0
  kFormat_FRACTAL_Z, // FRACTAL_Z
  kFormat_NC1C0HWPAD,
  kFormat_NHWC1C0,
  kFormat_FSR_NCHW,
  kFormat_FRACTAL_DECONV,
  kFormat_C1HWNC0,
  kFormat_FRACTAL_DECONV_TRANSPOSE,
  kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS,
  kFormat_NC1HWC0_C04,    // NC1HWC0, C0 =4
  kFormat_FRACTAL_Z_C04,  // FRACZ, C0 =4
  kFormat_CHWN,
  kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS,
  kFormat_HWCN,
  kFormat_NC1KHKWHWC0,  // KH,KW kernel h& kernel w maxpooling max output format
  kFormat_BN_WEIGHT,
  kFormat_FILTER_HWCK,  // filter input tensor format
  kFormat_HASHTABLE_LOOKUP_LOOKUPS = 20,
  kFormat_HASHTABLE_LOOKUP_KEYS,
  kFormat_HASHTABLE_LOOKUP_VALUE,
  kFormat_HASHTABLE_LOOKUP_OUTPUT,
  kFormat_HASHTABLE_LOOKUP_HITS = 24,
  kFormat_C1HWNCoC0,
  kFormat_MD,
  kFormat_NDHWC,
  kFormat_FRACTAL_ZZ,
  kFormat_FRACTAL_NZ,
  kFormat_NCDHW,
  kFormat_DHWCN,  // 3D filter input tensor format
  kFormat_NDC1HWC0,
  kFormat_FRACTAL_Z_3D,
  kFormat_CN,
  kFormat_NC,
  kFormat_DHWNC,
  kFormat_FRACTAL_Z_3D_TRANSPOSE,  // 3D filter(transpose) input tensor format
  kFormat_RESERVED,
  kFormat_ALL
};
| static GeFormat GetGeFormat(const std::string &format, size_t shape_size) { | |||
| static const std::map<std::string, GeFormat> format_map = { | |||
| // default format: nchw, fractal_nz? | |||
| {kOpFormat_DEFAULT, kFormat_NCHW}, | |||
| {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0}, | |||
| {kOpFormat_ND, kFormat_ND}, | |||
| {kOpFormat_NCHW, kFormat_NCHW}, | |||
| {kOpFormat_NHWC, kFormat_NHWC}, | |||
| {kOpFormat_HWCN, kFormat_HWCN}, | |||
| {kOpFormat_NC1HWC0, kFormat_NC1HWC0}, | |||
| {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z}, | |||
| {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ}, | |||
| {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0}, | |||
| {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04}, | |||
| {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04}, | |||
| {kOpFormat_NDHWC, kFormat_NDHWC}, | |||
| }; | |||
| MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size; | |||
| if (format == kOpFormat_DEFAULT) { | |||
| return shape_size == 4 ? kFormat_NCHW : kFormat_ND; | |||
| } | |||
| auto iter = format_map.find(format); | |||
| if (iter == format_map.end()) { | |||
| MS_LOG(EXCEPTION) << "Invalid format:" << format; | |||
| } | |||
| return iter->second; | |||
| } | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ | |||
| @@ -0,0 +1,49 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| syntax = "proto3"; | |||
| package ge.proto; | |||
// Element data types for dumped tensors. Values are part of the wire format;
// never renumber existing entries.
enum DataType
{
    DT_UNDEFINED = 0;         // Used to indicate a DataType field has not been set.
    DT_FLOAT = 1;             // float type
    DT_FLOAT16 = 2;           // fp16 type
    DT_INT8 = 3;              // int8 type
    DT_UINT8 = 4;             // uint8 type
    DT_INT16 = 5;             // int16 type
    DT_UINT16 = 6;            // uint16 type
    DT_INT32 = 7;             // int32 type
    DT_INT64 = 8;             // int64 type
    DT_UINT32 = 9;            // unsigned int32
    DT_UINT64 = 10;           // unsigned int64
    DT_BOOL = 11;             // bool type
    DT_DOUBLE = 12;           // double type
    DT_STRING = 13;           // string type
    DT_DUAL_SUB_INT8 = 14;    /**< dual output int8 type */
    DT_DUAL_SUB_UINT8 = 15;   /**< dual output uint8 type */
    DT_COMPLEX64 = 16;        // complex64 type
    DT_COMPLEX128 = 17;       // complex128 type
    DT_QINT8 = 18;            // qint8 type
    DT_QINT16 = 19;           // qint16 type
    DT_QINT32 = 20;           // qint32 type
    DT_QUINT8 = 21;           // quint8 type
    DT_QUINT16 = 22;          // quint16 type
    DT_RESOURCE = 23;         // resource type
    DT_STRING_REF = 24;       // string_ref type
    DT_DUAL = 25;             /**< dual output type */
}
| @@ -0,0 +1,78 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| syntax = "proto3"; | |||
| package aicpu.dump; | |||
// Tensor shape: one dim entry per axis, outermost first.
message Shape {
  repeated uint64 dim = 1;
}

// One kernel output to dump: element type/format (numeric DataType/GeFormat
// values), shape, and the device address the data is read from.
message Output {
  int32 data_type = 1;
  int32 format = 2;
  Shape shape = 3;
  uint64 address = 4;
  string original_name = 5;
  int32 original_output_index = 6;
  int32 original_output_data_type = 7;
  int32 original_output_format = 8;
  uint64 size = 9;
};

// One kernel input to dump; fields mirror Output's core fields.
message Input {
  int32 data_type = 1;
  int32 format = 2;
  Shape shape = 3;
  uint64 address = 4;
  uint64 size = 5;
}
// Identity of the kernel being dumped.
message Op {
  string op_name = 1;
  string op_type = 2;
};

// One dump task: binds a runtime (task_id, stream_id) pair to the op and the
// set of inputs/outputs to capture.
message Task {
  uint32 task_id = 1;
  uint32 stream_id = 2;
  Op op = 3;
  repeated Output output = 4;
  bool end_graph = 5;
  repeated Input input = 6;
};
// Top-level message handed to rtDatadumpInfoLoad. The single-field oneof
// wrappers make presence detectable (proto3 scalars have no has_* otherwise).
message OpMappingInfo {
  string dump_path = 1;
  oneof model_name_param {
    string model_name = 2;
  }
  oneof model_id_param {
    uint32 model_id = 3;
  }
  oneof step_id {
    uint64 step_id_addr = 4;               // device address of the step-id value
  }
  oneof iterations_per_loop {
    uint64 iterations_per_loop_addr = 5;   // device address of steps-per-epoch
  }
  oneof loop_cond {
    uint64 loop_cond_addr = 6;             // device address of the loop condition
  }
  uint32 flag = 7;                         // 0x01 load, 0x00 unload
  repeated Task task = 8;
  string dump_step = 9;
};
| @@ -127,6 +127,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i | |||
| AddressPtrList kernel_outputs; | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| kernel_mod->set_kernel_name(anf_node_ptr->fullname_with_scope()); | |||
| if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) { | |||
| for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) { | |||
| auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i); | |||
| @@ -34,6 +34,7 @@ class CPUKernelRuntime; | |||
| namespace ascend { | |||
| class AscendKernelRuntime; | |||
| class AscendMemoryManager; | |||
| class DataDumper; | |||
| namespace tasksink { | |||
| class TaskGenerator; | |||
| } // namespace tasksink | |||
| @@ -90,6 +91,7 @@ class DeviceAddress { | |||
| friend class mindspore::device::gpu::GPUMemoryManager; | |||
| friend class mindspore::device::ascend::AscendKernelRuntime; | |||
| friend class mindspore::device::ascend::AscendMemoryManager; | |||
| friend class mindspore::device::ascend::DataDumper; | |||
| }; | |||
| using DeviceAddressPtr = std::shared_ptr<DeviceAddress>; | |||
| @@ -34,6 +34,7 @@ | |||
| #include "device/ascend/kernel_select_ascend.h" | |||
| #include "runtime/base.h" | |||
| #include "device/ascend/ascend_stream_assign.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| using device::ascend::ProfilingUtils; | |||
| @@ -117,6 +118,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> | |||
| std::vector<AnfNodePtr> *mute_inputs = kernel_graph_ptr->MutableInputs(); | |||
| MS_EXCEPTION_IF_NULL(mute_inputs); | |||
| mute_inputs->push_back(switch_loop_input[kLoopCountParamName]); | |||
| mute_inputs->push_back(switch_loop_input[kEpochParamName]); | |||
| mute_inputs->push_back(switch_loop_input[kIterLoopParamName]); | |||
| mute_inputs->push_back(switch_loop_input[kZeroParamName]); | |||
| mute_inputs->push_back(switch_loop_input[kOneParamName]); | |||
| @@ -316,6 +318,13 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptr<session::Kerne | |||
| one->set_abstract(paremeter_abstract_ptr); | |||
| ParameterPtr one_new = kernel_graph_ptr->NewParameter(one); | |||
| (*switch_loop_input)[kOneParamName] = one_new; | |||
| ParameterPtr epoch = std::make_shared<Parameter>(kernel_graph_ptr); | |||
| MS_EXCEPTION_IF_NULL(epoch); | |||
| epoch->set_name(kEpochParamName); | |||
| epoch->set_abstract(paremeter_abstract_ptr); | |||
| ParameterPtr epoch_new = kernel_graph_ptr->NewParameter(epoch); | |||
| (*switch_loop_input)[kEpochParamName] = epoch_new; | |||
| } | |||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder KernelAdjust::CreateMngKernelBuilder( | |||
| @@ -510,6 +519,14 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) { | |||
| *val = 0; | |||
| inputs->push_back(loop_count_tensor); | |||
| // Epoch in device | |||
| tensor::TensorPtr epoch_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp); | |||
| MS_EXCEPTION_IF_NULL(epoch_tensor); | |||
| val = static_cast<int32_t *>(epoch_tensor->data_c()); | |||
| MS_EXCEPTION_IF_NULL(val); | |||
| *val = 0; | |||
| inputs->push_back(epoch_tensor); | |||
| tensor::TensorPtr iter_loop_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp); | |||
| MS_EXCEPTION_IF_NULL(iter_loop_tensor); | |||
| val = static_cast<int32_t *>(iter_loop_tensor->data_c()); | |||
| @@ -531,6 +548,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) { | |||
| MS_EXCEPTION_IF_NULL(val); | |||
| *val = 1; | |||
| inputs->push_back(one_tensor); | |||
| MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; | |||
| } | |||
| @@ -37,6 +37,7 @@ constexpr auto kLoopCountParamName = "loop_count"; | |||
| constexpr auto kIterLoopParamName = "iter_loop"; | |||
| constexpr auto kZeroParamName = "zero"; | |||
| constexpr auto kOneParamName = "one"; | |||
| constexpr auto kEpochParamName = "loop_epoch"; | |||
| constexpr auto kStreamNeedActivedFirst = "stream_need_active_first"; | |||
| constexpr uint32_t kSecondStreamSwitchLabel = 2; | |||
| @@ -26,6 +26,7 @@ | |||
| #include "kernel/aicpu/aicpu_kernel_build.h" | |||
| #include "utils/convert_utils.h" | |||
| #include "kernel/aicpu/aicpu_util.h" | |||
| #include "utils/context/ms_context.h" | |||
| using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>; | |||
| @@ -144,8 +145,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> | |||
| if (node_name_ == kTopK) { | |||
| node_name_ = kTopKV2; | |||
| } | |||
| AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>( | |||
| stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs); | |||
| kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump()); | |||
| MS_LOG(INFO) << "AicpuOpKernelMod GenTask end"; | |||
| return {task_info_ptr}; | |||
| @@ -26,6 +26,7 @@ | |||
| #include "runtime/rt.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/convert_utils.h" | |||
| #include "utils/context/ms_context.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| @@ -123,8 +124,8 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in | |||
| MS_LOG(DEBUG) << "The block_dim is:" << block_dim; | |||
| TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>( | |||
| stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs, | |||
| output_data_addrs, workspace_addrs); | |||
| kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, | |||
| input_data_addrs, output_data_addrs, workspace_addrs, NeedDump()); | |||
| return {task_info_ptr}; | |||
| } | |||
| } // namespace kernel | |||
| @@ -21,6 +21,9 @@ | |||
| #include <memory> | |||
| #include "framework/ge_runtime/task_info.h" | |||
| #include "kernel/kernel.h" | |||
| #ifdef ENABLE_DATA_DUMP | |||
| #include "debug/data_dump_parser.h" | |||
| #endif | |||
| using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>; | |||
| namespace mindspore { | |||
| @@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod { | |||
| const std::vector<AddressPtr> &, uint32_t) = 0; | |||
| uint32_t block_dim() { return block_dim_; } | |||
| uint32_t stream_id() { return stream_id_; } | |||
| virtual bool NeedDump() { | |||
| #ifdef ENABLE_DATA_DUMP | |||
| return DataDumpParser::GetInstance().NeedDump(kernel_name_); | |||
| #else | |||
| return false; | |||
| #endif | |||
| } | |||
| protected: | |||
| uint32_t block_dim_{1}; | |||
| @@ -18,6 +18,7 @@ | |||
| #include "device/ascend/tasksink/runtime_utils.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "utils/utils.h" | |||
| #include "utils/context/ms_context.h" | |||
| using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>; | |||
| using ge::model_runner::HcclTaskInfo; | |||
| @@ -146,10 +147,12 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu | |||
| << ", root_id=" << root_id_ << ", op_type=" << static_cast<int>(op_type_) | |||
| << ", data_type=" << static_cast<int>(data_type); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| HcclTaskInfoPtr task_info_ptr = std::make_shared<HcclTaskInfo>( | |||
| stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, private_def, nullptr, | |||
| hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, RuntimeUtils::HcomUnbindModel, | |||
| RuntimeUtils::HcomDistribute); | |||
| kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, | |||
| private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, | |||
| RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump()); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -129,6 +129,10 @@ class KernelMod { | |||
| virtual std::vector<size_t> GenParameters() { return {}; } | |||
| virtual ~KernelMod() = default; | |||
| void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } | |||
| protected: | |||
| std::string kernel_name_; | |||
| }; | |||
| using KernelModPtr = std::shared_ptr<KernelMod>; | |||
| } // namespace kernel | |||
| @@ -58,8 +58,9 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in | |||
| } | |||
| stream_id_ = stream_id; | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>( | |||
| stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE); | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = | |||
| std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, | |||
| inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -55,7 +55,8 @@ std::vector<TaskInfoPtr> LabelGotoKernel::GenTask(const std::vector<AddressPtr> | |||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | |||
| MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id; | |||
| std::vector<TaskInfoPtr> task_info_list; | |||
| std::shared_ptr<LabelGotoTaskInfo> task_info_ptr = std::make_shared<LabelGotoTaskInfo>(stream_id, label_); | |||
| std::shared_ptr<LabelGotoTaskInfo> task_info_ptr = | |||
| std::make_shared<LabelGotoTaskInfo>(kernel_name_, stream_id, label_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| task_info_list.emplace_back(task_info_ptr); | |||
| return task_info_list; | |||
| @@ -55,7 +55,7 @@ std::vector<TaskInfoPtr> LabelSetKernel::GenTask(const std::vector<AddressPtr> & | |||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | |||
| MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id; | |||
| std::vector<TaskInfoPtr> task_info_list; | |||
| std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(stream_id, label_); | |||
| std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(kernel_name_, stream_id, label_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| task_info_list.emplace_back(task_info_ptr); | |||
| return task_info_list; | |||
| @@ -67,7 +67,7 @@ std::vector<TaskInfoPtr> LabelSwitchKernel::GenTask(const std::vector<AddressPtr | |||
| MS_LOG(INFO) << "LabelSwitchKernel GenTask label size:" << label_size_ << ", stream id:" << stream_id; | |||
| std::vector<TaskInfoPtr> task_info_list; | |||
| cond_ = inputs[0]->addr; | |||
| auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(stream_id, label_size_, label_list_, cond_); | |||
| auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(kernel_name_, stream_id, label_size_, label_list_, cond_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| task_info_list.emplace_back(task_info_ptr); | |||
| return task_info_list; | |||
| @@ -23,6 +23,7 @@ | |||
| #include "common/utils.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "common/trans.h" | |||
| #include "utils/context/ms_context.h" | |||
| using ge::model_runner::MemcpyAsyncTaskInfo; | |||
| using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>; | |||
| @@ -118,8 +119,9 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr | |||
| } | |||
| stream_id_ = stream_id; | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>( | |||
| stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE); | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = | |||
| std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, | |||
| inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -63,7 +63,7 @@ std::vector<TaskInfoPtr> ProfilingKernelMod::GenTask(const std::vector<AddressPt | |||
| << ", outputs size:" << outputs.size(); | |||
| stream_id_ = stream_id; | |||
| std::shared_ptr<ProfilerTraceTaskInfo> task_info_ptr = | |||
| std::make_shared<ProfilerTraceTaskInfo>(stream_id, log_id_, notify_, flags_); | |||
| std::make_shared<ProfilerTraceTaskInfo>(kernel_name_, stream_id, log_id_, notify_, flags_); | |||
| return {task_info_ptr}; | |||
| } | |||
| } // namespace kernel | |||
| @@ -60,7 +60,7 @@ std::vector<TaskInfoPtr> RecvKernel::GenTask(const std::vector<AddressPtr> &, co | |||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | |||
| MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id; | |||
| stream_id_ = stream_id; | |||
| EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(stream_id, event_id_); | |||
| EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(kernel_name_, stream_id, event_id_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -57,7 +57,7 @@ std::vector<TaskInfoPtr> SendKernel::GenTask(const std::vector<AddressPtr> &, co | |||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | |||
| MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id; | |||
| stream_id_ = stream_id; | |||
| EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(stream_id, event_id_); | |||
| EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(kernel_name_, stream_id, event_id_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -72,7 +72,8 @@ std::vector<TaskInfoPtr> StreamActiveKernel::GenTask(const std::vector<AddressPt | |||
| stream_id_ = stream_id; | |||
| std::vector<TaskInfoPtr> task_info_list; | |||
| for (auto &index : active_streams_index_) { | |||
| std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = std::make_shared<StreamActiveTaskInfo>(stream_id, index); | |||
| std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = | |||
| std::make_shared<StreamActiveTaskInfo>(kernel_name_, stream_id, index); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| task_info_list.emplace_back(task_info_ptr); | |||
| MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index; | |||
| @@ -91,8 +91,8 @@ std::vector<TaskInfoPtr> StreamSwitchKernel::GenTask(const std::vector<AddressPt | |||
| auto ites_per_loop = inputs[1]->addr; | |||
| MS_LOG(INFO) << "cond_:" << static_cast<int>(cond_) << ", true_stream_index_:" << true_stream_index_ | |||
| << ", stream_id:" << stream_id; | |||
| std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr = | |||
| std::make_shared<StreamSwitchTaskInfo>(stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); | |||
| std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr = std::make_shared<StreamSwitchTaskInfo>( | |||
| kernel_name_, stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -17,7 +17,7 @@ | |||
| #include "kernel/tbe/tbe_kernel_mod.h" | |||
| #include <algorithm> | |||
| #include "runtime/rt.h" | |||
| #include "nlohmann/json.hpp" | |||
| #include "utils/context/ms_context.h" | |||
| #include "graphengine/inc/framework/ge_runtime/task_info.h" | |||
| namespace mindspore { | |||
| @@ -99,9 +99,9 @@ std::vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &in | |||
| MS_LOG(INFO) << "block_dim is:" << block_dim_; | |||
| TbeTaskInfoPtr task_info_ptr = | |||
| make_shared<ge::model_runner::TbeTaskInfo>(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, | |||
| meta_data, input_data_addrs, output_data_addrs, workspace_addrs); | |||
| TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>( | |||
| kernel_name_, stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, meta_data, input_data_addrs, | |||
| output_data_addrs, workspace_addrs, NeedDump()); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -36,7 +36,7 @@ namespace session { | |||
| using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>; | |||
| class KernelGraph : public FuncGraph { | |||
| public: | |||
| KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) { | |||
| KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) { | |||
| inputs_ = std::make_shared<std::vector<AnfNodePtr>>(); | |||
| execution_order_ = {}; | |||
| executable_ = true; | |||
| @@ -154,6 +154,8 @@ class KernelGraph : public FuncGraph { | |||
| AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const; | |||
| void AddFinalOutputKernel(const AnfNodePtr &node); | |||
| bool IsFinalOutputKernel(const AnfNodePtr &node) const; | |||
| uint32_t current_epoch() const { return current_epoch_; } | |||
| void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; } | |||
| private: | |||
| // remove value node form graph | |||
| @@ -216,6 +218,7 @@ class KernelGraph : public FuncGraph { | |||
| std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_; | |||
| std::unordered_map<AnfNodePtr, AnfNodePtr> internal_outputs_to_front_map_; | |||
| std::set<AnfNodePtr> final_output_kernels_; | |||
| uint32_t current_epoch_; | |||
| }; | |||
| } // namespace session | |||
| using KernelGraphPtr = std::shared_ptr<session::KernelGraph>; | |||
| @@ -187,6 +187,18 @@ size_t LoadCtrlInputTensor(const std::shared_ptr<KernelGraph> &graph, std::vecto | |||
| // set loop_count to zero | |||
| MS_EXCEPTION_IF_NULL(inputs); | |||
| inputs->push_back(tensor); | |||
| auto epoch_tensor = (*inputs_params)[1]; | |||
| MS_EXCEPTION_IF_NULL(epoch_tensor); | |||
| auto *epoch_val = static_cast<int32_t *>(epoch_tensor->data_c()); | |||
| MS_EXCEPTION_IF_NULL(epoch_val); | |||
| *epoch_val = graph->current_epoch(); | |||
| epoch_tensor->set_dirty(true); | |||
| inputs->push_back(epoch_tensor); | |||
| MS_LOG(INFO) << "Load epoch_val:" << *epoch_val; | |||
| graph->set_current_epoch(graph->current_epoch() + 1); | |||
| return inputs_params->size(); | |||
| } | |||
| @@ -814,13 +826,13 @@ void SessionBasic::AddParameterToGraphInputs(const std::vector<AnfNodePtr> ¶ | |||
| void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | |||
| const std::vector<tensor::TensorPtr> &inputs_const) const { | |||
| std::vector<tensor::TensorPtr> inputs(inputs_const); | |||
| size_t input_ctrl_size = 1; | |||
| size_t input_ctrl_size = 2; | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| if (kernel_graph->input_ctrl_tensors()) { | |||
| input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); | |||
| } | |||
| auto input_nodes = kernel_graph->inputs(); | |||
| if ((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) { | |||
| if ((inputs.size() + input_ctrl_size) - 2 != input_nodes.size()) { | |||
| MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() | |||
| << ", input_ctrl_size:" << input_ctrl_size; | |||
| } | |||
| @@ -32,6 +32,8 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint | |||
| bool ModelRunner::UnloadModel(uint32_t model_id) { return true; } | |||
| bool ModelRunner::LoadModelComplete(uint32_t model_id) { return true; } | |||
| bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) { | |||
| return true; | |||
| } | |||
| @@ -45,6 +47,11 @@ const std::vector<uint32_t> &ModelRunner::GetStreamIdList(uint32_t model_id) con | |||
| static std::vector<uint32_t> stream_id_list; | |||
| return stream_id_list; | |||
| } | |||
| const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const { | |||
| static std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map; | |||
| return runtime_info_map; | |||
| } | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -15,7 +15,6 @@ | |||
| */ | |||
| #include "device/ascend/ascend_stream_assign.h" | |||
| #include "device/ascend/ascend_label_assign.h" | |||
| #include "device/ascend/tasksink/task_generator.h" | |||
| #include "device/kernel_adjust.h" | |||
| namespace mindspore { | |||
| @@ -31,13 +30,6 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr) | |||
| void AscendStreamAssign::GetWaitStreams(vector<uint32_t> *wait_active_stream_list) { return; } | |||
| void AscendStreamAssign::GetHcomStreams(std::vector<uint32_t> *streams) { return; } | |||
| namespace tasksink { | |||
| bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list, | |||
| uint32_t graph_id) { | |||
| return true; | |||
| } | |||
| } // namespace tasksink | |||
| } // namespace ascend | |||
| void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; } | |||
| bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return true; } | |||
| @@ -0,0 +1,30 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "device/ascend/tasksink/task_generator.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| namespace tasksink { | |||
| bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list, | |||
| uint32_t graph_id) { | |||
| return true; | |||
| } | |||
| } // namespace tasksink | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||