diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
index f658f9c427..5daf02b3f3 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
@@ -630,6 +630,11 @@ CNodePtr AscendKernelRuntime::GetErrorNodeName(uint32_t streamid, uint32_t taski
 void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
   const std::string local_path = std::string("./task_error_dump/") + std::to_string(task_fail_infoes_.at(0).deviceid);
+  if (access(local_path.c_str(), F_OK) == 0) {
+    if (!DeleteDumpDir(local_path)) {
+      MS_LOG(ERROR) << "Delete dump directory " << local_path << " failed";
+    }
+  }
   for (const auto &task_fail_info : task_fail_infoes_) {
     MS_LOG(ERROR) << "Task fail infos task_id: " << task_fail_info.taskid << ", stream_id: " << task_fail_info.streamid
                   << ", tid: " << task_fail_info.tid << ", device_id: " << task_fail_info.deviceid
@@ -988,4 +993,71 @@ uint64_t AscendKernelRuntime::GetAvailableMemMaxSize() const {
   return ascend_mem_manager->GetDeviceMemSize();
 }
 
+// Removes the dump directory `path` together with everything stored under it.
+// Returns true only if the directory was resolved, emptied and finally removed.
+bool AscendKernelRuntime::DeleteDumpDir(std::string path) {
+  const std::string real_path = GetRealPath(path);
+  if (real_path.empty()) {
+    // realpath() failed, so there is no resolved directory to operate on.
+    return false;
+  }
+  if (DeleteDumpFile(real_path) != 0) {
+    return false;
+  }
+  // rmdir() fails when anything was left behind, so its result is the final verdict.
+  return rmdir(real_path.c_str()) == 0;
+}
+
+// Deletes `path` when it is not a directory; when it is a directory, deletes
+// everything inside it recursively but leaves the directory itself in place
+// (the caller removes it). Returns 0 on success and -1 on the first failure.
+int AscendKernelRuntime::DeleteDumpFile(std::string path) {
+  struct stat statbuf;
+  if (lstat(path.c_str(), &statbuf) != 0) {
+    // Without a successful lstat() the contents of statbuf are indeterminate.
+    return -1;
+  }
+  if (!S_ISDIR(statbuf.st_mode)) {
+    // lstat() does not follow symlinks, so a link is unlinked here rather than
+    // traversed; leaving it behind would make the parent rmdir() fail.
+    return remove(path.c_str()) == 0 ? 0 : -1;
+  }
+
+  DIR *dir = opendir(path.c_str());
+  if (dir == nullptr) {
+    return -1;
+  }
+  // path is non-empty here because lstat() succeeded on it.
+  if (path.back() != '/') {
+    path += '/';
+  }
+
+  int result = 0;
+  struct dirent *dirinfo = nullptr;
+  while (result == 0 && (dirinfo = readdir(dir)) != nullptr) {
+    if (strcmp(dirinfo->d_name, ".") == 0 || strcmp(dirinfo->d_name, "..") == 0) {
+      continue;
+    }
+    const std::string filepath = path + dirinfo->d_name;
+    result = DeleteDumpFile(filepath);
+    // A sub-directory is only emptied by the recursive call and still exists,
+    // whereas a plain entry is already gone; remove whatever is still there.
+    struct stat remaining;
+    if (result == 0 && lstat(filepath.c_str(), &remaining) == 0 && rmdir(filepath.c_str()) != 0) {
+      result = -1;
+    }
+  }
+  closedir(dir);
+  return result;
+}
+
+// Resolves `path` to a canonical absolute path via realpath().
+// Returns an empty string when the path cannot be resolved.
+std::string AscendKernelRuntime::GetRealPath(std::string path) {
+  char real_path_mem[PATH_MAX] = {0};
+  if (realpath(path.c_str(), real_path_mem) == nullptr) {
+    return "";
+  }
+  return std::string(real_path_mem);
+}
 }  // namespace mindspore::device::ascend
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
index 0ea737e2ff..cec00ae9f2 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
@@ -15,6 +15,7 @@
  */
 #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_KERNEL_RUNTIME_H_
 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_KERNEL_RUNTIME_H_
+#include
 #include
 #include
 #include
@@ -89,6 +90,9 @@ class AscendKernelRuntime : public KernelRuntime {
   static void DumpTaskExceptionInfo(const session::KernelGraph *graph);
   static void TaskFailCallback(rtExceptionInfo *task_fail_info);
   void ReportProfilingData();
+  static bool DeleteDumpDir(std::string path);
+  static int DeleteDumpFile(std::string path);
+  static std::string GetRealPath(std::string path);
 
   rtContext_t rt_context_{nullptr};
   bool initialized_{false};