Browse Source

!6515 Resolve deadlock issue when terminate debugger from UI

Merge pull request !6515 from lichen_101010/terminate_deadlock_issue
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
d30dece8a9
4 changed files with 31 additions and 4 deletions
  1. +2
    -1
      mindspore/ccsrc/backend/session/executor.cc
  2. +14
    -2
      mindspore/ccsrc/debug/debugger/debugger.cc
  3. +11
    -0
      mindspore/ccsrc/pipeline/jit/pipeline.cc
  4. +4
    -1
      mindspore/ccsrc/pipeline/jit/pipeline.h

+ 2
- 1
mindspore/ccsrc/backend/session/executor.cc View File

@@ -102,7 +102,8 @@ void Executor::CheckException() {
}

void Executor::WorkerJoin() {
if (worker_->joinable()) {
// Avoid the worker thread joining itself, which would cause a deadlock
if (worker_->joinable() && worker_->get_id() != std::this_thread::get_id()) {
{
std::unique_lock<std::mutex> lock(task_mutex_);
auto task = std::make_shared<ExitTask>();


+ 14
- 2
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -444,6 +444,8 @@ void Debugger::CommandLoop() {
case DebuggerCommand::kExitCMD:
MS_LOG(INFO) << "ExitCMD";
Exit();
// Used for debugger termination
run = true;
break;
case DebuggerCommand::kRunCMD:
MS_LOG(INFO) << "RunCMD";
@@ -594,8 +596,18 @@ std::list<TensorProto> Debugger::LoadTensors(const ProtoVector<TensorProto> &ten

// Handles a debugger exit request according to the current run level.
//
// - run_level_ == "step": only raises the ExecutorPy termination flag and
//   returns; the resources are cleared and the process exits later, when
//   ExecutorPy::TerminateDebugger() observes the flag.
// - any other run level (including "node"): clears resources and exits the
//   process from here with status 1.
//
// The function must not clear resources and exit unconditionally up front:
// doing so would make the step-level notification path unreachable.
void Debugger::Exit() {
  // For node level, debugger has to exit itself because main thread can only exit in step boundary;
  // For step level, debugger will notify main thread to exit;
  if (run_level_ == "step") {
    // Notify main thread to terminate
    pipeline::ExecutorPy::DebugTerminate(true);
    return;
  }
  // "node" and every unrecognised run level share the same immediate-exit path.
  pipeline::ClearResAtexit();
  exit(1);
}

std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode) {


+ 11
- 0
mindspore/ccsrc/pipeline/jit/pipeline.cc View File

@@ -76,6 +76,7 @@ const char IR_TYPE_MINDIR[] = "mind_ir";

ExecutorPyPtr ExecutorPy::executor_ = nullptr;
std::mutex ExecutorPy::instance_lock_;
// Termination request flag: written through ExecutorPy::DebugTerminate() and
// checked by ExecutorPy::TerminateDebugger() at the start of ExecutorPy::Run().
// NOTE(review): this is a plain bool; if it is written and read on different
// threads, confirm whether it should be std::atomic<bool>.
bool ExecutorPy::debugger_terminate_ = false;

std::unordered_map<abstract::AbstractBasePtrList, int, abstract::AbstractBasePtrListHasher,
abstract::AbstractBasePtrListEqual>
@@ -748,7 +749,17 @@ void ExecutorPy::ProcessVmArg(const py::tuple &args, const std::string &phase, V
ProcessVmArgInner(args, GetResource(phase), arg_list);
}

// Exits the process if debugger termination has been requested.
// When debugger_terminate_ is false this is a no-op; otherwise it logs,
// clears resources via ClearResAtexit() and exits with status 1.
void ExecutorPy::TerminateDebugger() {
  if (!debugger_terminate_) {
    return;  // No termination request pending.
  }

  MS_LOG(INFO) << "Terminate debugger and clear resources!";
  ClearResAtexit();
  exit(1);
}

py::object ExecutorPy::Run(const py::tuple &args, const py::object &phase) {
// The MindSpore debugger notifies the main thread to exit after one step, so the next step will not run
TerminateDebugger();
std::size_t size = args.size();
if (!py::isinstance<py::str>(phase)) {
MS_LOG(EXCEPTION) << "Run failed, phase input is not a str";


+ 4
- 1
mindspore/ccsrc/pipeline/jit/pipeline.h View File

@@ -97,6 +97,9 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
void DelNetRes(const std::string &id);
void ReleaseResource(const py::object &phase);
static void ClearRes();
// Returns the current value of the debugger termination flag.
static bool GetDebugTerminate() {
  return debugger_terminate_;
}
// Sets the debugger termination flag to `val`.
// NOTE(review): the flag is a plain bool; if callers run on a different thread
// than the reader, confirm whether synchronisation is needed.
static void DebugTerminate(bool val) {
  debugger_terminate_ = val;
}
// If the debugger termination flag is set, clears resources and exits the process;
// otherwise does nothing. Called at the start of Run().
void TerminateDebugger();

std::map<std::string, std::pair<PrimitivePyPtr, std::string>> FetchInfoForQuantExport(const std::string &phase_s);

@@ -111,6 +114,7 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
std::map<std::string, ExecutorInfoPtr> info_;
static std::shared_ptr<ExecutorPy> executor_;
static std::mutex instance_lock_;
static bool debugger_terminate_;
};
using ExecutorPyPtr = std::shared_ptr<ExecutorPy>;

@@ -125,7 +129,6 @@ void InitHccl();
void FinalizeHccl();
void InitBackend();
void FinalizeBackend();

void ClearResAtexit();
void ReleaseGeTsd();



Loading…
Cancel
Save