Browse Source

!11133 kill subprocess when main process exit

From: @zhoufeng54
Reviewed-by: 
Signed-off-by:
tags/v1.2.0-rc1
mindspore-ci-bot Gitee 5 years ago
parent
commit
6cae5a6a5d
4 changed files with 49 additions and 21 deletions
  1. +27
    -8
      mindspore/ccsrc/common/duplex_pipe.cc
  2. +9
    -9
      mindspore/ccsrc/common/duplex_pipe.h
  3. +8
    -2
      mindspore/ccsrc/minddata/dataset/util/sig_handler.cc
  4. +5
    -2
      mindspore/ccsrc/runtime/device/ascend/signal_util.cc

+ 27
- 8
mindspore/ccsrc/common/duplex_pipe.cc View File

@@ -22,6 +22,13 @@
#include <algorithm>

namespace mindspore {
DuplexPipe::~DuplexPipe() {
// pid_ < 0 means the child process is invalid or closed, pid_ == 0 means this process is child
if (pid_ > 0) {
(void)kill(pid_, SIGKILL);
}
}

int DuplexPipe::Open(std::initializer_list<std::string> arg_list, bool append_fds) {
if (pipe(fd1_) == -1) {
DP_EXCEPTION << "pipe 1 failed, errno: " << errno;
@@ -68,7 +75,7 @@ int DuplexPipe::Open(std::initializer_list<std::string> arg_list, bool append_fd
close(fd1_[0]);
close(fd2_[1]);

signal_handler_ = std::make_shared<SignalHandler>(shared_from_this(), pid_);
signal_handler_ = std::make_shared<SignalHandler>(weak_from_this(), &pid_);
}
return 0;
}
@@ -147,16 +154,17 @@ void DuplexPipe::Close() {
close(fd1_[1]);
close(fd2_[0]);
close(fd2_[1]);
pid_ = -1;
}

DuplexPipe::SignalHandler::SignalHandler(std::shared_ptr<DuplexPipe> dp, pid_t pid) {
DuplexPipe::SignalHandler::SignalHandler(const std::weak_ptr<DuplexPipe> &dp, pid_t *pid) {
dp_ = dp;
child_pid_ = pid;
signal(SIGCHLD, SigChildHandler);
signal(SIGPIPE, SigPipeHandler);
}

DuplexPipe::SignalHandler::~SignalHandler() { dp_.reset(); }
DuplexPipe::SignalHandler::~SignalHandler() {}

void DuplexPipe::SignalHandler::SetAlarm(unsigned int interval_secs) {
signal(SIGALRM, SigAlarmHandler);
@@ -167,20 +175,31 @@ void DuplexPipe::SignalHandler::CancelAlarm() { alarm(0); }

void DuplexPipe::SignalHandler::SigAlarmHandler(int sig) {
DP_INFO << "Signal: " << sig << ", child_pid_: " << child_pid_;
if (dp_ != nullptr) {
dp_->NotifyTimeOut();
auto shared_dp = dp_.lock();
if (shared_dp != nullptr) {
shared_dp->NotifyTimeOut();
}
if (child_pid_ != nullptr) {
*child_pid_ = -1;
}
}

void DuplexPipe::SignalHandler::SigPipeHandler(int sig) {
DP_INFO << "Signal: " << sig << ", child_pid_: " << child_pid_;
if (dp_ != nullptr) {
dp_->NotifyFinalize();
auto shared_dp = dp_.lock();
if (shared_dp != nullptr) {
shared_dp->NotifyFinalize();
}
if (child_pid_ != nullptr) {
*child_pid_ = -1;
}
}

void DuplexPipe::SignalHandler::SigChildHandler(int sig) {
int status;
(void)waitpid(child_pid_, &status, WNOHANG | WUNTRACED);
if (child_pid_ != nullptr) {
(void)waitpid(*child_pid_, &status, WNOHANG | WUNTRACED);
*child_pid_ = -1;
}
}
} // namespace mindspore

+ 9
- 9
mindspore/ccsrc/common/duplex_pipe.h View File

@@ -39,7 +39,7 @@ class DuplexPipe : public std::enable_shared_from_this<mindspore::DuplexPipe> {
constexpr inline static unsigned int kTimeOutSeconds = 5;

DuplexPipe() = default;
~DuplexPipe() = default;
~DuplexPipe();

// Create a subprocess and open a duplex pipe between local and remote
int Open(std::initializer_list<std::string> arg_list, bool append_fds = false);
@@ -84,10 +84,6 @@ class DuplexPipe : public std::enable_shared_from_this<mindspore::DuplexPipe> {
}
}

// Subprocess id in parent process,
// otherwise zero in child process.
pid_t pid_;

// Pipe: { Local:fd1_[1] --> Remote:fd1_[0] }
// Remote:fd1_[0] would be redirected by subprocess's stdin.
// Local:fd1_[1] would be used by 'Write()' as output.
@@ -109,7 +105,7 @@ class DuplexPipe : public std::enable_shared_from_this<mindspore::DuplexPipe> {

class SignalHandler {
public:
SignalHandler(std::shared_ptr<DuplexPipe> dp, pid_t pid);
SignalHandler(const std::weak_ptr<DuplexPipe> &dp, pid_t *pid);
~SignalHandler();

void SetAlarm(unsigned int interval_secs);
@@ -120,15 +116,19 @@ class DuplexPipe : public std::enable_shared_from_this<mindspore::DuplexPipe> {
static void SigPipeHandler(int sig);
static void SigChildHandler(int sig);

inline static std::shared_ptr<DuplexPipe> dp_;
inline static pid_t child_pid_;
inline static std::weak_ptr<DuplexPipe> dp_;
inline static pid_t *child_pid_;
};

unsigned int time_out_secs_ = kTimeOutSeconds;
std::shared_ptr<std::function<void()>> time_out_callback_;
std::shared_ptr<std::function<void()>> finalize_callback_;
// signal_handler_ has a pid_ pointer, so it must be ahead of pid_
std::shared_ptr<SignalHandler> signal_handler_;

// Subprocess id in parent process,
// otherwise zero in child process.
pid_t pid_;
};
} // namespace mindspore

#endif // MINDSPORE_CCSRC_COMMON_DUPLEX_PIPE_H_

+ 8
- 2
mindspore/ccsrc/minddata/dataset/util/sig_handler.cc View File

@@ -28,14 +28,20 @@ namespace dataset {
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
void RegisterHandlers() {
struct sigaction new_int_action;
struct sigaction new_term_action;

// For the interrupt handler, we do not use SA_RESETHAND so this handler remains in play
// permanently, do not use the OS default handler for it.
new_int_action.sa_sigaction = &IntHandler;
new_int_action.sa_sigaction = [](int num, siginfo_t *info, void *ctx) { IntHandler(num, info, ctx); };
(void)sigemptyset(&new_int_action.sa_mask);
new_int_action.sa_flags = SA_RESTART | SA_SIGINFO;

new_term_action.sa_sigaction = [](int num, siginfo_t *info, void *ctx) { IntHandler(num, info, ctx); };
(void)sigemptyset(&new_term_action.sa_mask);
new_term_action.sa_flags = SA_RESTART | SA_SIGINFO;

(void)sigaction(SIGINT, &new_int_action, nullptr);
(void)sigaction(SIGTERM, &new_int_action, nullptr);
(void)sigaction(SIGTERM, &new_term_action, nullptr);
}

extern void IntHandler(int sig_num, // The signal that was raised


+ 5
- 2
mindspore/ccsrc/runtime/device/ascend/signal_util.cc View File

@@ -16,8 +16,8 @@

#include "runtime/device/ascend/signal_util.h"
#include <signal.h>
#include <iostream>
#include "utils/log_adapter.h"
#include "backend/session/kernel_build_client.h"

namespace mindspore {
SignalGuard::SignalGuard() { RegisterHandlers(); }
@@ -44,5 +44,8 @@ void SignalGuard::RegisterHandlers() {
(void)sigaction(SIGINT, &int_action, nullptr);
}

void SignalGuard::IntHandler(int, siginfo_t *, void *) { MS_LOG_EXCEPTION << "Exit"; }
void SignalGuard::IntHandler(int, siginfo_t *, void *) {
kernel::AscendKernelBuildClient::Instance().Close();
MS_LOG_EXCEPTION << "KeyboardInterrupt";
}
} // namespace mindspore

Loading…
Cancel
Save