Browse Source

Fix the kernel build server quit issue.

tags/v1.0.0
Zhang Qinghua 5 years ago
parent
commit
83b7976d67
3 changed files with 19 additions and 14 deletions
  1. +2
    -1
      mindspore/ccsrc/backend/session/kernel_build_client.h
  2. +4
    -4
      mindspore/ccsrc/common/duplex_pipe.cc
  3. +13
    -9
      mindspore/ccsrc/common/duplex_pipe.h

+ 2
- 1
mindspore/ccsrc/backend/session/kernel_build_client.h View File

@@ -59,7 +59,8 @@ class KernelBuildClient {
// An exception is thrown if the open fails
if (dp_->Open({GetEnv(), GetScript()}, true) != -1) {
dp_->SetTimeOutSeconds(kTimeOutSeconds);
dp_->SetTimeOutCallback([this]() { SendRequest(kFinish); });
dp_->SetTimeOutCallback(std::make_shared<std::function<void()>>([this]() { SendRequest(kFinish); }));
dp_->SetFinalizeCallback(std::make_shared<std::function<void()>>([this]() { Close(); }));
init_ = true;
}
}


+ 4
- 4
mindspore/ccsrc/common/duplex_pipe.cc View File

@@ -168,7 +168,7 @@ void DuplexPipe::SignalHandler::CancelAlarm() { alarm(0); }
// Alarm-signal handler: logs the received signal number and child_pid_, then
// forwards the timeout to the DuplexPipe referenced by dp_ when it has not expired.
// NOTE(review): this is a rendered diff hunk without +/- markers; going by the
// "+4 -4" stat for this file, the TimeOut() call appears to be the removed line
// and the NotifyTimeOut() call its replacement -- confirm against the real file.
void DuplexPipe::SignalHandler::SigAlarmHandler(int sig) {
DP_INFO << "Signal: " << sig << ", child_pid_: " << child_pid_;
// dp_ is tested with expired() and then lock()ed in a separate step.
// NOTE(review): lock() yields an empty pointer if the object expires in between -- confirm this cannot happen here.
if (!dp_.expired()) {
dp_.lock()->TimeOut();
dp_.lock()->NotifyTimeOut();
}
}

@@ -184,9 +184,9 @@ void DuplexPipe::SignalHandler::SigChildHandler(int sig) {
int status;
auto pid = waitpid(child_pid_, &status, WNOHANG | WUNTRACED);
if (WIFEXITED(status)) {
DP_INFO << "Child exited, status: " << WEXITSTATUS(status) << ", pid: " << pid;
if (!dp_.expired()) {
dp_.lock()->Close();
DP_INFO << "Child exited, status: " << WEXITSTATUS(status) << ", pid: " << pid << ", dp expired: " << dp_.expired();
if (pid > 0 && !dp_.expired()) {
dp_.lock()->NotifyFinalize();
}
} else if (WIFSTOPPED(status)) {
DP_INFO << "Child stopped, sig: " << WSTOPSIG(status) << ", pid: " << pid;


+ 13
- 9
mindspore/ccsrc/common/duplex_pipe.h View File

@@ -45,10 +45,8 @@ class DuplexPipe : public std::enable_shared_from_this<mindspore::DuplexPipe> {
int Open(std::initializer_list<std::string> arg_list, bool append_fds = false);
void Close();
// Sets the timeout, in seconds, that SetTimeOut() later hands to SetAlarm().
void SetTimeOutSeconds(unsigned int secs) { time_out_secs_ = secs; }
// NOTE(review): rendered diff without +/- markers; per the "+13 -9" stat for this
// file, the std::function overload below appears to be the removed version and the
// two shared_ptr setters after it the replacement -- confirm against the real header.
// Stores the callback by value and raises the flag that TimeOut() checks before calling it.
void SetTimeOutCallback(const std::function<void()> &cb) {
has_time_out_callback_ = true;
time_out_callback_ = cb;
}
// Stores the callback pointer that NotifyTimeOut() invokes when it is non-null.
void SetTimeOutCallback(const std::shared_ptr<std::function<void()>> cb) { time_out_callback_ = cb; }
// Stores the callback pointer that NotifyFinalize() invokes when it is non-null.
void SetFinalizeCallback(const std::shared_ptr<std::function<void()>> cb) { finalize_callback_ = cb; }

// Write the 'buf' to remote stdin
void Write(const std::string &buf, bool flush = true);
@@ -64,14 +62,20 @@ class DuplexPipe : public std::enable_shared_from_this<mindspore::DuplexPipe> {
private:
// Arms the alarm on the signal handler with the currently configured timeout.
void SetTimeOut() {
  signal_handler_->SetAlarm(time_out_secs_);
}
// Asks the signal handler to cancel the alarm.
void CancelTimeOut() {
  signal_handler_->CancelAlarm();
}
// NOTE(review): rendered diff without +/- markers; the unterminated TimeOut()
// header and its has_time_out_callback_ check appear to be the removed lines, and
// NotifyTimeOut() their replacement -- confirm against the real header.
void TimeOut() {
if (has_time_out_callback_) {
time_out_callback_();
// Timeout path: invokes the registered timeout callback when one is set, closes
// the pipe, then reports the failure through DP_EXCEPTION (presumably throwing --
// the macro is defined outside this view).
void NotifyTimeOut() {
if (time_out_callback_ != nullptr) {
(*time_out_callback_)();
}
// Close() runs after the callback, whether or not a callback was set.
Close();
DP_EXCEPTION << "Time out when read from pipe";
}

// Runs the finalize callback when one has been registered; otherwise does nothing.
void NotifyFinalize() {
  if (!finalize_callback_) {
    return;
  }
  const auto &on_finalize = *finalize_callback_;
  on_finalize();
}

// Subprocess id in parent process,
// otherwise zero in child process.
pid_t pid_;
@@ -115,8 +119,8 @@ class DuplexPipe : public std::enable_shared_from_this<mindspore::DuplexPipe> {
};

// Timeout in seconds passed to SetAlarm() by SetTimeOut(); starts at kTimeOutSeconds.
unsigned int time_out_secs_ = kTimeOutSeconds;
// NOTE(review): rendered diff without +/- markers; the flag and the by-value
// std::function below appear to be the removed declarations, superseded by the
// shared_ptr members that follow -- confirm against the real header.
bool has_time_out_callback_ = false;
std::function<void()> time_out_callback_;
// Callback invoked by NotifyTimeOut() when non-null.
std::shared_ptr<std::function<void()>> time_out_callback_;
// Callback invoked by NotifyFinalize() when non-null.
std::shared_ptr<std::function<void()>> finalize_callback_;
// Target of the SetAlarm()/CancelAlarm() calls made by SetTimeOut()/CancelTimeOut().
std::shared_ptr<SignalHandler> signal_handler_;
};
} // namespace mindspore


Loading…
Cancel
Save