Browse Source

check backend worker exception

tags/v1.0.0
kswang 5 years ago
parent
commit
4fb9e8c992
3 changed files with 30 additions and 5 deletions
  1. +23
    -1
      mindspore/ccsrc/backend/session/executor.cc
  2. +4
    -1
      mindspore/ccsrc/backend/session/executor.h
  3. +3
    -3
      mindspore/ccsrc/backend/session/executor_manager.h

+ 23
- 1
mindspore/ccsrc/backend/session/executor.cc View File

@@ -110,6 +110,12 @@ Executor::Executor(const std::string &device_name, uint32_t device_id) {
worker_ = std::make_shared<std::thread>(&Executor::WorkerLoop, this);
}

// Rethrow (on the caller's thread) an exception that the backend worker
// thread captured in WorkerLoop via std::current_exception().
// The stored pointer is cleared BEFORE rethrowing: otherwise a single
// worker failure would leave exception_ptr_ set forever, and every later
// async call on this Executor would rethrow the same stale exception.
// NOTE(review): exception_ptr_ is written by the worker thread and read
// here without explicit synchronization — presumably ordered by the
// condition-variable handshake around each task; confirm against WorkerLoop.
void Executor::CheckException() {
  if (exception_ptr_ != nullptr) {
    auto exception_ptr = exception_ptr_;
    exception_ptr_ = nullptr;
    std::rethrow_exception(exception_ptr);
  }
}

void Executor::WorkerJoin() {
StopWorker();
worker_->join();
@@ -128,7 +134,11 @@ void Executor::WorkerLoop() {
OnWorkerExit();
return;
}
task->Run();
try {
task->Run();
} catch (const std::exception &e) {
exception_ptr_ = std::current_exception();
}
if (task->type_ == kCompileNodes) {
compile_cond_var_.notify_all();
} else if (task->type_ == kCompileGraph) {
@@ -183,6 +193,7 @@ bool Executor::IsAllInputsReady(const std::vector<tensor::TensorPtr> &inputs) {

GraphId Executor::CompileGraphAsync(const SessionPtr &session, const AnfNodePtrList &lst,
const AnfNodePtrList &outputs) {
CheckException();
std::unique_lock<std::mutex> lock(task_mutex_);
auto task = std::make_shared<CompileNodesTask>();
task->session_ = session;
@@ -191,10 +202,12 @@ GraphId Executor::CompileGraphAsync(const SessionPtr &session, const AnfNodePtrL
ready_tasks_.push(task);
task_cond_var_.notify_all();
compile_cond_var_.wait(lock);
CheckException();
return task->graph_id_;
}

GraphId Executor::CompileGraphAsync(const SessionPtr &session, NotNull<FuncGraphPtr> func_graph) {
CheckException();
std::unique_lock<std::mutex> lock(task_mutex_);
auto task = std::make_shared<CompileGraphTask>();
task->session_ = session;
@@ -202,10 +215,12 @@ GraphId Executor::CompileGraphAsync(const SessionPtr &session, NotNull<FuncGraph
ready_tasks_.push(task);
task_cond_var_.notify_all();
compile_cond_var_.wait(lock);
CheckException();
return task->graph_id_;
}

void Executor::BuildGraphAsync(const SessionPtr &session, GraphId graphId) {
CheckException();
std::unique_lock<std::mutex> lock(task_mutex_);
auto task = std::make_shared<BuildGraphTask>();
task->session_ = session;
@@ -213,10 +228,12 @@ void Executor::BuildGraphAsync(const SessionPtr &session, GraphId graphId) {
ready_tasks_.push(task);
task_cond_var_.notify_all();
build_cond_var_.wait(lock);
CheckException();
}

void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
CheckException();
auto task = std::make_shared<RunGraphTask>();
task->session_ = session;
task->graph_id_ = graph_id;
@@ -237,10 +254,12 @@ void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id,
task_cond_var_.notify_all();
py::gil_scoped_release release;
run_cond_var_.wait(lock);
CheckException();
}

void Executor::BuildOpAsync(const SessionPtr &session, OpRunInfo *op_run_info, const GraphInfo &graph_info,
const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<int> &tensors_mask) {
CheckException();
std::unique_lock<std::mutex> lock(task_mutex_);
auto task = std::make_shared<BuildOpTask>();
task->session_ = session;
@@ -251,10 +270,12 @@ void Executor::BuildOpAsync(const SessionPtr &session, OpRunInfo *op_run_info, c
ready_tasks_.push(task);
task_cond_var_.notify_all();
build_op_cond_var_.wait(lock);
CheckException();
}

py::tuple Executor::RunOpAsync(const SessionPtr &session, OpRunInfo *op_run_info, const GraphInfo &graph_info,
const std::vector<tensor::TensorPtr> &input_tensors) {
CheckException();
std::unique_lock<std::mutex> lock(task_mutex_);
auto task = std::make_shared<RunOpTask>();
task->session_ = session;
@@ -264,6 +285,7 @@ py::tuple Executor::RunOpAsync(const SessionPtr &session, OpRunInfo *op_run_info
ready_tasks_.push(task);
task_cond_var_.notify_all();
run_op_cond_var_.wait(lock);
CheckException();

// Trans output to tuple
auto output_tensors = TransformBaseRefListToTuple(task->outputs_);


+ 4
- 1
mindspore/ccsrc/backend/session/executor.h View File

@@ -26,6 +26,7 @@
#include <thread>
#include <mutex>
#include <condition_variable>
#include <exception>
#include "backend/session/session_basic.h"
#include "ir/anf.h"
#include "ir/tensor.h"
@@ -128,11 +129,12 @@ class Executor {
const std::vector<tensor::TensorPtr> &input_tensors);
void OnRunGraphFinished();

protected:
private:
void UpdateOutputTensors(VectorRef *outputs,
const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node);
std::vector<std::shared_ptr<RunGraphTask>> GetNewReadyTasks();
bool IsAllInputsReady(const std::vector<tensor::TensorPtr> &inputs);
void CheckException();
void StopWorker();
void OnWorkerExit();

@@ -149,6 +151,7 @@ class Executor {
std::queue<std::shared_ptr<Task>> ready_tasks_;
std::list<std::shared_ptr<RunGraphTask>> pending_tasks_;
std::shared_ptr<std::thread> worker_;
std::exception_ptr exception_ptr_{nullptr};
};
} // namespace session
} // namespace mindspore


+ 3
- 3
mindspore/ccsrc/backend/session/executor_manager.h View File

@@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_SESSION_EXECUTOR_MANGER_H_
#define MINDSPORE_CCSRC_BACKEND_SESSION_EXECUTOR_MANGER_H_
#ifndef MINDSPORE_CCSRC_BACKEND_SESSION_EXECUTOR_MANAGER_H_
#define MINDSPORE_CCSRC_BACKEND_SESSION_EXECUTOR_MANAGER_H_
#include <set>
#include <map>
#include <string>
@@ -42,4 +42,4 @@ class ExecutorManager {
};
} // namespace session
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_SESSION_EXECUTOR_MANGER_H_
#endif // MINDSPORE_CCSRC_BACKEND_SESSION_EXECUTOR_MANAGER_H_

Loading…
Cancel
Save