/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// NOTE(review): this file arrived with every template-argument list stripped
// (extraction damage). All `<...>` arguments below were reconstructed from the
// call sites and the declarations in backend/session/executor.h — verify
// against that header before merging.
#include "backend/session/executor.h"
#include <algorithm>
#include <exception>
#include "runtime/device/kernel_runtime_manager.h"
#include "backend/session/executor_manager.h"
#include "utils/comm_manager.h"
#include "utils/scoped_long_running.h"

namespace mindspore {
namespace session {
namespace {
// Walks the (possibly nested) output VectorRef and refreshes every tensor in
// it: binds the device address produced by graph execution, updates the shape
// of dynamic-shape outputs, syncs data back to host when the tensor demands
// it, and finally clears the tensor's "need wait" flag so consumers unblock.
void UpdateOutputTensors(const VectorRef *outputs,
                         const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node) {
  MS_EXCEPTION_IF_NULL(outputs);
  for (auto item : *outputs) {
    if (utils::isa<VectorRefPtr>(item)) {
      // Nested tuple of outputs: recurse.
      auto vector_ref = utils::cast<VectorRef>(item);
      UpdateOutputTensors(&vector_ref, tensor_to_node);
    } else if (utils::isa<tensor::TensorPtr>(item)) {
      auto tensor = utils::cast<tensor::TensorPtr>(item);
      MS_EXCEPTION_IF_NULL(tensor);
      auto iter = tensor_to_node.find(tensor);
      if (iter != tensor_to_node.end()) {
        auto &node = iter->second.first;
        auto &output_index = iter->second.second;
        auto address = AnfAlgo::GetMutableOutputAddr(node, output_index);
        tensor->set_device_address(address);
        if (AnfAlgo::IsDynamicShape(node)) {
          // Dynamic-shape kernels only know their real output shape after
          // execution; propagate the inferred shape to the host tensor.
          auto updated_shape = AnfAlgo::GetOutputInferShape(node, output_index);
          ShapeVector int_shape;
          std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
          tensor->set_shape(int_shape);
        }
      }
      if (tensor->NeedSyncDeviceToHostImmediately()) {
        // Pull the data to host now, then detach the device address so the
        // next use re-uploads the host copy.
        tensor->data_sync(false);
        tensor->set_device_address(nullptr);
        tensor->set_sync_status(kNeedSyncHostToDevice);
      }
      tensor->SetNeedWait(false);
    }
  }
}

// Returns true if the (possibly nested) outputs contain at least one tensor.
// Used to detect "no real output" graphs (e.g. dataset graphs) which must be
// run synchronously.
bool TensorInVector(const VectorRef *outputs) {
  MS_EXCEPTION_IF_NULL(outputs);
  for (auto item : *outputs) {
    if (utils::isa<VectorRefPtr>(item)) {
      auto vector_ref = utils::cast<VectorRef>(item);
      if (TensorInVector(&vector_ref)) {
        return true;
      }
    } else if (utils::isa<tensor::TensorPtr>(item)) {
      return true;
    }
  }
  return false;
}
}  // namespace

// Compiles a node list into a graph; the resulting id is read back by the
// caller after the task completes.
void CompileNodesTask::Run() {
  MS_EXCEPTION_IF_NULL(session_);
  graph_id_ = session_->CompileGraphImpl(nodes_, output_nodes_);
}

// Compiles a whole FuncGraph.
void CompileGraphTask::Run() {
  MS_EXCEPTION_IF_NULL(session_);
  graph_id_ = session_->CompileGraphImpl(NOT_NULL(func_graph_));
}

void BuildGraphTask::Run() {
  MS_EXCEPTION_IF_NULL(session_);
  session_->BuildGraphImpl(graph_id_);
}

// Runs a graph on the worker thread. Exceptions are captured into the global
// MsException holder (re-thrown later on the calling thread by SyncRunTask),
// not propagated here, so that output tensors and locks are always released.
void RunGraphTask::Run() {
  MS_EXCEPTION_IF_NULL(session_);
  try {
    session_->RunGraphImpl(graph_id_, input_tensors_, &outputs_);
  } catch (const std::exception &e) {
    MsException::GetInstance().SetException();
  }
  UpdateOutputTensors(&outputs_, tensor_to_node_);
  // Release the input tensors locked in RunGraphAsync so downstream tasks
  // waiting on them become ready.
  for (auto &tensor : input_need_lock_tensors_) {
    tensor->SetNeedWait(false);
  }
  ExecutorManager::Instance().OnRunGraphFinished();
}

void BuildOpTask::Run() {
  MS_EXCEPTION_IF_NULL(session_);
  session_->BuildOpImpl(*op_run_info_, graph_info_, input_tensors_, tensors_mask_);
}

void RunOpTask::Run() {
  MS_EXCEPTION_IF_NULL(session_);
  session_->RunOpImpl(*op_run_info_, graph_info_, input_tensors_, &outputs_);
}

void CreateCommGroupTask::Run() { result_ = CommManager::GetInstance().CreateGroupSync(group_name_, ranks_); }

void DestroyCommGroupTask::Run() { result_ = CommManager::GetInstance().DestroyGroup(group_name_); }

// Spawns the single worker thread that serializes all task execution for this
// (device_name, device_id) pair.
Executor::Executor(const std::string &device_name, uint32_t device_id) {
  device_name_ = device_name;
  device_id_ = device_id;
  worker_ = std::make_shared<std::thread>(&Executor::WorkerLoop, this);
}

Executor::~Executor() { WorkerJoin(); }

void Executor::WorkerJoin() {
  // Avoid worker thread join itself which will cause deadlock.
  if (worker_->joinable() && worker_->get_id() != std::this_thread::get_id()) {
    {
      // Push a sentinel ExitTask; WorkerLoop returns when it dequeues it.
      std::unique_lock<std::mutex> lock(task_mutex_);
      auto task = std::make_shared<ExitTask>();
      ready_tasks_.push(task);
      task_cond_var_.notify_all();
    }
    worker_->join();
  }
}

// Worker thread main loop: pop one task at a time from ready_tasks_, run it,
// and wake any thread blocked in SyncRunTask when the task was synchronous.
void Executor::WorkerLoop() {
  while (true) {
    std::shared_ptr<Task> task;
    {
      std::unique_lock<std::mutex> lock(task_mutex_);
      task_cond_var_.wait(lock, [this] { return !ready_tasks_.empty(); });
      task = ready_tasks_.front();
      ready_tasks_.pop();
    }
    if (task->type_ == kExit) {
      OnWorkerExit();
      return;
    }
    try {
      task->Run();
    } catch (const std::exception &e) {
      // Stash the exception; the waiting caller re-raises it via
      // MsException::CheckException() in SyncRunTask.
      MsException::GetInstance().SetException();
    }
    if (task->type_ != kRunGraph || task->sync_run_) {
      // Drop our reference before notifying so the waiter observes a
      // fully-finished task.
      task = nullptr;
      sync_cond_var_.notify_all();
    } else {
      task = nullptr;
    }
  }
}

// Moves every pending task whose input tensors are no longer locked out of
// pending_tasks_ and returns them to the caller.
std::vector<std::shared_ptr<Task>> Executor::GetNewReadyTasks() {
  std::vector<std::shared_ptr<Task>> new_ready_tasks;
  std::unique_lock<std::mutex> lock(pending_task_mutex_);
  for (auto iter = pending_tasks_.begin(); iter != pending_tasks_.end();) {
    auto task = *iter;
    if (IsTaskReady(task)) {
      new_ready_tasks.emplace_back(task);
      // erase() returns the next valid iterator for any container type.
      iter = pending_tasks_.erase(iter);
    } else {
      ++iter;
    }
  }
  return new_ready_tasks;
}

// Called (via ExecutorManager) when a RunGraphTask finishes: promote any
// now-ready pending tasks onto the worker queue.
void Executor::OnRunGraphFinished() {
  auto new_ready_tasks = GetNewReadyTasks();
  std::unique_lock<std::mutex> lock(task_mutex_);
  for (auto &task : new_ready_tasks) {
    ready_tasks_.push(task);
  }
  if (!new_ready_tasks.empty()) {
    task_cond_var_.notify_all();
  }
}

// A task is ready when none of its inputs is still waiting on a producer.
bool Executor::IsTaskReady(const std::shared_ptr<RunGraphTask> &task) {
  MS_EXCEPTION_IF_NULL(task);
  for (auto &input : task->input_need_wait_tensors_) {
    MS_EXCEPTION_IF_NULL(input);
    if (input->NeedWait()) {
      return false;
    }
  }
  return true;
}

// Enqueues the task and blocks until the worker signals completion, then
// re-raises any exception the worker captured.
void Executor::SyncRunTask(const std::shared_ptr<Task> &task) {
  std::unique_lock<std::mutex> lock(task_mutex_);
  ready_tasks_.push(task);
  task_cond_var_.notify_all();
  sync_cond_var_.wait(lock);
  MsException::GetInstance().CheckException();
}

GraphId Executor::CompileGraph(const SessionPtr &session, const AnfNodePtrList &lst, const AnfNodePtrList &outputs) {
  auto task = std::make_shared<CompileNodesTask>();
  task->session_ = session;
  task->nodes_ = lst;
  task->output_nodes_ = outputs;
  SyncRunTask(task);
  return task->graph_id_;
}

GraphId Executor::CompileGraph(const SessionPtr &session, NotNull<FuncGraphPtr> func_graph) {
  auto task = std::make_shared<CompileGraphTask>();
  task->session_ = session;
  task->func_graph_ = func_graph;
  SyncRunTask(task);
  return task->graph_id_;
}

void Executor::BuildGraph(const SessionPtr &session, GraphId graphId) {
  auto task = std::make_shared<BuildGraphTask>();
  task->session_ = session;
  task->graph_id_ = graphId;
  SyncRunTask(task);
}

// Synchronous graph execution: blocks the caller until the graph has run.
void Executor::RunGraph(const SessionPtr &session, const GraphId &graph_id,
                        const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
  MS_EXCEPTION_IF_NULL(session);
  MS_EXCEPTION_IF_NULL(outputs);
  auto task = std::make_shared<RunGraphTask>();
  task->session_ = session;
  task->graph_id_ = graph_id;
  task->input_tensors_ = inputs;
  session->CreateOutputTensors(graph_id, inputs, outputs, &task->tensor_to_node_);
  task->outputs_ = *outputs;
  task->sync_run_ = true;
  // Release the GIL/long-running guard while we block on the worker.
  mindspore::ScopedLongRunning long_running;
  SyncRunTask(task);
}

// Asynchronous graph execution: output tensors are created up front and
// handed back immediately; consumers block on the tensors' NeedWait flag
// until RunGraphTask::Run clears it.
void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id,
                             const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
  MS_EXCEPTION_IF_NULL(session);
  MS_EXCEPTION_IF_NULL(outputs);
  // BUG FIX: the previous code had `if (session != nullptr) { RunGraph(...);
  // return; }` here. Since session was already null-checked above, that
  // branch was always taken, silently forcing every "async" run to be
  // synchronous and leaving the dependency-tracking code below unreachable.
  auto task = std::make_shared<RunGraphTask>();
  task->session_ = session;
  task->graph_id_ = graph_id;
  task->input_tensors_ = inputs;
  task->input_need_lock_tensors_ = session->GetNeedLockInputTensors(graph_id, inputs);
  // Record which inputs are still being produced by earlier async runs.
  for (auto &tensor : inputs) {
    if (tensor->NeedWait()) {
      task->input_need_wait_tensors_.emplace_back(tensor);
    }
  }
  // Lock inputs so later graphs that consume them wait for this run.
  for (auto &tensor : task->input_need_lock_tensors_) {
    tensor->SetNeedWait(true);
  }
  session->CreateOutputTensors(graph_id, inputs, outputs, &task->tensor_to_node_);
  // Maintain a copy of the output vector for the worker to fill in.
  task->outputs_ = *outputs;
  // Sync-run graphs without output tensors (e.g. dataset graphs): there is no
  // tensor whose NeedWait flag could signal completion.
  if (!TensorInVector(outputs)) {
    task->sync_run_ = true;
    mindspore::ScopedLongRunning long_running;
    SyncRunTask(task);
    return;
  }
  if (!IsTaskReady(task)) {
    // Inputs still locked: park the task until OnRunGraphFinished promotes it.
    std::unique_lock<std::mutex> lock(pending_task_mutex_);
    pending_tasks_.push_back(task);
    return;
  }
  std::unique_lock<std::mutex> lock(task_mutex_);
  ready_tasks_.push(task);
  task_cond_var_.notify_all();
}

void Executor::BuildOp(const SessionPtr &session, OpRunInfo *op_run_info, const GraphInfo &graph_info,
                       const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<int64_t> &tensors_mask) {
  auto task = std::make_shared<BuildOpTask>();
  task->session_ = session;
  task->op_run_info_ = op_run_info;
  task->graph_info_ = graph_info;
  task->input_tensors_ = input_tensors;
  task->tensors_mask_ = tensors_mask;
  SyncRunTask(task);
}

void Executor::RunOp(const SessionPtr &session, OpRunInfo *op_run_info, const GraphInfo &graph_info,
                     const std::vector<tensor::TensorPtr> &input_tensors, VectorRef *outputs) {
  auto task = std::make_shared<RunOpTask>();
  task->session_ = session;
  task->op_run_info_ = op_run_info;
  task->graph_info_ = graph_info;
  task->input_tensors_ = input_tensors;
  SyncRunTask(task);
  *outputs = task->outputs_;
}

bool Executor::CreateCommGroup(const std::string &group_name, std::vector<uint32_t> ranks) {
  auto task = std::make_shared<CreateCommGroupTask>();
  task->group_name_ = group_name;
  task->ranks_ = ranks;
  SyncRunTask(task);
  return task->result_;
}

bool Executor::DestroyCommGroup(const std::string &group_name) {
  auto task = std::make_shared<DestroyCommGroupTask>();
  task->group_name_ = group_name;
  SyncRunTask(task);
  return task->result_;
}

// Runs on the worker thread just before it exits: release the device runtime
// (Ascend only, per the visible check).
void Executor::OnWorkerExit() {
  if (device_name_ == kAscendDevice) {
    device::KernelRuntimeManager::Instance().ReleaseKernelRuntime(kAscendDevice, device_id_);
  }
}
}  // namespace session
}  // namespace mindspore