From 7dca9bfb37a67fd02e93db3924671b08c036d242 Mon Sep 17 00:00:00 2001 From: limingqi107 Date: Tue, 16 Jun 2020 20:30:55 +0800 Subject: [PATCH] gpu add the graph cache of pynative mode --- mindspore/ccsrc/device/kernel_runtime.cc | 28 +++++++++++++++++ mindspore/ccsrc/device/kernel_runtime.h | 1 + mindspore/ccsrc/session/ascend_session.cc | 37 +++++------------------ mindspore/ccsrc/session/ascend_session.h | 1 + mindspore/ccsrc/session/gpu_session.cc | 13 +++++++- mindspore/ccsrc/session/gpu_session.h | 2 ++ 6 files changed, 52 insertions(+), 30 deletions(-) diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc index d3d9052c5f..92feabaf7d 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/device/kernel_runtime.cc @@ -153,6 +153,34 @@ void KernelRuntime::RunOpAssignMemory(const std::vector<tensor::TensorPtr> &inpu UpdateRefNodeOutputMem(graph); } +void KernelRuntime::RunOpClearMemory(session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); + // clear input parameter memory resource + for (const auto &input_node : graph->inputs()) { + MS_EXCEPTION_IF_NULL(input_node); + AnfAlgo::SetOutputAddr(nullptr, 0, input_node.get()); + } + // clear input value node memory resource + for (const auto &value_node : graph->graph_value_nodes()) { + MS_EXCEPTION_IF_NULL(value_node); + AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get()); + } + for (const auto &cnode : graph->execution_order()) { + MS_EXCEPTION_IF_NULL(cnode); + // clear output memory resource + for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(cnode); ++index) { + AnfAlgo::SetOutputAddr(nullptr, index, cnode.get()); + } + // clear workspace memory resource + auto kernel_mod = AnfAlgo::GetKernelMod(cnode); + MS_EXCEPTION_IF_NULL(kernel_mod); + auto workspace_lists = kernel_mod->GetWorkspaceSizeList(); + for (size_t index = 0; index < workspace_lists.size(); ++index) { + AnfAlgo::SetWorkspaceAddr(nullptr, index, cnode.get()); + } + } +} + void 
KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) { AssignStaticMemoryInput(graph); AssignStaticMemoryValueNode(graph); diff --git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/device/kernel_runtime.h index c8ab084039..bfe857f61b 100644 --- a/mindspore/ccsrc/device/kernel_runtime.h +++ b/mindspore/ccsrc/device/kernel_runtime.h @@ -47,6 +47,7 @@ class KernelRuntime { virtual bool Init() = 0; virtual void AssignMemory(session::KernelGraph *graph); void RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors, session::KernelGraph *graph); + void RunOpClearMemory(session::KernelGraph *graph); virtual bool Run(session::KernelGraph *graph); virtual bool DumpData(session::KernelGraph *graph); virtual bool RunTask(const session::KernelGraph *graph); diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc index bf1fa87530..6c19a43f3f 100644 --- a/mindspore/ccsrc/session/ascend_session.cc +++ b/mindspore/ccsrc/session/ascend_session.cc @@ -131,34 +131,6 @@ std::vector<BaseRef> GetRealArgs(const KernelGraphPtr graph, const VectorRef &ar return real_args; } -void ClearRunOpMemoryResource(const KernelGraphPtr &kernel_graph) { - MS_EXCEPTION_IF_NULL(kernel_graph); - // clear input parameter memory resource - for (const auto &input_node : kernel_graph->inputs()) { - MS_EXCEPTION_IF_NULL(input_node); - AnfAlgo::SetOutputAddr(nullptr, 0, input_node.get()); - } - // clear input value node memory resource - for (const auto &value_node : kernel_graph->graph_value_nodes()) { - MS_EXCEPTION_IF_NULL(value_node); - AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get()); - } - for (const auto &cnode : kernel_graph->execution_order()) { - MS_EXCEPTION_IF_NULL(cnode); - // clear output memory resource - for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(cnode); ++index) { - AnfAlgo::SetOutputAddr(nullptr, index, cnode.get()); - } - // clear workspace memory resource - auto kernel_mod = AnfAlgo::GetKernelMod(cnode); - 
MS_EXCEPTION_IF_NULL(kernel_mod); - auto workspace_lists = kernel_mod->GetWorkspaceSizeList(); - for (size_t index = 0; index < workspace_lists.size(); ++index) { - AnfAlgo::SetWorkspaceAddr(nullptr, index, cnode.get()); - } - } -} - std::vector<CNodePtr> GetCNodes(const std::vector<AnfNodePtr> &anf_nodes) { std::vector<CNodePtr> cnodes = {}; size_t i = 0; @@ -518,7 +490,7 @@ py::tuple AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &gr } py::object tuple_obj = utils::cast<PyObjectRef>(output_tensors).object_; py::tuple tuple_tensors = py::cast<py::tuple>(tuple_obj); - ClearRunOpMemoryResource(graph); + RunOpMemoryClear(graph.get()); MS_LOG(INFO) << "Run op " << op_run_info.op_name << " finish!"; return tuple_tensors; } @@ -652,6 +624,13 @@ void AscendSession::RunOpMemoryAlloc(const std::vector<tensor::TensorPtr> &input MS_LOG(INFO) << "Finish!"; } +void AscendSession::RunOpMemoryClear(KernelGraph *kernel_graph) const { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); + MS_EXCEPTION_IF_NULL(runtime_instance); + runtime_instance->RunOpClearMemory(kernel_graph); +} + void AscendSession::GenerateTaskInfo(const std::shared_ptr<KernelGraph> &kernel_graph) const { MS_LOG(INFO) << "Start!"; (void)device::KernelAdjust::GetInstance().StepLoadCtrlInputs(kernel_graph); diff --git a/mindspore/ccsrc/session/ascend_session.h b/mindspore/ccsrc/session/ascend_session.h index 13ee80b254..55eb454633 100755 --- a/mindspore/ccsrc/session/ascend_session.h +++ b/mindspore/ccsrc/session/ascend_session.h @@ -80,6 +80,7 @@ class AscendSession : public SessionBasic { void BuildKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const; void MemoryAlloc(KernelGraph *kernel_graph) const; void RunOpMemoryAlloc(const std::vector<tensor::TensorPtr> &input_tensors, KernelGraph *kernel_graph) const; + void RunOpMemoryClear(KernelGraph *kernel_graph) const; void GenerateTaskInfo(const std::shared_ptr<KernelGraph> &kernel_graph) const; void LoadTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; void 
ExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc index 999c5ba163..c9f135bba9 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/session/gpu_session.cc @@ -86,6 +86,13 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph); } +void GPUSession::RunOpClearMemory(KernelGraph *kernel_graph) const { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); + MS_EXCEPTION_IF_NULL(runtime_instance); + runtime_instance->RunOpClearMemory(kernel_graph); +} + void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, const std::vector<tensor::TensorPtr> &inputs_const) const { std::vector<tensor::TensorPtr> inputs(inputs_const); @@ -200,6 +207,10 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<int> &tensors_mask) { + // Check if the graph cache exists. 
+ if (run_op_graphs_.find(graph_info) != run_op_graphs_.end()) { + return; + } // Prepare the graph auto kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask); MS_EXCEPTION_IF_NULL(kernel_graph); @@ -232,7 +243,7 @@ py::tuple GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph } py::object tuple_obj = utils::cast<PyObjectRef>(output_tensors).object_; py::tuple tuple_tensors = py::cast<py::tuple>(tuple_obj); - run_op_graphs_.clear(); + RunOpClearMemory(kernel_graph.get()); return tuple_tensors; } } // namespace gpu diff --git a/mindspore/ccsrc/session/gpu_session.h b/mindspore/ccsrc/session/gpu_session.h index b396e4a9ba..0dfb815abe 100644 --- a/mindspore/ccsrc/session/gpu_session.h +++ b/mindspore/ccsrc/session/gpu_session.h @@ -59,6 +59,8 @@ class GPUSession : public SessionBasic { void RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input_tensors, KernelGraph *kernel_graph) const; + void RunOpClearMemory(KernelGraph *kernel_graph) const; + void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, const std::vector<tensor::TensorPtr> &inputs_const) const override;