From 23b4b4d106b86737cb064a4946cb196f415b90df Mon Sep 17 00:00:00 2001 From: wilfChen Date: Mon, 18 May 2020 18:53:50 +0800 Subject: [PATCH] Gpu NoOp optimizer --- mindspore/ccsrc/pre_activate/common/helper.cc | 2 +- mindspore/ccsrc/session/gpu_session.cc | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/mindspore/ccsrc/pre_activate/common/helper.cc b/mindspore/ccsrc/pre_activate/common/helper.cc index 5cc3374ea5..e993e22929 100644 --- a/mindspore/ccsrc/pre_activate/common/helper.cc +++ b/mindspore/ccsrc/pre_activate/common/helper.cc @@ -328,7 +328,7 @@ tensor::TensorPtr CreateTupleTensor(const ValueTuplePtr &value_tuple) { bool IsNopNode(const AnfNodePtr &node) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); - if (context_ptr->device_target() != kAscendDevice) { + if (context_ptr->device_target() != kAscendDevice && context_ptr->device_target() != kGPUDevice) { return false; } static std::unordered_set<std::string> nop_nodes = {prim::kPrimReshape->name(), kExpandDimsOpName, diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc index 3a80382e9b..3de7588652 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/session/gpu_session.cc @@ -20,6 +20,7 @@ #include "device/gpu/gpu_stream_assign.h" #include "pre_activate/common/optimizer.h" #include "pre_activate/common/pass_manager.h" +#include "pre_activate/common/helper.h" #include "pre_activate/pass/communication_op_fusion.h" #include "device/kernel_runtime_manager.h" #include "predict/predict.h" @@ -69,6 +70,7 @@ void GPUSession::AllocateMemory(KernelGraph *kernel_graph) const { MS_EXCEPTION_IF_NULL(kernel_graph); auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); MS_EXCEPTION_IF_NULL(runtime_instance); + opt::RemoveNopNode(kernel_graph); runtime_instance->AssignMemory(kernel_graph); } @@ -77,6 +79,7 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr>
&input MS_EXCEPTION_IF_NULL(kernel_graph); auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); MS_EXCEPTION_IF_NULL(runtime_instance); + opt::RemoveNopNode(kernel_graph); runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph); } @@ -102,6 +105,8 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList Optimize(graph); // Assign CUDA streams AssignStream(graph); + // Remove NoOp from execution graph + opt::HideNopNode(graph.get()); // Build kernel if node is cnode BuildKernel(graph); // Set graph execution order before memory alloc, ensure that memory alloc is according to the reorder graph