|
|
|
@@ -70,7 +70,7 @@ void GPUSession::AllocateMemory(KernelGraph *kernel_graph) const { |
|
|
|
MS_EXCEPTION_IF_NULL(kernel_graph); |
|
|
|
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); |
|
|
|
MS_EXCEPTION_IF_NULL(runtime_instance); |
|
|
|
opt::RemoveNopNode(kernel_graph); |
|
|
|
// opt::RemoveNopNode(kernel_graph); |
|
|
|
runtime_instance->AssignMemory(kernel_graph); |
|
|
|
} |
|
|
|
|
|
|
|
@@ -79,7 +79,7 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input |
|
|
|
MS_EXCEPTION_IF_NULL(kernel_graph); |
|
|
|
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); |
|
|
|
MS_EXCEPTION_IF_NULL(runtime_instance); |
|
|
|
opt::RemoveNopNode(kernel_graph); |
|
|
|
// opt::RemoveNopNode(kernel_graph); |
|
|
|
runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph); |
|
|
|
} |
|
|
|
|
|
|
|
@@ -106,7 +106,7 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList |
|
|
|
// Assign CUDA streams |
|
|
|
AssignStream(graph); |
|
|
|
// Remove NoOp from execution graph |
|
|
|
opt::HideNopNode(graph.get()); |
|
|
|
// opt::HideNopNode(graph.get()); |
|
|
|
// Build kernel if node is cnode |
|
|
|
BuildKernel(graph); |
|
|
|
// Set the graph execution order before memory allocation, so that memory is allocated according to the reordered graph |
|
|
|
|