| @@ -153,6 +153,34 @@ void KernelRuntime::RunOpAssignMemory(const std::vector<tensor::TensorPtr> &inpu | |||
| UpdateRefNodeOutputMem(graph); | |||
| } | |||
| void KernelRuntime::RunOpClearMemory(session::KernelGraph *graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| // clear input parameter memory resource | |||
| for (const auto &input_node : graph->inputs()) { | |||
| MS_EXCEPTION_IF_NULL(input_node); | |||
| AnfAlgo::SetOutputAddr(nullptr, 0, input_node.get()); | |||
| } | |||
| // clear input value node memory resource | |||
| for (const auto &value_node : graph->graph_value_nodes()) { | |||
| MS_EXCEPTION_IF_NULL(value_node); | |||
| AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get()); | |||
| } | |||
| for (const auto &cnode : graph->execution_order()) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| // clear output memory resource | |||
| for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(cnode); ++index) { | |||
| AnfAlgo::SetOutputAddr(nullptr, index, cnode.get()); | |||
| } | |||
| // clear workspace memory resource | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(cnode); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| auto workspace_lists = kernel_mod->GetWorkspaceSizeList(); | |||
| for (size_t index = 0; index < workspace_lists.size(); ++index) { | |||
| AnfAlgo::SetWorkspaceAddr(nullptr, index, cnode.get()); | |||
| } | |||
| } | |||
| } | |||
| void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) { | |||
| AssignStaticMemoryInput(graph); | |||
| AssignStaticMemoryValueNode(graph); | |||
| @@ -47,6 +47,7 @@ class KernelRuntime { | |||
| virtual bool Init() = 0; | |||
| virtual void AssignMemory(session::KernelGraph *graph); | |||
| void RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors, session::KernelGraph *graph); | |||
// Sets to nullptr the output addresses of the graph's inputs, value nodes and kernels, and the workspace
// addresses of its kernels. The graph must not be null.
| void RunOpClearMemory(session::KernelGraph *graph); | |||
| virtual bool Run(session::KernelGraph *graph); | |||
| virtual bool DumpData(session::KernelGraph *graph); | |||
| virtual bool RunTask(const session::KernelGraph *graph); | |||
| @@ -131,34 +131,6 @@ std::vector<BaseRef> GetRealArgs(const KernelGraphPtr graph, const VectorRef &ar | |||
| return real_args; | |||
| } | |||
| void ClearRunOpMemoryResource(const KernelGraphPtr &kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| // clear input parameter memory resource | |||
| for (const auto &input_node : kernel_graph->inputs()) { | |||
| MS_EXCEPTION_IF_NULL(input_node); | |||
| AnfAlgo::SetOutputAddr(nullptr, 0, input_node.get()); | |||
| } | |||
| // clear input value node memory resource | |||
| for (const auto &value_node : kernel_graph->graph_value_nodes()) { | |||
| MS_EXCEPTION_IF_NULL(value_node); | |||
| AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get()); | |||
| } | |||
| for (const auto &cnode : kernel_graph->execution_order()) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| // clear output memory resource | |||
| for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(cnode); ++index) { | |||
| AnfAlgo::SetOutputAddr(nullptr, index, cnode.get()); | |||
| } | |||
| // clear workspace memory resource | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(cnode); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| auto workspace_lists = kernel_mod->GetWorkspaceSizeList(); | |||
| for (size_t index = 0; index < workspace_lists.size(); ++index) { | |||
| AnfAlgo::SetWorkspaceAddr(nullptr, index, cnode.get()); | |||
| } | |||
| } | |||
| } | |||
| std::vector<CNodePtr> GetCNodes(const std::vector<AnfNodePtr> &anf_nodes) { | |||
| std::vector<CNodePtr> cnodes = {}; | |||
| size_t i = 0; | |||
| @@ -518,7 +490,7 @@ py::tuple AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &gr | |||
| } | |||
| py::object tuple_obj = utils::cast<PyObjectRef>(output_tensors).object_; | |||
| py::tuple tuple_tensors = py::cast<py::tuple>(tuple_obj); | |||
| ClearRunOpMemoryResource(graph); | |||
| RunOpMemoryClear(graph.get()); | |||
| MS_LOG(INFO) << "Run op " << op_run_info.op_name << " finish!"; | |||
| return tuple_tensors; | |||
| } | |||
| @@ -652,6 +624,13 @@ void AscendSession::RunOpMemoryAlloc(const std::vector<tensor::TensorPtr> &input | |||
| MS_LOG(INFO) << "Finish!"; | |||
| } | |||
| void AscendSession::RunOpMemoryClear(KernelGraph *kernel_graph) const { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); | |||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||
| runtime_instance->RunOpClearMemory(kernel_graph); | |||
| } | |||
| void AscendSession::GenerateTaskInfo(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| MS_LOG(INFO) << "Start!"; | |||
| (void)device::KernelAdjust::GetInstance().StepLoadCtrlInputs(kernel_graph); | |||
| @@ -80,6 +80,7 @@ class AscendSession : public SessionBasic { | |||
| void BuildKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| void MemoryAlloc(KernelGraph *kernel_graph) const; | |||
| void RunOpMemoryAlloc(const std::vector<tensor::TensorPtr> &input_tensors, KernelGraph *kernel_graph) const; | |||
// Passes kernel_graph to RunOpClearMemory on the kernel runtime looked up for the Ascend device and this
// session's device id. The graph must not be null.
| void RunOpMemoryClear(KernelGraph *kernel_graph) const; | |||
| void GenerateTaskInfo(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| void LoadTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| void ExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; | |||
| @@ -86,6 +86,13 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input | |||
| runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph); | |||
| } | |||
| void GPUSession::RunOpClearMemory(KernelGraph *kernel_graph) const { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); | |||
| MS_EXCEPTION_IF_NULL(runtime_instance); | |||
| runtime_instance->RunOpClearMemory(kernel_graph); | |||
| } | |||
| void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | |||
| const std::vector<tensor::TensorPtr> &inputs_const) const { | |||
| std::vector<tensor::TensorPtr> inputs(inputs_const); | |||
| @@ -200,6 +207,10 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten | |||
| void GPUSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info, | |||
| const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<int> &tensors_mask) { | |||
| // Check if the graph cache exists. | |||
| if (run_op_graphs_.find(graph_info) != run_op_graphs_.end()) { | |||
| return; | |||
| } | |||
| // Prepare the graph | |||
| auto kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| @@ -232,7 +243,7 @@ py::tuple GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph | |||
| } | |||
| py::object tuple_obj = utils::cast<PyObjectRef>(output_tensors).object_; | |||
| py::tuple tuple_tensors = py::cast<py::tuple>(tuple_obj); | |||
| run_op_graphs_.clear(); | |||
| RunOpClearMemory(kernel_graph.get()); | |||
| return tuple_tensors; | |||
| } | |||
| } // namespace gpu | |||
| @@ -59,6 +59,8 @@ class GPUSession : public SessionBasic { | |||
| void RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input_tensors, KernelGraph *kernel_graph) const; | |||
// Passes kernel_graph to RunOpClearMemory on the single kernel runtime looked up for the GPU device and this
// session's device id. The graph must not be null.
| void RunOpClearMemory(KernelGraph *kernel_graph) const; | |||
| void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | |||
| const std::vector<tensor::TensorPtr> &inputs_const) const override; | |||