| @@ -31,6 +31,8 @@ DatasetIteratorKernel::DatasetIteratorKernel() : handle_(HandleMgr::INVALID_HAND | |||||
| DatasetIteratorKernel::~DatasetIteratorKernel() { GpuBufferMgr::GetInstance().Close(handle_); } | DatasetIteratorKernel::~DatasetIteratorKernel() { GpuBufferMgr::GetInstance().Close(handle_); } | ||||
| // Releases the GpuBufferMgr handle held by this kernel ahead of destruction. | |||||
| // The destructor also calls Close(handle_), and this hook may be invoked more than once, | |||||
| // so the handle is reset afterwards: a later Close() then sees the invalid handle (the same | |||||
| // value a never-opened kernel has) instead of a stale one that was already released. | |||||
| void DatasetIteratorKernel::ReleaseResource() { | |||||
| GpuBufferMgr::GetInstance().Close(handle_); | |||||
| handle_ = HandleMgr::INVALID_HANDLE; | |||||
| } | |||||
| const std::vector<size_t> &DatasetIteratorKernel::GetInputSizeList() const { return input_size_list_; } | const std::vector<size_t> &DatasetIteratorKernel::GetInputSizeList() const { return input_size_list_; } | ||||
| const std::vector<size_t> &DatasetIteratorKernel::GetOutputSizeList() const { return output_size_list_; } | const std::vector<size_t> &DatasetIteratorKernel::GetOutputSizeList() const { return output_size_list_; } | ||||
| @@ -35,6 +35,7 @@ class DatasetIteratorKernel : public GpuKernel { | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | ||||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override; | const std::vector<AddressPtr> &outputs, void *stream_ptr) override; | ||||
| bool Init(const CNodePtr &kernel_node) override; | bool Init(const CNodePtr &kernel_node) override; | ||||
| void ReleaseResource() override; | |||||
| protected: | protected: | ||||
| void InitSizeLists() override; | void InitSizeLists() override; | ||||
| @@ -119,6 +119,7 @@ class KernelMod { | |||||
| virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | ||||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0; | const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0; | ||||
| virtual std::vector<size_t> GenParameters() { return {}; } | virtual std::vector<size_t> GenParameters() { return {}; } | ||||
| // Hook for a kernel to give back resources it holds; the default implementation does nothing. | |||||
| virtual void ReleaseResource() {} | |||||
| virtual ~KernelMod() = default; | virtual ~KernelMod() = default; | ||||
| void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } | void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } | ||||
| @@ -1125,6 +1125,9 @@ void KernelGraph::UpdateChildGraphOrder() { | |||||
| std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); } | std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); } | ||||
| KernelGraph::~KernelGraph() { device::KernelRuntimeManager::Instance().ClearGraphResource(graph_id_); } | |||||
| // Asks the runtime manager to clear the resources associated with this graph, handing over | |||||
| // the graph's inputs, value nodes and execution order. | |||||
| // The cleanup is wrapped in try/catch because an exception leaving a destructor terminates | |||||
| // the program; a failure is logged and destruction carries on. | |||||
| // NOTE(review): *inputs_ is dereferenced unchecked - confirm inputs_ can never be null here. | |||||
| KernelGraph::~KernelGraph() { | |||||
| try { | |||||
| device::KernelRuntimeManager::Instance().ClearGraphResource(graph_id_, *inputs_, graph_value_nodes_, | |||||
| execution_order_); | |||||
| } catch (...) { | |||||
| MS_LOG(ERROR) << "Clear graph:" << graph_id_ << " resource failed in KernelGraph destructor"; | |||||
| } | |||||
| } | |||||
| } // namespace session | } // namespace session | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -115,7 +115,9 @@ void AscendKernelRuntime::ClearGraphModelMap() { | |||||
| } | } | ||||
| } | } | ||||
| void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { | |||||
| void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &, | |||||
| const std::unordered_set<ValueNodePtr> &, | |||||
| const std::vector<CNodePtr> &) { | |||||
| MS_LOG(DEBUG) << "Clear graph:" << graph_id << " data dumper"; | MS_LOG(DEBUG) << "Clear graph:" << graph_id << " data dumper"; | ||||
| if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) { | if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) { | ||||
| MS_LOG(DEBUG) << "Unload dump info " << graph_id; | MS_LOG(DEBUG) << "Unload dump info " << graph_id; | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <string> | #include <string> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <unordered_set> | |||||
| #include "runtime/device/kernel_runtime.h" | #include "runtime/device/kernel_runtime.h" | ||||
| #include "runtime/context.h" | #include "runtime/context.h" | ||||
| #include "framework/ge_runtime/davinci_model.h" | #include "framework/ge_runtime/davinci_model.h" | ||||
| @@ -43,7 +44,9 @@ class AscendKernelRuntime : public KernelRuntime { | |||||
| bool GenTask(const session::KernelGraph *graph) override; | bool GenTask(const session::KernelGraph *graph) override; | ||||
| bool RunTask(const session::KernelGraph *graph) override; | bool RunTask(const session::KernelGraph *graph) override; | ||||
| bool LoadTask(const session::KernelGraph *graph) override; | bool LoadTask(const session::KernelGraph *graph) override; | ||||
| void ClearGraphRuntimeResource(uint32_t graph_id) override; | |||||
| void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs, | |||||
| const std::unordered_set<ValueNodePtr> &value_nodes, | |||||
| const std::vector<CNodePtr> &execution_order) override; | |||||
| bool SyncStream() override; | bool SyncStream() override; | ||||
| protected: | protected: | ||||
| @@ -397,6 +397,18 @@ void GPUKernelRuntime::ReleaseDeviceRes() { | |||||
| bin_map->RemoveKernelCache(); | bin_map->RemoveKernelCache(); | ||||
| } | } | ||||
| // Releases the resources held by each kernel in the given execution order. | |||||
| // graph_id is used for logging only; the inputs and value nodes arguments are unused. | |||||
| // This is best-effort cleanup: an entry that is null or has no kernel mod is logged and | |||||
| // skipped instead of raising, so the remaining kernels are still released and no exception | |||||
| // is introduced on a path that can be reached from a destructor. | |||||
| // NOTE(review): AnfAlgo::GetKernelMod may itself raise on malformed nodes - confirm. | |||||
| void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &, | |||||
| const std::unordered_set<ValueNodePtr> &, | |||||
| const std::vector<CNodePtr> &execution_order) { | |||||
| MS_LOG(INFO) << "Clear graph:" << graph_id << " GPU runtime resource"; | |||||
| // Release the kernel resource. | |||||
| for (const auto &kernel : execution_order) { | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(WARNING) << "Clear graph:" << graph_id << " skip null kernel node"; | |||||
| continue; | |||||
| } | |||||
| auto kernel_mod = AnfAlgo::GetKernelMod(kernel); | |||||
| if (kernel_mod == nullptr) { | |||||
| MS_LOG(WARNING) << "Clear graph:" << graph_id << " skip kernel without kernel mod"; | |||||
| continue; | |||||
| } | |||||
| kernel_mod->ReleaseResource(); | |||||
| } | |||||
| } | |||||
| void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { | void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { | ||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| @@ -23,6 +23,7 @@ | |||||
| #include <set> | #include <set> | ||||
| #include <utility> | #include <utility> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <unordered_set> | |||||
| #include "runtime/device/kernel_runtime.h" | #include "runtime/device/kernel_runtime.h" | ||||
| #include "runtime/device/kernel_runtime_manager.h" | #include "runtime/device/kernel_runtime_manager.h" | ||||
| #include "backend/optimizer/mem_reuse/mem_swap_manager.h" | #include "backend/optimizer/mem_reuse/mem_swap_manager.h" | ||||
| @@ -37,6 +38,9 @@ class GPUKernelRuntime : public KernelRuntime { | |||||
| ~GPUKernelRuntime() override = default; | ~GPUKernelRuntime() override = default; | ||||
| bool Init() override; | bool Init() override; | ||||
| void ReleaseDeviceRes() override; | void ReleaseDeviceRes() override; | ||||
| void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs, | |||||
| const std::unordered_set<ValueNodePtr> &value_nodes, | |||||
| const std::vector<CNodePtr> &execution_order) override; | |||||
| void AssignMemory(session::KernelGraph *graph) override; | void AssignMemory(session::KernelGraph *graph) override; | ||||
| bool Run(session::KernelGraph *graph, Debugger *debugger = nullptr) override; | bool Run(session::KernelGraph *graph, Debugger *debugger = nullptr) override; | ||||
| #ifdef ENABLE_DUMP_E2E | #ifdef ENABLE_DUMP_E2E | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <utility> | #include <utility> | ||||
| #include <numeric> | #include <numeric> | ||||
| #include <functional> | |||||
| #include "utils/ms_utils.h" | #include "utils/ms_utils.h" | ||||
| #include "common/trans.h" | #include "common/trans.h" | ||||
| #include "utils/utils.h" | #include "utils/utils.h" | ||||
| @@ -841,7 +842,8 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) { | |||||
| return true; | return true; | ||||
| } | } | ||||
| void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { | |||||
| // Base implementation: only logs the graph id. The inputs, value nodes and execution order | |||||
| // arguments are unnamed and ignored here. | |||||
| void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &, | |||||
| const std::unordered_set<ValueNodePtr> &, const std::vector<CNodePtr> &) { | |||||
| MS_LOG(INFO) << "Clear graph:" << graph_id << " runtime resource"; | MS_LOG(INFO) << "Clear graph:" << graph_id << " runtime resource"; | ||||
| } | } | ||||
| @@ -20,7 +20,7 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| #include <map> | #include <map> | ||||
| #include <unordered_set> | |||||
| #include "runtime/device/device_address.h" | #include "runtime/device/device_address.h" | ||||
| #include "ir/tensor.h" | #include "ir/tensor.h" | ||||
| #include "utils/convert_utils.h" | #include "utils/convert_utils.h" | ||||
| @@ -69,7 +69,9 @@ class KernelRuntime { | |||||
| const AddressPtrList &kernel_workspaces) const; | const AddressPtrList &kernel_workspaces) const; | ||||
| virtual void AssignStaticMemoryInput(const session::KernelGraph *graph); | virtual void AssignStaticMemoryInput(const session::KernelGraph *graph); | ||||
| virtual void AssignStaticMemoryValueNode(session::KernelGraph *graph); | virtual void AssignStaticMemoryValueNode(session::KernelGraph *graph); | ||||
| virtual void ClearGraphRuntimeResource(uint32_t graph_id); | |||||
| virtual void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs, | |||||
| const std::unordered_set<ValueNodePtr> &value_nodes, | |||||
| const std::vector<CNodePtr> &execution_order); | |||||
| virtual bool SyncStream() = 0; | virtual bool SyncStream() = 0; | ||||
| #ifdef ENABLE_DUMP_E2E | #ifdef ENABLE_DUMP_E2E | ||||
| @@ -29,7 +29,9 @@ void KernelRuntimeManager::ClearRuntimeResource() { | |||||
| runtime_map_.clear(); | runtime_map_.clear(); | ||||
| } | } | ||||
| void KernelRuntimeManager::ClearGraphResource(uint32_t graph_id) { | |||||
| void KernelRuntimeManager::ClearGraphResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs, | |||||
| const std::unordered_set<ValueNodePtr> &value_nodes, | |||||
| const std::vector<CNodePtr> &execution_order) { | |||||
| std::lock_guard<std::mutex> guard(lock_); | std::lock_guard<std::mutex> guard(lock_); | ||||
| for (auto &iter : runtime_map_) { | for (auto &iter : runtime_map_) { | ||||
| MS_LOG(INFO) << "Clear device " << iter.first << " graph " << graph_id << " runtime resource"; | MS_LOG(INFO) << "Clear device " << iter.first << " graph " << graph_id << " runtime resource"; | ||||
| @@ -37,7 +39,7 @@ void KernelRuntimeManager::ClearGraphResource(uint32_t graph_id) { | |||||
| MS_LOG(ERROR) << "Kernel runtime is nullptr"; | MS_LOG(ERROR) << "Kernel runtime is nullptr"; | ||||
| continue; | continue; | ||||
| } | } | ||||
| iter.second->ClearGraphRuntimeResource(graph_id); | |||||
| iter.second->ClearGraphRuntimeResource(graph_id, inputs, value_nodes, execution_order); | |||||
| } | } | ||||
| } | } | ||||
| @@ -22,6 +22,8 @@ | |||||
| #include <functional> | #include <functional> | ||||
| #include <utility> | #include <utility> | ||||
| #include <mutex> | #include <mutex> | ||||
| #include <unordered_set> | |||||
| #include <vector> | |||||
| #include "utils/ms_utils.h" | #include "utils/ms_utils.h" | ||||
| #include "runtime/device/kernel_runtime.h" | #include "runtime/device/kernel_runtime.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -38,7 +40,9 @@ class KernelRuntimeManager { | |||||
| KernelRuntime *GetKernelRuntime(const std::string &device_name, uint32_t device_id); | KernelRuntime *GetKernelRuntime(const std::string &device_name, uint32_t device_id); | ||||
| KernelRuntime *GetSingleKernelRuntime(const std::string &device_name, uint32_t device_id); | KernelRuntime *GetSingleKernelRuntime(const std::string &device_name, uint32_t device_id); | ||||
| void ClearRuntimeResource(); | void ClearRuntimeResource(); | ||||
| void ClearGraphResource(uint32_t graph_id); | |||||
| void ClearGraphResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs, | |||||
| const std::unordered_set<ValueNodePtr> &value_nodes, | |||||
| const std::vector<CNodePtr> &execution_order); | |||||
| private: | private: | ||||
| KernelRuntimeManager() = default; | KernelRuntimeManager() = default; | ||||