| @@ -81,6 +81,14 @@ GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtr | |||||
| #endif | #endif | ||||
| MS_LOG(INFO) << "Build kernel"; | MS_LOG(INFO) << "Build kernel"; | ||||
| BuildKernel(graph.get()); | BuildKernel(graph.get()); | ||||
| // Set graph execution order before memory alloc, ensure that memory alloc is according to the reorder graph | |||||
| auto execution_order = graph->execution_order(); | |||||
| Reorder(&execution_order); | |||||
| graph->set_execution_order(execution_order); | |||||
| // runtime init | |||||
| if (!runtime_.Init()) { | |||||
| MS_LOG(EXCEPTION) << "Kernel runtime init error."; | |||||
| } | |||||
| MS_LOG(INFO) << "Assign kernel address"; | MS_LOG(INFO) << "Assign kernel address"; | ||||
| runtime_.AssignKernelAddress(graph.get()); | runtime_.AssignKernelAddress(graph.get()); | ||||
| return graph_id; | return graph_id; | ||||
| @@ -116,11 +124,8 @@ void CPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor: | |||||
| #endif | #endif | ||||
| MS_LOG(INFO) << "Run graph start"; | MS_LOG(INFO) << "Run graph start"; | ||||
| auto execution_order = kernel_graph->execution_order(); | |||||
| Reorder(&execution_order); | |||||
| bool enable_summary = summary_callback_ != nullptr; | bool enable_summary = summary_callback_ != nullptr; | ||||
| kernel_graph->set_execution_order(execution_order); | |||||
| NamedSummaryOutputs summary_outputs; | NamedSummaryOutputs summary_outputs; | ||||
| if (enable_summary) { | if (enable_summary) { | ||||
| SetSummaryNodes(kernel_graph.get()); | SetSummaryNodes(kernel_graph.get()); | ||||
| @@ -181,16 +186,21 @@ void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, | |||||
| auto kernel_graph = run_op_graphs_[graph_info]; | auto kernel_graph = run_op_graphs_[graph_info]; | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| // Set graph execution order before memory alloc, ensure that memory alloc is according to the reorder graph | |||||
| auto execution_order = kernel_graph->execution_order(); | |||||
| Reorder(&execution_order); | |||||
| kernel_graph->set_execution_order(execution_order); | |||||
| // runtime init | |||||
| if (!runtime_.Init()) { | |||||
| MS_LOG(EXCEPTION) << "Kernel runtime init error."; | |||||
| } | |||||
| runtime_.AssignKernelAddress(kernel_graph.get()); | runtime_.AssignKernelAddress(kernel_graph.get()); | ||||
| std::map<tensor::TensorPtr, session::KernelWithIndex> tensor_to_node; | std::map<tensor::TensorPtr, session::KernelWithIndex> tensor_to_node; | ||||
| runtime_.CreateOutputTensors(kernel_graph.get(), *input_tensors, outputs, &tensor_to_node); | runtime_.CreateOutputTensors(kernel_graph.get(), *input_tensors, outputs, &tensor_to_node); | ||||
| runtime_.BindInputOutput(kernel_graph.get(), *input_tensors, outputs); | runtime_.BindInputOutput(kernel_graph.get(), *input_tensors, outputs); | ||||
| MS_LOG(INFO) << "Run Op start"; | MS_LOG(INFO) << "Run Op start"; | ||||
| auto execution_order = kernel_graph->execution_order(); | |||||
| Reorder(&execution_order); | |||||
| kernel_graph->set_execution_order(execution_order); | |||||
| bool ret = runtime_.Run(kernel_graph.get(), false); | bool ret = runtime_.Run(kernel_graph.get(), false); | ||||
| if (!ret) { | if (!ret) { | ||||
| @@ -24,6 +24,7 @@ | |||||
| #include <exception> | #include <exception> | ||||
| #include "backend/kernel_compiler/kernel.h" | #include "backend/kernel_compiler/kernel.h" | ||||
| #include "runtime/device/cpu/cpu_device_address.h" | #include "runtime/device/cpu/cpu_device_address.h" | ||||
| #include "runtime/device/cpu/cpu_memory_manager.h" | |||||
| #include "utils/ms_context.h" | #include "utils/ms_context.h" | ||||
| #include "backend/session/anf_runtime_algorithm.h" | #include "backend/session/anf_runtime_algorithm.h" | ||||
| #include "backend/session/session_basic.h" | #include "backend/session/session_basic.h" | ||||
| @@ -31,16 +32,47 @@ | |||||
| #include "utils/shape_utils.h" | #include "utils/shape_utils.h" | ||||
| #include "utils/profile.h" | #include "utils/profile.h" | ||||
| #include "utils/trace_base.h" | #include "utils/trace_base.h" | ||||
| #ifdef MEM_REUSE_DEBUG | |||||
| #include "backend/optimizer/mem_reuse/mem_reuse_checker.h" | |||||
| #endif | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| namespace cpu { | namespace cpu { | ||||
| bool CPUKernelRuntime::Init() { | |||||
| if (initialized_) { | |||||
| return true; | |||||
| } | |||||
| mem_manager_ = std::make_shared<CPUMemoryManager>(); | |||||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||||
| initialized_ = true; | |||||
| return true; | |||||
| } | |||||
| const size_t INIT_NODE_REF = 1; | const size_t INIT_NODE_REF = 1; | ||||
// Assigns device addresses for a kernel graph: value nodes first, then graph inputs, then kernel outputs
// and their backing memory. The rows below are a diff: the two single-column rows right after
// AssignInputNodeAddress are the removed unconditional path (resource_manager_), the remaining
// single-column rows are the added path that branches on the memory-reuse setting.
| void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) { | void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) { | ||||
| AssignValueNodeAddress(kernel_graph); | AssignValueNodeAddress(kernel_graph); | ||||
| AssignInputNodeAddress(kernel_graph); | AssignInputNodeAddress(kernel_graph); | ||||
| AssignKernelOutputAddress(kernel_graph); | |||||
| resource_manager_.AssignMemory(kernel_graph); | |||||
// Memory reuse is read from the MS_CTX_ENABLE_MEM_REUSE context parameter and then forced off when the
// execution mode is kPynativeMode.
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| bool is_enable_mem_reuse = context_ptr->get_param<bool>(MS_CTX_ENABLE_MEM_REUSE); | |||||
| if (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) { | |||||
| // disable mem reuse for kPynativeMode | |||||
| is_enable_mem_reuse = false; | |||||
| } | |||||
// Reuse path: reset the manager's dynamic memory bookkeeping, then let AssignDynamicMemory place the
// graph's buffers.
| if (is_enable_mem_reuse) { | |||||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||||
| mem_manager_->ResetDynamicMemory(); | |||||
| AssignDynamicMemory(kernel_graph); | |||||
| #ifdef MEM_REUSE_DEBUG | |||||
| // Get normal graph ir for memreuse | |||||
| mindspore::memreuse::MemReuseChecker::GetInstance().CheckNormalIR(kernel_graph); | |||||
| #endif | |||||
// Non-reuse path: same two steps as the removed code, but AssignMemory is now reached through
// mem_manager_ downcast to CPUMemoryManager.
// NOTE(review): mem_manager_ is null-checked only in the reuse branch above; this branch dereferences it
// unchecked and so appears to rely on Init() having been called first - confirm against callers.
| } else { | |||||
| AssignKernelOutputAddress(kernel_graph); | |||||
| static_cast<CPUMemoryManager *>(mem_manager_.get())->AssignMemory(kernel_graph); | |||||
| } | |||||
| } | } | ||||
| void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph) { | void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph) { | ||||
| @@ -75,7 +107,7 @@ void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph | |||||
| if (tensor->data_type() == output_type_id) { | if (tensor->data_type() == output_type_id) { | ||||
| address->ptr_ = tensor->data_c(); | address->ptr_ = tensor->data_c(); | ||||
| } else { | } else { | ||||
| address->ptr_ = resource_manager_.MemMalloc(tensor_size); | |||||
| address->ptr_ = static_cast<CPUMemoryManager *>(mem_manager_.get())->StaticMemMalloc(tensor_size); | |||||
| if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(), | if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(), | ||||
| tensor->data_c())) { | tensor->data_c())) { | ||||
| MS_LOG(EXCEPTION) << "Value node sync host to device failed!"; | MS_LOG(EXCEPTION) << "Value node sync host to device failed!"; | ||||
| @@ -169,7 +201,7 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput( | |||||
| size_t type_size = GetTypeByte(TypeIdToType(device_type_id)); | size_t type_size = GetTypeByte(TypeIdToType(device_type_id)); | ||||
| ShapeVector data_shape = tensor->shape(); | ShapeVector data_shape = tensor->shape(); | ||||
| size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>()); | size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>()); | ||||
| address->ptr_ = resource_manager_.MemMalloc(tensor_size); | |||||
| address->ptr_ = static_cast<CPUMemoryManager *>(mem_manager_.get())->StaticMemMalloc(tensor_size); | |||||
| tensor->set_sync_status(kNeedSyncDeviceToHostImmediately); | tensor->set_sync_status(kNeedSyncDeviceToHostImmediately); | ||||
| } else { | } else { | ||||
| tensor->set_sync_status(kNoNeedSync); | tensor->set_sync_status(kNoNeedSync); | ||||
| @@ -268,7 +300,7 @@ void CPUKernelRuntime::BindInputTensorAddressPtr(const session::KernelGraph &ker | |||||
| ShapeVector data_shape = tensor->shape(); | ShapeVector data_shape = tensor->shape(); | ||||
| size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), | size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), | ||||
| GetTypeByte(TypeIdToType(address->type_id_)), std::multiplies<size_t>()); | GetTypeByte(TypeIdToType(address->type_id_)), std::multiplies<size_t>()); | ||||
| address->ptr_ = resource_manager_.MemMalloc(tensor_size); | |||||
| address->ptr_ = static_cast<CPUMemoryManager *>(mem_manager_.get())->StaticMemMalloc(tensor_size); | |||||
| if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(), | if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(), | ||||
| tensor->data_c())) { | tensor->data_c())) { | ||||
| MS_LOG(EXCEPTION) << "Parameter node sync host to device failed!"; | MS_LOG(EXCEPTION) << "Parameter node sync host to device failed!"; | ||||
| @@ -322,7 +354,7 @@ void CPUKernelRuntime::AddRuntimeAddress(DeviceAddress *address, std::vector<ker | |||||
| kernel::AddressPtr input = std::make_shared<kernel::Address>(); | kernel::AddressPtr input = std::make_shared<kernel::Address>(); | ||||
| MS_EXCEPTION_IF_NULL(input); | MS_EXCEPTION_IF_NULL(input); | ||||
| if (address->ptr_ == nullptr) { | if (address->ptr_ == nullptr) { | ||||
| address->ptr_ = resource_manager_.MemMalloc(address->size_); | |||||
| address->ptr_ = static_cast<CPUMemoryManager *>(mem_manager_.get())->StaticMemMalloc(address->size_); | |||||
| } | } | ||||
| MS_EXCEPTION_IF_NULL(address->ptr_); | MS_EXCEPTION_IF_NULL(address->ptr_); | ||||
| input->addr = address->ptr_; | input->addr = address->ptr_; | ||||
| @@ -331,16 +363,16 @@ void CPUKernelRuntime::AddRuntimeAddress(DeviceAddress *address, std::vector<ker | |||||
| } | } | ||||
| void CPUKernelRuntime::IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { | void CPUKernelRuntime::IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { | ||||
| resource_manager_.IncreaseSummaryRefCount(summary_outputs); | |||||
| static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseSummaryRefCount(summary_outputs); | |||||
| } | } | ||||
| void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { | void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { | ||||
| resource_manager_.DecreaseSummaryRefCount(summary_outputs); | |||||
| static_cast<CPUMemoryManager *>(mem_manager_.get())->DecreaseSummaryRefCount(summary_outputs); | |||||
| } | } | ||||
| bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink) { | bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink) { | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| resource_manager_.IncreaseAddressRefCount(kernel_graph); | |||||
| static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph); | |||||
| auto kernels = kernel_graph->execution_order(); | auto kernels = kernel_graph->execution_order(); | ||||
| for (const auto &kernel : kernels) { | for (const auto &kernel : kernels) { | ||||
| @@ -381,7 +413,7 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink | |||||
| if (!ret) { | if (!ret) { | ||||
| MS_LOG(EXCEPTION) << "Launch kernel failed. Trace:" << trace::DumpSourceLines(kernel); | MS_LOG(EXCEPTION) << "Launch kernel failed. Trace:" << trace::DumpSourceLines(kernel); | ||||
| } | } | ||||
| resource_manager_.DecreaseAddressRefCount(kernel); | |||||
| static_cast<CPUMemoryManager *>(mem_manager_.get())->DecreaseAddressRefCount(kernel); | |||||
| #ifdef ENABLE_PROFILE | #ifdef ENABLE_PROFILE | ||||
| double cost_time = GetTime() - start_time; | double cost_time = GetTime() - start_time; | ||||
| MS_LOG(INFO) << "cpu kernel: " << kernel->fullname_with_scope() << " costs " << cost_time * 1e6 << " us"; | MS_LOG(INFO) << "cpu kernel: " << kernel->fullname_with_scope() << " costs " << cost_time * 1e6 << " us"; | ||||
| @@ -24,7 +24,6 @@ | |||||
| #include "runtime/device/kernel_runtime.h" | #include "runtime/device/kernel_runtime.h" | ||||
| #include "backend/session/kernel_graph.h" | #include "backend/session/kernel_graph.h" | ||||
| #include "backend/session/session_basic.h" | #include "backend/session/session_basic.h" | ||||
| #include "runtime/device/cpu/cpu_resource_manager.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | #include "backend/session/anf_runtime_algorithm.h" | ||||
| #include "utils/any.h" | #include "utils/any.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -35,7 +34,7 @@ class CPUKernelRuntime : public KernelRuntime { | |||||
| CPUKernelRuntime() = default; | CPUKernelRuntime() = default; | ||||
| ~CPUKernelRuntime() override = default; | ~CPUKernelRuntime() override = default; | ||||
| bool Init() override { return true; } | |||||
| bool Init(); | |||||
| bool Run(session::KernelGraph *graph, bool is_task_sink) override; | bool Run(session::KernelGraph *graph, bool is_task_sink) override; | ||||
| void AssignKernelAddress(session::KernelGraph *kernel_graph); | void AssignKernelAddress(session::KernelGraph *kernel_graph); | ||||
| void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs, | void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs, | ||||
| @@ -63,9 +62,9 @@ class CPUKernelRuntime : public KernelRuntime { | |||||
| void AssignInputNodeAddress(const session::KernelGraph *kernel_graph); | void AssignInputNodeAddress(const session::KernelGraph *kernel_graph); | ||||
| void AssignKernelOutputAddress(const session::KernelGraph *kernel_graph); | void AssignKernelOutputAddress(const session::KernelGraph *kernel_graph); | ||||
| void AddRuntimeAddress(DeviceAddress *address, std::vector<kernel::AddressPtr> *input_list); | void AddRuntimeAddress(DeviceAddress *address, std::vector<kernel::AddressPtr> *input_list); | ||||
| CPUResourceManager resource_manager_; | |||||
| std::set<DeviceAddressPtr> bound_addresses_; | std::set<DeviceAddressPtr> bound_addresses_; | ||||
| std::map<AnfNodePtr, tensor::TensorPtr> input_param_tensor_map_; | std::map<AnfNodePtr, tensor::TensorPtr> input_param_tensor_map_; | ||||
| bool initialized_{false}; | |||||
| }; | }; | ||||
| } // namespace cpu | } // namespace cpu | ||||
| } // namespace device | } // namespace device | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -13,28 +13,90 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "runtime/device/cpu/cpu_resource_manager.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "runtime/device/cpu/cpu_memory_manager.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "utils/ms_context.h" | |||||
| #include "utils/convert_utils.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| namespace cpu { | namespace cpu { | ||||
| CPUResourceManager::~CPUResourceManager() { MemFree(); } | |||||
| void CPUResourceManager::MemFree() { | |||||
| uint8_t *CPUMemoryManager::MallocStaticMem(size_t size, bool) { | |||||
| void *ptr = malloc(size); | |||||
| if (ptr != nullptr) { | |||||
| memset_s(ptr, size, 0, size); | |||||
| static_mem_[ptr] = size; | |||||
| return reinterpret_cast<uint8_t *>(ptr); | |||||
| } else { | |||||
| MS_LOG(EXCEPTION) << "Malloc memory failed: size " << size; | |||||
| } | |||||
| } | |||||
| uint8_t *CPUMemoryManager::MallocDynamicMem(size_t size, bool) { | |||||
| void *ptr = nullptr; | |||||
| size_t min_size = 0; | |||||
| // first find the smallest cached_mem_ which fits the size | |||||
| for (auto &&iter : cached_mem_) { | |||||
| if (iter.second >= size) { | |||||
| if (min_size == 0) { | |||||
| ptr = iter.first; | |||||
| min_size = iter.second; | |||||
| } else if (iter.second < min_size) { | |||||
| ptr = iter.first; | |||||
| min_size = iter.second; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (ptr != nullptr) { | |||||
| memset_s(ptr, size, 0, size); | |||||
| dynamic_mem_[ptr] = min_size; | |||||
| (void)cached_mem_.erase(ptr); | |||||
| return reinterpret_cast<uint8_t *>(ptr); | |||||
| } | |||||
| // if not found, malloc | |||||
| ptr = malloc(size); | |||||
| if (ptr != nullptr) { | |||||
| memset_s(ptr, size, 0, size); | |||||
| dynamic_mem_[ptr] = size; | |||||
| return reinterpret_cast<uint8_t *>(ptr); | |||||
| } else { | |||||
| MS_LOG(EXCEPTION) << "Malloc memory failed: size " << size; | |||||
| } | |||||
| } | |||||
| void CPUMemoryManager::ResetDynamicMemory() { | |||||
| // don't free, for multi graph | |||||
| for (auto &&iter : dynamic_mem_) { | |||||
| cached_mem_[iter.first] = iter.second; | |||||
| } | |||||
| dynamic_mem_.clear(); | |||||
| } | |||||
| CPUMemoryManager::~CPUMemoryManager() { MemFree(); } | |||||
| void CPUMemoryManager::MemFree() { | |||||
| if (mem_ptr_ != nullptr) { | if (mem_ptr_ != nullptr) { | ||||
| free(mem_ptr_); | free(mem_ptr_); | ||||
| mem_ptr_ = nullptr; | mem_ptr_ = nullptr; | ||||
| mem_size_ = 0; | mem_size_ = 0; | ||||
| } | } | ||||
| for (auto &&iter : static_mem_) { | |||||
| free(iter.first); | |||||
| } | |||||
| static_mem_.clear(); | |||||
| for (auto &&iter : dynamic_mem_) { | for (auto &&iter : dynamic_mem_) { | ||||
| free(iter.first); | free(iter.first); | ||||
| } | } | ||||
| dynamic_mem_.clear(); | dynamic_mem_.clear(); | ||||
| for (auto &&iter : cached_mem_) { | |||||
| free(iter.first); | |||||
| } | |||||
| cached_mem_.clear(); | |||||
| } | } | ||||
| void CPUResourceManager::AssignMemory(const session::KernelGraph *graph) { | |||||
| void CPUMemoryManager::AssignMemory(const session::KernelGraph *graph) { | |||||
| size_t graph_mem_size = mem_plan_.MemPlan(graph); | size_t graph_mem_size = mem_plan_.MemPlan(graph); | ||||
| if (graph_mem_size > mem_size_) { | if (graph_mem_size > mem_size_) { | ||||
| if (mem_size_ > 0) { | if (mem_size_ > 0) { | ||||
| @@ -43,6 +105,7 @@ void CPUResourceManager::AssignMemory(const session::KernelGraph *graph) { | |||||
| } | } | ||||
| mem_ptr_ = reinterpret_cast<uint8_t *>(malloc(graph_mem_size)); | mem_ptr_ = reinterpret_cast<uint8_t *>(malloc(graph_mem_size)); | ||||
| if (mem_ptr_ != nullptr) { | if (mem_ptr_ != nullptr) { | ||||
| MS_LOG(INFO) << "Simple MemPlan GraphMemSize [" << graph_mem_size << "]"; | |||||
| mem_size_ = graph_mem_size; | mem_size_ = graph_mem_size; | ||||
| dynamic_malloc_ = false; | dynamic_malloc_ = false; | ||||
| } else { | } else { | ||||
| @@ -56,26 +119,26 @@ void CPUResourceManager::AssignMemory(const session::KernelGraph *graph) { | |||||
| mem_plan_.MemAssign(graph, mem_ptr_); | mem_plan_.MemAssign(graph, mem_ptr_); | ||||
| } | } | ||||
// Allocates mem_size bytes with malloc, zero-fills them with memset_s and records the pointer with its
// size in a bookkeeping map before returning it. When malloc returns nullptr the failure is reported
// through MS_LOG(EXCEPTION).
// Diff rows: the first header row is the removed CPUResourceManager::MemMalloc, the second is its
// replacement CPUMemoryManager::StaticMemMalloc.
| void *CPUResourceManager::MemMalloc(size_t mem_size) { | |||||
| void *CPUMemoryManager::StaticMemMalloc(size_t mem_size) { | |||||
| void *ptr = malloc(mem_size); | void *ptr = malloc(mem_size); | ||||
| if (ptr != nullptr) { | if (ptr != nullptr) { | ||||
| memset_s(ptr, mem_size, 0, mem_size); | memset_s(ptr, mem_size, 0, mem_size); | ||||
// The removed row tracked the pointer in dynamic_mem_; the added row tracks it in static_mem_ instead.
| dynamic_mem_[ptr] = mem_size; | |||||
| static_mem_[ptr] = mem_size; | |||||
| return ptr; | return ptr; | ||||
| } else { | } else { | ||||
| MS_LOG(EXCEPTION) << "Malloc memory failed: size " << mem_size; | MS_LOG(EXCEPTION) << "Malloc memory failed: size " << mem_size; | ||||
| } | } | ||||
| } | } | ||||
// Frees a single pointer, but only if it is found in the bookkeeping map; the map entry is erased
// before free() is called. A pointer that is not in the map is left untouched.
// Diff rows: the first four rows are the removed CPUResourceManager version, which looked the pointer
// up in dynamic_mem_; the next four are the added CPUMemoryManager version, which looks it up in
// static_mem_.
| void CPUResourceManager::MemFree(void *ptr) { | |||||
| auto iter = dynamic_mem_.find(ptr); | |||||
| if (iter != dynamic_mem_.end()) { | |||||
| (void)dynamic_mem_.erase(iter); | |||||
| void CPUMemoryManager::MemFree(void *ptr) { | |||||
| auto iter = static_mem_.find(ptr); | |||||
| if (iter != static_mem_.end()) { | |||||
| (void)static_mem_.erase(iter); | |||||
| free(ptr); | free(ptr); | ||||
| } | } | ||||
| } | } | ||||
| void CPUResourceManager::IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { | |||||
| void CPUMemoryManager::IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { | |||||
| if (!dynamic_malloc_) { | if (!dynamic_malloc_) { | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -93,7 +156,7 @@ void CPUResourceManager::IncreaseSummaryRefCount(const session::NamedSummaryOutp | |||||
| } | } | ||||
| } | } | ||||
| void CPUResourceManager::DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { | |||||
| void CPUMemoryManager::DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) { | |||||
| if (!dynamic_malloc_) { | if (!dynamic_malloc_) { | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -115,7 +178,7 @@ void CPUResourceManager::DecreaseSummaryRefCount(const session::NamedSummaryOutp | |||||
| } | } | ||||
| } | } | ||||
| void CPUResourceManager::IncreaseAddressRefCount(const session::KernelGraph *graph) { | |||||
| void CPUMemoryManager::IncreaseAddressRefCount(const session::KernelGraph *graph) { | |||||
| if (!dynamic_malloc_) { | if (!dynamic_malloc_) { | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -140,7 +203,7 @@ void CPUResourceManager::IncreaseAddressRefCount(const session::KernelGraph *gra | |||||
| } | } | ||||
| } | } | ||||
| void CPUResourceManager::DecreaseAddressRefCount(const AnfNodePtr &kernel) { | |||||
| void CPUMemoryManager::DecreaseAddressRefCount(const AnfNodePtr &kernel) { | |||||
| if (!dynamic_malloc_) { | if (!dynamic_malloc_) { | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -13,31 +13,40 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_RESOURCE_MANAGER_H_ | |||||
| #define MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_RESOURCE_MANAGER_H_ | |||||
| #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_MEMORY_MANAGER_H_ | |||||
| #define MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_MEMORY_MANAGER_H_ | |||||
| #include <vector> | #include <vector> | ||||
| #include <map> | #include <map> | ||||
| #include "backend/session/kernel_graph.h" | #include "backend/session/kernel_graph.h" | ||||
| #include "backend/session/session_basic.h" | #include "backend/session/session_basic.h" | ||||
| #include "runtime/device/device_address.h" | #include "runtime/device/device_address.h" | ||||
| #include "runtime/device/memory_manager.h" | |||||
| #include "runtime/device/cpu/cpu_simple_mem_plan.h" | #include "runtime/device/cpu/cpu_simple_mem_plan.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| namespace cpu { | namespace cpu { | ||||
| class CPUResourceManager { | |||||
| class CPUMemoryManager : public MemoryManager { | |||||
| public: | public: | ||||
| CPUResourceManager() = default; | |||||
| ~CPUResourceManager(); | |||||
| CPUMemoryManager() = default; | |||||
| virtual ~CPUMemoryManager(); | |||||
| void MallocDeviceMemory() override {} | |||||
| void FreeDeviceMemory() override {} | |||||
| void ResetDynamicMemory() override; | |||||
| void AssignMemory(const session::KernelGraph *graph); | void AssignMemory(const session::KernelGraph *graph); | ||||
| void IncreaseAddressRefCount(const session::KernelGraph *graph); | void IncreaseAddressRefCount(const session::KernelGraph *graph); | ||||
| void DecreaseAddressRefCount(const AnfNodePtr &kernel); | void DecreaseAddressRefCount(const AnfNodePtr &kernel); | ||||
| void *MemMalloc(size_t mem_size); | |||||
| void *StaticMemMalloc(size_t mem_size); | |||||
| void MemFree(void *ptr); | void MemFree(void *ptr); | ||||
| void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); | void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); | ||||
| void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); | void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); | ||||
| protected: | |||||
| uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; | |||||
| uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override; | |||||
| private: | private: | ||||
| void MemFree(); | void MemFree(); | ||||
| CPUSimpleMemPlan mem_plan_; | CPUSimpleMemPlan mem_plan_; | ||||
| @@ -46,9 +55,10 @@ class CPUResourceManager { | |||||
| uint8_t *mem_ptr_{nullptr}; | uint8_t *mem_ptr_{nullptr}; | ||||
| bool dynamic_malloc_{false}; | bool dynamic_malloc_{false}; | ||||
| std::map<void *, size_t> dynamic_mem_; | std::map<void *, size_t> dynamic_mem_; | ||||
| std::map<void *, size_t> static_mem_; | |||||
| std::map<void *, size_t> cached_mem_; | |||||
| }; | }; | ||||
| } // namespace cpu | } // namespace cpu | ||||
| } // namespace device | } // namespace device | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_RESOURCE_MANAGER_H_ | |||||
| #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_MEMORY_MANAGER_H_ | |||||
| @@ -28,7 +28,7 @@ namespace mindspore { | |||||
| namespace device { | namespace device { | ||||
| namespace cpu { | namespace cpu { | ||||
| class CPUSimpleMemPlan; | class CPUSimpleMemPlan; | ||||
| class CPUResourceManager; | |||||
| class CPUMemoryManager; | |||||
| class CPUKernelRuntime; | class CPUKernelRuntime; | ||||
| } // namespace cpu | } // namespace cpu | ||||
| namespace ascend { | namespace ascend { | ||||
| @@ -93,7 +93,7 @@ class DeviceAddress : public mindspore::DeviceSync { | |||||
| friend class MemoryManager; | friend class MemoryManager; | ||||
| friend class mindspore::device::ascend::tasksink::TaskGenerator; | friend class mindspore::device::ascend::tasksink::TaskGenerator; | ||||
| friend class mindspore::device::cpu::CPUSimpleMemPlan; | friend class mindspore::device::cpu::CPUSimpleMemPlan; | ||||
| friend class mindspore::device::cpu::CPUResourceManager; | |||||
| friend class mindspore::device::cpu::CPUMemoryManager; | |||||
| friend class mindspore::device::cpu::CPUKernelRuntime; | friend class mindspore::device::cpu::CPUKernelRuntime; | ||||
| friend class mindspore::device::gpu::GPUKernelRuntime; | friend class mindspore::device::gpu::GPUKernelRuntime; | ||||
| friend class mindspore::device::gpu::GPUMemoryManager; | friend class mindspore::device::gpu::GPUMemoryManager; | ||||