| @@ -225,23 +225,24 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod | |||||
| MS_EXCEPTION_IF_NULL(input); | MS_EXCEPTION_IF_NULL(input); | ||||
| input->addr = device_address->ptr_; | input->addr = device_address->ptr_; | ||||
| input->size = device_address->size_; | input->size = device_address->size_; | ||||
| kernel_inputs->push_back(input); | |||||
| kernel_inputs->emplace_back(input); | |||||
| } | } | ||||
| auto output_sizes = kernel_mod.GetOutputSizeList(); | auto output_sizes = kernel_mod.GetOutputSizeList(); | ||||
| for (size_t i = 0; i < output_sizes.size(); ++i) { | for (size_t i = 0; i < output_sizes.size(); ++i) { | ||||
| auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i); | auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i); | ||||
| MS_EXCEPTION_IF_NULL(device_address); | MS_EXCEPTION_IF_NULL(device_address); | ||||
| if (device_address->ptr_ == nullptr) { | if (device_address->ptr_ == nullptr) { | ||||
| mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); | |||||
| auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); | |||||
| if (!ret) { | |||||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||||
| } | |||||
| } | } | ||||
| kernel::AddressPtr output = std::make_shared<kernel::Address>(); | kernel::AddressPtr output = std::make_shared<kernel::Address>(); | ||||
| MS_EXCEPTION_IF_NULL(output); | MS_EXCEPTION_IF_NULL(output); | ||||
| output->addr = device_address->ptr_; | output->addr = device_address->ptr_; | ||||
| output->size = output_sizes[i]; | output->size = output_sizes[i]; | ||||
| kernel_outputs->push_back(output); | |||||
| kernel_outputs->emplace_back(output); | |||||
| } | } | ||||
| auto workspace_sizes = kernel_mod.GetWorkspaceSizeList(); | auto workspace_sizes = kernel_mod.GetWorkspaceSizeList(); | ||||
| for (size_t i = 0; i < workspace_sizes.size(); ++i) { | for (size_t i = 0; i < workspace_sizes.size(); ++i) { | ||||
| if (workspace_sizes[i] == 0) { | if (workspace_sizes[i] == 0) { | ||||
| @@ -249,12 +250,14 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod | |||||
| continue; | continue; | ||||
| } | } | ||||
| auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]); | auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]); | ||||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||||
| if (!device_ptr) { | |||||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||||
| } | |||||
| kernel::AddressPtr workspace = std::make_shared<kernel::Address>(); | kernel::AddressPtr workspace = std::make_shared<kernel::Address>(); | ||||
| MS_EXCEPTION_IF_NULL(workspace); | MS_EXCEPTION_IF_NULL(workspace); | ||||
| workspace->addr = device_ptr; | workspace->addr = device_ptr; | ||||
| workspace->size = workspace_sizes[i]; | workspace->size = workspace_sizes[i]; | ||||
| kernel_workspaces->push_back(workspace); | |||||
| kernel_workspaces->emplace_back(workspace); | |||||
| } | } | ||||
| } | } | ||||
| @@ -334,7 +337,10 @@ void GPUKernelRuntime::AllocCommunicationOpMemory(bool is_need_alloc_memory, boo | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); | |||||
| auto ret = mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); | |||||
| if (!ret) { | |||||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||||
| } | |||||
| } | } | ||||
| void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, | void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, | ||||
| @@ -40,7 +40,7 @@ void GPUMemoryManager::MallocDeviceMemory() { | |||||
| if (context_ptr->enable_dynamic_mem_pool()) { | if (context_ptr->enable_dynamic_mem_pool()) { | ||||
| auto device_addr = MallocMemFromMemPool(1); | auto device_addr = MallocMemFromMemPool(1); | ||||
| if (!device_addr) { | if (!device_addr) { | ||||
| MS_LOG(ERROR) << "Dynamic memory pool init error."; | |||||
| MS_LOG(EXCEPTION) << "Dynamic memory pool init error."; | |||||
| } | } | ||||
| } else { | } else { | ||||
| // Need to reserve 20% space for dynamic memory | // Need to reserve 20% space for dynamic memory | ||||
| @@ -180,7 +180,10 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> | |||||
| auto device_address = | auto device_address = | ||||
| CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); | CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); | ||||
| MS_EXCEPTION_IF_NULL(device_address); | MS_EXCEPTION_IF_NULL(device_address); | ||||
| mem_manager_->MallocMemFromMemPool(device_address, tensor_size); | |||||
| auto ret = mem_manager_->MallocMemFromMemPool(device_address, tensor_size); | |||||
| if (!ret) { | |||||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||||
| } | |||||
| AnfAlgo::SetOutputAddr(device_address, index, item.get()); | AnfAlgo::SetOutputAddr(device_address, index, item.get()); | ||||
| } | } | ||||
| } | } | ||||
| @@ -209,7 +212,10 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { | |||||
| auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); | auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); | ||||
| auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); | auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); | ||||
| MS_EXCEPTION_IF_NULL(device_address); | MS_EXCEPTION_IF_NULL(device_address); | ||||
| mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); | |||||
| auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); | |||||
| if (!ret) { | |||||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||||
| } | |||||
| AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); | AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); | ||||
| } | } | ||||
| } | } | ||||
| @@ -224,7 +230,10 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { | |||||
| for (size_t i = 0; i < workspace_lists.size(); ++i) { | for (size_t i = 0; i < workspace_lists.size(); ++i) { | ||||
| auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); | auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); | ||||
| MS_EXCEPTION_IF_NULL(device_address); | MS_EXCEPTION_IF_NULL(device_address); | ||||
| mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); | |||||
| auto ret = mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); | |||||
| if (!ret) { | |||||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||||
| } | |||||
| AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); | AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); | ||||
| } | } | ||||
| } | } | ||||
| @@ -141,11 +141,14 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { | |||||
| } | } | ||||
| } | } | ||||
| void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { | |||||
| bool MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { | |||||
| auto device_ptr = MallocMemFromMemPool(size); | auto device_ptr = MallocMemFromMemPool(size); | ||||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||||
| if (!device_ptr) { | |||||
| return false; | |||||
| } | |||||
| address->ptr_ = device_ptr; | address->ptr_ = device_ptr; | ||||
| address->from_mem_pool_ = true; | address->from_mem_pool_ = true; | ||||
| return true; | |||||
| } | } | ||||
| void *MemoryManager::MallocMemFromMemPool(size_t size) { | void *MemoryManager::MallocMemFromMemPool(size_t size) { | ||||
| @@ -168,9 +171,12 @@ void MemoryManager::FreeMemFromMemPool(void *device_ptr) { | |||||
| } | } | ||||
| } | } | ||||
| void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, | |||||
| bool MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, | |||||
| std::vector<size_t> size_list) { | std::vector<size_t> size_list) { | ||||
| auto device_ptr_list = MallocContinuousMemFromMemPool(total_size, size_list); | auto device_ptr_list = MallocContinuousMemFromMemPool(total_size, size_list); | ||||
| if (device_ptr_list.size() == 0) { | |||||
| return false; | |||||
| } | |||||
| if (addr_list.size() != device_ptr_list.size()) { | if (addr_list.size() != device_ptr_list.size()) { | ||||
| MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list."; | MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list."; | ||||
| } | } | ||||
| @@ -180,6 +186,7 @@ void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList ad | |||||
| addr_list[i]->ptr_ = device_ptr_list[i]; | addr_list[i]->ptr_ = device_ptr_list[i]; | ||||
| addr_list[i]->from_mem_pool_ = true; | addr_list[i]->from_mem_pool_ = true; | ||||
| } | } | ||||
| return true; | |||||
| } | } | ||||
| std::vector<void *> MemoryManager::MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) { | std::vector<void *> MemoryManager::MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) { | ||||
| @@ -46,11 +46,11 @@ class MemoryManager { | |||||
| uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); | uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); | ||||
| virtual uint8_t *MallocMem(int flag, size_t size); | virtual uint8_t *MallocMem(int flag, size_t size); | ||||
| virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); | |||||
| virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); | |||||
| virtual void *MallocMemFromMemPool(size_t size); | virtual void *MallocMemFromMemPool(size_t size); | ||||
| virtual void FreeMemFromMemPool(const DeviceAddressPtr address); | virtual void FreeMemFromMemPool(const DeviceAddressPtr address); | ||||
| virtual void FreeMemFromMemPool(void *device_ptr); | virtual void FreeMemFromMemPool(void *device_ptr); | ||||
| virtual void MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, | |||||
| virtual bool MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, | |||||
| std::vector<size_t> size_list); | std::vector<size_t> size_list); | ||||
| virtual std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list); | virtual std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list); | ||||
| @@ -38,9 +38,12 @@ DeviceMemPtr DynamicMemPoolBestFit::AllocTensorMem(size_t size) { | |||||
| std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t total_size, | std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t total_size, | ||||
| std::vector<size_t> size_list) { | std::vector<size_t> size_list) { | ||||
| std::vector<DeviceMemPtr> device_addr_list; | |||||
| // Pre-allocate the total size as one whole piece of memory. | // Pre-allocate the total size as one whole piece of memory. | ||||
| auto device_addr = AllocTensorMem(total_size); | auto device_addr = AllocTensorMem(total_size); | ||||
| MS_EXCEPTION_IF_NULL(device_addr); | |||||
| if (!device_addr) { | |||||
| return device_addr_list; | |||||
| } | |||||
| // Remove the pre-alloc memory. | // Remove the pre-alloc memory. | ||||
| auto mem_block = FindMemBlock(device_addr); | auto mem_block = FindMemBlock(device_addr); | ||||
| MS_EXCEPTION_IF_NULL(mem_block); | MS_EXCEPTION_IF_NULL(mem_block); | ||||
| @@ -54,7 +57,6 @@ std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t | |||||
| (void)mem_block->block_all_mem_buf_map_.erase(iter); | (void)mem_block->block_all_mem_buf_map_.erase(iter); | ||||
| // Split the pre-alloc memory into continuous memory by the size list. | // Split the pre-alloc memory into continuous memory by the size list. | ||||
| DynamicMemBufPtr continuous_mem_buf; | DynamicMemBufPtr continuous_mem_buf; | ||||
| std::vector<DeviceMemPtr> device_addr_list; | |||||
| auto buf_addr = device_addr; | auto buf_addr = device_addr; | ||||
| for (size_t i = 0; i < size_list.size(); i++) { | for (size_t i = 0; i < size_list.size(); i++) { | ||||
| continuous_mem_buf = std::make_shared<DynamicMemBuf>(buf_addr, kMemBufUsed, size_list[i]); | continuous_mem_buf = std::make_shared<DynamicMemBuf>(buf_addr, kMemBufUsed, size_list[i]); | ||||
| @@ -102,13 +104,16 @@ DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) { | |||||
| DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { | DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { | ||||
| size_t alloc_mem_size = CalMemBlockAllocSize(size); | size_t alloc_mem_size = CalMemBlockAllocSize(size); | ||||
| if (alloc_mem_size == 0) { | |||||
| return nullptr; | |||||
| } | |||||
| // Add new memory block | // Add new memory block | ||||
| DeviceMemPtr device_addr = nullptr; | DeviceMemPtr device_addr = nullptr; | ||||
| auto real_alloc_size = AllocDeviceMem(alloc_mem_size, &device_addr); | auto real_alloc_size = AllocDeviceMem(alloc_mem_size, &device_addr); | ||||
| if (real_alloc_size < size) { | if (real_alloc_size < size) { | ||||
| MS_LOG(EXCEPTION) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" | |||||
| << size << "]."; | |||||
| MS_LOG(WARNING) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" << size | |||||
| << "]."; | |||||
| return nullptr; | |||||
| } | } | ||||
| auto mem_block = std::make_shared<DynamicMemBlock>(device_addr, real_alloc_size); | auto mem_block = std::make_shared<DynamicMemBlock>(device_addr, real_alloc_size); | ||||
| MS_EXCEPTION_IF_NULL(mem_block); | MS_EXCEPTION_IF_NULL(mem_block); | ||||
| @@ -135,10 +140,10 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { | |||||
| size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) { | size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) { | ||||
| auto device_free_mem_size = free_mem_size(); | auto device_free_mem_size = free_mem_size(); | ||||
| if (device_free_mem_size < size) { | if (device_free_mem_size < size) { | ||||
| MS_LOG(EXCEPTION) << "Memory not enough: current free memory size[" << device_free_mem_size | |||||
| << "] is smaller than required size[" << size << "]."; | |||||
| MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size | |||||
| << "] is smaller than required size[" << size << "]."; | |||||
| return 0; | |||||
| } | } | ||||
| auto alloc_mem_size = mem_alloc_unit_size(); | auto alloc_mem_size = mem_alloc_unit_size(); | ||||
| // Grow the alloc size by a factor of two | // Grow the alloc size by a factor of two | ||||
| while (alloc_mem_size < size) { | while (alloc_mem_size < size) { | ||||
| @@ -156,7 +161,6 @@ void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &me | |||||
| MS_EXCEPTION_IF_NULL(mem_buf); | MS_EXCEPTION_IF_NULL(mem_buf); | ||||
| auto mem_block = FindMemBlock(mem_buf->device_addr_); | auto mem_block = FindMemBlock(mem_buf->device_addr_); | ||||
| MS_EXCEPTION_IF_NULL(mem_block); | MS_EXCEPTION_IF_NULL(mem_block); | ||||
| // Divide new memory buf | // Divide new memory buf | ||||
| size_t newbuf_size = mem_buf->size_ - size; | size_t newbuf_size = mem_buf->size_ - size; | ||||
| mem_buf->size_ = size; | mem_buf->size_ = size; | ||||