| @@ -225,23 +225,24 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| input->addr = device_address->ptr_; | |||
| input->size = device_address->size_; | |||
| kernel_inputs->push_back(input); | |||
| kernel_inputs->emplace_back(input); | |||
| } | |||
| auto output_sizes = kernel_mod.GetOutputSizeList(); | |||
| for (size_t i = 0; i < output_sizes.size(); ++i) { | |||
| auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| if (device_address->ptr_ == nullptr) { | |||
| mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); | |||
| auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); | |||
| if (!ret) { | |||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||
| } | |||
| } | |||
| kernel::AddressPtr output = std::make_shared<kernel::Address>(); | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| output->addr = device_address->ptr_; | |||
| output->size = output_sizes[i]; | |||
| kernel_outputs->push_back(output); | |||
| kernel_outputs->emplace_back(output); | |||
| } | |||
| auto workspace_sizes = kernel_mod.GetWorkspaceSizeList(); | |||
| for (size_t i = 0; i < workspace_sizes.size(); ++i) { | |||
| if (workspace_sizes[i] == 0) { | |||
| @@ -249,12 +250,14 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod | |||
| continue; | |||
| } | |||
| auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]); | |||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||
| if (!device_ptr) { | |||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||
| } | |||
| kernel::AddressPtr workspace = std::make_shared<kernel::Address>(); | |||
| MS_EXCEPTION_IF_NULL(workspace); | |||
| workspace->addr = device_ptr; | |||
| workspace->size = workspace_sizes[i]; | |||
| kernel_workspaces->push_back(workspace); | |||
| kernel_workspaces->emplace_back(workspace); | |||
| } | |||
| } | |||
| @@ -334,7 +337,10 @@ void GPUKernelRuntime::AllocCommunicationOpMemory(bool is_need_alloc_memory, boo | |||
| } | |||
| } | |||
| } | |||
| mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); | |||
| auto ret = mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); | |||
| if (!ret) { | |||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||
| } | |||
| } | |||
| void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, | |||
| @@ -40,7 +40,7 @@ void GPUMemoryManager::MallocDeviceMemory() { | |||
| if (context_ptr->enable_dynamic_mem_pool()) { | |||
| auto device_addr = MallocMemFromMemPool(1); | |||
| if (!device_addr) { | |||
| MS_LOG(ERROR) << "Dynamic memory pool init error."; | |||
| MS_LOG(EXCEPTION) << "Dynamic memory pool init error."; | |||
| } | |||
| } else { | |||
| // Need to reserve 20% space for dynamic memory | |||
| @@ -180,7 +180,10 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> | |||
| auto device_address = | |||
| CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| mem_manager_->MallocMemFromMemPool(device_address, tensor_size); | |||
| auto ret = mem_manager_->MallocMemFromMemPool(device_address, tensor_size); | |||
| if (!ret) { | |||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||
| } | |||
| AnfAlgo::SetOutputAddr(device_address, index, item.get()); | |||
| } | |||
| } | |||
| @@ -209,7 +212,10 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { | |||
| auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); | |||
| auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); | |||
| auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); | |||
| if (!ret) { | |||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||
| } | |||
| AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); | |||
| } | |||
| } | |||
| @@ -224,7 +230,10 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { | |||
| for (size_t i = 0; i < workspace_lists.size(); ++i) { | |||
| auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); | |||
| auto ret = mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); | |||
| if (!ret) { | |||
| MS_LOG(EXCEPTION) << "Malloc device memory failed."; | |||
| } | |||
| AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); | |||
| } | |||
| } | |||
| @@ -141,11 +141,14 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { | |||
| } | |||
| } | |||
| void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { | |||
| bool MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { | |||
| auto device_ptr = MallocMemFromMemPool(size); | |||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||
| if (!device_ptr) { | |||
| return false; | |||
| } | |||
| address->ptr_ = device_ptr; | |||
| address->from_mem_pool_ = true; | |||
| return true; | |||
| } | |||
| void *MemoryManager::MallocMemFromMemPool(size_t size) { | |||
| @@ -168,9 +171,12 @@ void MemoryManager::FreeMemFromMemPool(void *device_ptr) { | |||
| } | |||
| } | |||
| void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, | |||
| bool MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, | |||
| std::vector<size_t> size_list) { | |||
| auto device_ptr_list = MallocContinuousMemFromMemPool(total_size, size_list); | |||
| if (device_ptr_list.size() == 0) { | |||
| return false; | |||
| } | |||
| if (addr_list.size() != device_ptr_list.size()) { | |||
| MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list."; | |||
| } | |||
| @@ -180,6 +186,7 @@ void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList ad | |||
| addr_list[i]->ptr_ = device_ptr_list[i]; | |||
| addr_list[i]->from_mem_pool_ = true; | |||
| } | |||
| return true; | |||
| } | |||
| std::vector<void *> MemoryManager::MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) { | |||
| @@ -46,11 +46,11 @@ class MemoryManager { | |||
| uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); | |||
| virtual uint8_t *MallocMem(int flag, size_t size); | |||
| virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); | |||
| virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); | |||
| virtual void *MallocMemFromMemPool(size_t size); | |||
| virtual void FreeMemFromMemPool(const DeviceAddressPtr address); | |||
| virtual void FreeMemFromMemPool(void *device_ptr); | |||
| virtual void MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, | |||
| virtual bool MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, | |||
| std::vector<size_t> size_list); | |||
| virtual std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list); | |||
| @@ -38,9 +38,12 @@ DeviceMemPtr DynamicMemPoolBestFit::AllocTensorMem(size_t size) { | |||
| std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t total_size, | |||
| std::vector<size_t> size_list) { | |||
| std::vector<DeviceMemPtr> device_addr_list; | |||
| // Pre-allocate the whole memory as one single piece. | |||
| auto device_addr = AllocTensorMem(total_size); | |||
| MS_EXCEPTION_IF_NULL(device_addr); | |||
| if (!device_addr) { | |||
| return device_addr_list; | |||
| } | |||
| // Remove the pre-allocated memory. | |||
| auto mem_block = FindMemBlock(device_addr); | |||
| MS_EXCEPTION_IF_NULL(mem_block); | |||
| @@ -54,7 +57,6 @@ std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t | |||
| (void)mem_block->block_all_mem_buf_map_.erase(iter); | |||
| // Split the pre-allocated memory into continuous buffers according to the size list. | |||
| DynamicMemBufPtr continuous_mem_buf; | |||
| std::vector<DeviceMemPtr> device_addr_list; | |||
| auto buf_addr = device_addr; | |||
| for (size_t i = 0; i < size_list.size(); i++) { | |||
| continuous_mem_buf = std::make_shared<DynamicMemBuf>(buf_addr, kMemBufUsed, size_list[i]); | |||
| @@ -102,13 +104,16 @@ DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) { | |||
| DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { | |||
| size_t alloc_mem_size = CalMemBlockAllocSize(size); | |||
| if (alloc_mem_size == 0) { | |||
| return nullptr; | |||
| } | |||
| // Add new memory block | |||
| DeviceMemPtr device_addr = nullptr; | |||
| auto real_alloc_size = AllocDeviceMem(alloc_mem_size, &device_addr); | |||
| if (real_alloc_size < size) { | |||
| MS_LOG(EXCEPTION) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" | |||
| << size << "]."; | |||
| MS_LOG(WARNING) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" << size | |||
| << "]."; | |||
| return nullptr; | |||
| } | |||
| auto mem_block = std::make_shared<DynamicMemBlock>(device_addr, real_alloc_size); | |||
| MS_EXCEPTION_IF_NULL(mem_block); | |||
| @@ -135,10 +140,10 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { | |||
| size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) { | |||
| auto device_free_mem_size = free_mem_size(); | |||
| if (device_free_mem_size < size) { | |||
| MS_LOG(EXCEPTION) << "Memory not enough: current free memory size[" << device_free_mem_size | |||
| << "] is smaller than required size[" << size << "]."; | |||
| MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size | |||
| << "] is smaller than required size[" << size << "]."; | |||
| return 0; | |||
| } | |||
| auto alloc_mem_size = mem_alloc_unit_size(); | |||
| // Keep doubling the alloc size until it is large enough for the request. | |||
| while (alloc_mem_size < size) { | |||
| @@ -156,7 +161,6 @@ void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &me | |||
| MS_EXCEPTION_IF_NULL(mem_buf); | |||
| auto mem_block = FindMemBlock(mem_buf->device_addr_); | |||
| MS_EXCEPTION_IF_NULL(mem_block); | |||
| // Divide new memory buf | |||
| size_t newbuf_size = mem_buf->size_ - size; | |||
| mem_buf->size_ = size; | |||