diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index 207893834f..dbdd17c915 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -302,7 +302,7 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) auto output_size = kernel_mod->GetOutputSizeList(); auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_size[output_index]); if (!ret) { - MS_LOG(EXCEPTION) << "Cannot alloc address, tensor size is: " << output_size[output_index]; + MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << output_size[output_index]; } for (auto &node : item) { @@ -662,8 +662,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo if (profiler_inst->GetEnableFlag()) { profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), stream_); } - CHECK_OP_RET_WITH_EXCEPT(kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_), - "Launch kernel failed."); + if (!kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_)) { + MS_LOG(EXCEPTION) << "Launch kernel failed: " << kernel->fullname_with_scope(); + } if (profiler_inst->GetEnableFlag()) { profiler_inst->OpDataProducerEnd(); if (profiler_inst->GetSyncEnableFlag()) { diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc index 1a8da802d5..20e3323544 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc @@ -106,7 +106,9 @@ uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) { MS_EXCEPTION_IF_NULL(context_ptr); if (context_ptr->get_param(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) { auto device_ptr = MallocMemFromMemPool(size); - MS_EXCEPTION_IF_NULL(device_ptr); + if (device_ptr == nullptr) { + MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << size; + } return AddressOffset(device_ptr, 0); } diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc index 238cdf5d81..26f3dfe26b 100644 --- a/mindspore/ccsrc/runtime/device/kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc @@ -187,7 +187,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector MS_EXCEPTION_IF_NULL(mem_manager_); auto ret = mem_manager_->MallocMemFromMemPool(device_address, tensor_size); if (!ret) { - MS_LOG(EXCEPTION) << "Malloc device memory failed."; + MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << tensor_size; } AnfAlgo::SetOutputAddr(device_address, index, item.get()); } @@ -220,7 +220,7 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(device_address); auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); if (!ret) { - MS_LOG(EXCEPTION) << "Malloc device memory failed."; + MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << output_sizes[i]; } AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); } @@ -238,7 +238,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { MS_EXCEPTION_IF_NULL(device_address); auto ret = mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); if (!ret) { - MS_LOG(EXCEPTION) << "Malloc device memory failed."; + MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << workspace_lists[i]; } AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); } @@ -651,7 +651,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const MS_EXCEPTION_IF_NULL(address); if (ms_context->get_param(MS_CTX_ENABLE_PYNATIVE_INFER) && !mem_manager_->MallocMemFromMemPool(address, node_size)) { - MS_LOG(EXCEPTION) << "Cannot alloc address from memory pool when tensor size is: " << node_size; + MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << node_size; } else if (mem_manager_->MallocMem(kStaticMem, node_size, address, graph_id) == nullptr) { MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << node_size; } @@ -692,7 +692,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(address); if (ms_context->get_param(MS_CTX_ENABLE_PYNATIVE_INFER) && !mem_manager_->MallocMemFromMemPool(address, tensor_size)) { - MS_LOG(EXCEPTION) << "Cannot alloc address from memory pool when tensor size is: " << tensor_size; + MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << tensor_size; } else if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph->graph_id()) == nullptr) { MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size; }