diff --git a/apis/python/node/dora/cuda.py b/apis/python/node/dora/cuda.py index 84a99c13..77779eaa 100644 --- a/apis/python/node/dora/cuda.py +++ b/apis/python/node/dora/cuda.py @@ -11,7 +11,7 @@ from numba.cuda.cudadrv.devicearray import DeviceNDArray from numba.cuda import to_device -def torch_to_buffer(tensor: torch.TensorType) -> tuple[pa.array, dict]: +def torch_to_ipc_buffer(tensor: torch.TensorType) -> tuple[pa.array, dict]: """Converts a Pytorch tensor into a pyarrow buffer containing the IPC handle and its metadata.""" device_arr = to_device(tensor) cuda_buf = pa.cuda.CudaBuffer.from_numba(device_arr.gpu_data) @@ -24,7 +24,7 @@ def torch_to_buffer(tensor: torch.TensorType) -> tuple[pa.array, dict]: return pa.array(handle_buffer, type=pa.uint8()), metadata -def buffer_to_ipc_handle(handle_buffer: pa.array) -> cuda.IpcMemHandle: +def ipc_buffer_to_ipc_handle(handle_buffer: pa.array) -> cuda.IpcMemHandle: """Converts a buffer containing a serialized handler into cuda IPC MemHandle.""" handle_buffer = handle_buffer.buffers()[1] ipc_handle = pa.cuda.IpcMemHandle.from_buffer(handle_buffer) diff --git a/examples/cuda-latency/README.md b/examples/cuda-benchmark/README.md similarity index 100% rename from examples/cuda-latency/README.md rename to examples/cuda-benchmark/README.md diff --git a/examples/cuda-latency/cpu_bench.yml b/examples/cuda-benchmark/cpu_bench.yml similarity index 100% rename from examples/cuda-latency/cpu_bench.yml rename to examples/cuda-benchmark/cpu_bench.yml diff --git a/examples/cuda-latency/cuda_bench.yml b/examples/cuda-benchmark/cuda_bench.yml similarity index 100% rename from examples/cuda-latency/cuda_bench.yml rename to examples/cuda-benchmark/cuda_bench.yml diff --git a/examples/cuda-latency/cuda_receiver.py b/examples/cuda-benchmark/cuda_receiver.py similarity index 88% rename from examples/cuda-latency/cuda_receiver.py rename to examples/cuda-benchmark/cuda_receiver.py index 1b6cf76a..c9211843 100644 --- a/examples/cuda-latency/cuda_receiver.py +++ b/examples/cuda-benchmark/cuda_receiver.py @@ -8,7 +8,7 @@ import time import pyarrow as pa from tqdm import tqdm from dora import Node -from dora.cuda import buffer_to_ipc_handle, cudabuffer_to_torch +from dora.cuda import ipc_buffer_to_ipc_handle, cudabuffer_to_torch from helper import record_results import torch @@ -43,9 +43,10 @@ while True: else: # AFTER # storage needs to be spawned in the same file as where it's used. Don't ask me why. - ipc_handle = buffer_to_ipc_handle(event["value"]) + ipc_handle = ipc_buffer_to_ipc_handle(event["value"]) cudabuffer = ctx.open_ipc_buffer(ipc_handle) - torch_tensor = cudabuffer_to_torch(cudabuffer, event["metadata"]) + torch_tensor = cudabuffer_to_torch(cudabuffer, event["metadata"]) # on cuda + print(torch_tensor[0]) else: break t_received = time.perf_counter_ns() diff --git a/examples/cuda-latency/cuda_sender.py b/examples/cuda-benchmark/cuda_sender.py similarity index 86% rename from examples/cuda-latency/cuda_sender.py rename to examples/cuda-benchmark/cuda_sender.py index 78e0c7f5..453886cc 100644 --- a/examples/cuda-latency/cuda_sender.py +++ b/examples/cuda-benchmark/cuda_sender.py @@ -6,7 +6,7 @@ import os import numpy as np import pyarrow as pa from dora import Node -from dora.cuda import torch_to_buffer +from dora.cuda import torch_to_ipc_buffer import torch torch.tensor([], device="cuda") @@ -36,10 +36,10 @@ for size in SIZES: node.send_output("latency", pa.array(torch_tensor.numpy()), metadata) else: # AFTER - buffer, metadata = torch_to_buffer(torch_tensor) + ipc_buffer, metadata = torch_to_ipc_buffer(torch_tensor) metadata["time"] = t_send metadata["device"] = "cuda" - node.send_output("latency", buffer, metadata) + node.send_output("latency", ipc_buffer, metadata) # Wait before sending next output node.next() diff --git a/examples/cuda-latency/helper.py b/examples/cuda-benchmark/helper.py similarity index 100% rename from examples/cuda-latency/helper.py rename to examples/cuda-benchmark/helper.py