diff --git a/apis/python/node/dora/cuda.py b/apis/python/node/dora/cuda.py
index 84a99c13..77779eaa 100644
--- a/apis/python/node/dora/cuda.py
+++ b/apis/python/node/dora/cuda.py
@@ -11,7 +11,7 @@ from numba.cuda.cudadrv.devicearray import DeviceNDArray
 from numba.cuda import to_device
 
 
-def torch_to_buffer(tensor: torch.TensorType) -> tuple[pa.array, dict]:
+def torch_to_ipc_buffer(tensor: torch.TensorType) -> tuple[pa.array, dict]:
     """Converts a Pytorch tensor into a pyarrow buffer containing the IPC handle and its metadata."""
     device_arr = to_device(tensor)
     cuda_buf = pa.cuda.CudaBuffer.from_numba(device_arr.gpu_data)
@@ -24,7 +24,7 @@ def torch_to_buffer(tensor: torch.TensorType) -> tuple[pa.array, dict]:
     return pa.array(handle_buffer, type=pa.uint8()), metadata
 
 
-def buffer_to_ipc_handle(handle_buffer: pa.array) -> cuda.IpcMemHandle:
+def ipc_buffer_to_ipc_handle(handle_buffer: pa.array) -> cuda.IpcMemHandle:
     """Converts a buffer containing a serialized handler into cuda IPC MemHandle."""
     handle_buffer = handle_buffer.buffers()[1]
     ipc_handle = pa.cuda.IpcMemHandle.from_buffer(handle_buffer)
diff --git a/examples/cuda-latency/README.md b/examples/cuda-benchmark/README.md
similarity index 100%
rename from examples/cuda-latency/README.md
rename to examples/cuda-benchmark/README.md
diff --git a/examples/cuda-latency/cpu_bench.yml b/examples/cuda-benchmark/cpu_bench.yml
similarity index 100%
rename from examples/cuda-latency/cpu_bench.yml
rename to examples/cuda-benchmark/cpu_bench.yml
diff --git a/examples/cuda-latency/cuda_bench.yml b/examples/cuda-benchmark/cuda_bench.yml
similarity index 100%
rename from examples/cuda-latency/cuda_bench.yml
rename to examples/cuda-benchmark/cuda_bench.yml
diff --git a/examples/cuda-latency/cuda_receiver.py b/examples/cuda-benchmark/cuda_receiver.py
similarity index 88%
rename from examples/cuda-latency/cuda_receiver.py
rename to examples/cuda-benchmark/cuda_receiver.py
index 1b6cf76a..c9211843 100644
--- a/examples/cuda-latency/cuda_receiver.py
+++ b/examples/cuda-benchmark/cuda_receiver.py
@@ -8,7 +8,7 @@ import time
 import pyarrow as pa
 from tqdm import tqdm
 from dora import Node
-from dora.cuda import buffer_to_ipc_handle, cudabuffer_to_torch
+from dora.cuda import ipc_buffer_to_ipc_handle, cudabuffer_to_torch
 from helper import record_results
 import torch
 
@@ -43,9 +43,10 @@ while True:
         else:
             # AFTER
             # storage needs to be spawned in the same file as where it's used. Don't ask me why.
-            ipc_handle = buffer_to_ipc_handle(event["value"])
+            ipc_handle = ipc_buffer_to_ipc_handle(event["value"])
             cudabuffer = ctx.open_ipc_buffer(ipc_handle)
-            torch_tensor = cudabuffer_to_torch(cudabuffer, event["metadata"])
+            torch_tensor = cudabuffer_to_torch(cudabuffer, event["metadata"])  # on cuda
+            print(torch_tensor[0])
     else:
         break
     t_received = time.perf_counter_ns()
diff --git a/examples/cuda-latency/cuda_sender.py b/examples/cuda-benchmark/cuda_sender.py
similarity index 86%
rename from examples/cuda-latency/cuda_sender.py
rename to examples/cuda-benchmark/cuda_sender.py
index 78e0c7f5..453886cc 100644
--- a/examples/cuda-latency/cuda_sender.py
+++ b/examples/cuda-benchmark/cuda_sender.py
@@ -6,7 +6,7 @@ import os
 import numpy as np
 import pyarrow as pa
 from dora import Node
-from dora.cuda import torch_to_buffer
+from dora.cuda import torch_to_ipc_buffer
 import torch
 
 torch.tensor([], device="cuda")
@@ -36,10 +36,10 @@ for size in SIZES:
             node.send_output("latency", pa.array(torch_tensor.numpy()), metadata)
         else:
             # AFTER
-            buffer, metadata = torch_to_buffer(torch_tensor)
+            ipc_buffer, metadata = torch_to_ipc_buffer(torch_tensor)
             metadata["time"] = t_send
             metadata["device"] = "cuda"
-            node.send_output("latency", buffer, metadata)
+            node.send_output("latency", ipc_buffer, metadata)
 
         # Wait before sending next output
         node.next()
diff --git a/examples/cuda-latency/helper.py b/examples/cuda-benchmark/helper.py
similarity index 100%
rename from examples/cuda-latency/helper.py
rename to examples/cuda-benchmark/helper.py