demo_receiver.py

#!/usr/bin/env python
"""Receiver node for the dora-rs CUDA benchmark.

Receives 100 packets of roughly 40 MB over the default dora-rs transport
(host copy), then 100 packets over the CUDA->CUDA IPC path, and reports the
mean node-to-node latency for each.
"""

import os
import time

import numpy as np
import pyarrow as pa
import torch
from dora import Node
from dora.cuda import ipc_buffer_to_ipc_handle, open_ipc_handle
from helper import record_results
from tqdm import tqdm

# Warm up CUDA and pyarrow so initialization cost is not measured.
torch.tensor([], device="cuda")
pa.array([])

node = Node("node_2")

current_size = 8
n = 0
i = 0
latencies = []
mean_cpu = mean_cuda = 0

DEVICE = os.getenv("DEVICE", "cuda")
NAME = f"dora torch {DEVICE}"

print()
print("Receiving 40MB packets using default dora-rs")

while True:
    event = node.next()
    if event["type"] == "INPUT":
        if i == 0:
            pbar = tqdm(total=100)
        elif i == 100:
            print("vs")
            print("Receiving 40MB packets using dora-rs CUDA->CUDA")
            pbar = tqdm(total=100)

        t_send = event["metadata"]["time"]

        if event["metadata"]["device"] != "cuda":
            # BEFORE: default transport, copy through host memory onto the GPU.
            handle = event["value"].to_numpy()
            scope = None
            torch_tensor = torch.tensor(handle, device="cuda")
        else:
            # AFTER: CUDA->CUDA, reopen the sender's buffer through a CUDA IPC handle.
            ipc_handle = ipc_buffer_to_ipc_handle(event["value"], event["metadata"])
            scope = open_ipc_handle(ipc_handle, event["metadata"])
            torch_tensor = scope.__enter__()
    else:
        break

    t_received = time.perf_counter_ns()
    length = len(torch_tensor) * 8  # payload size in bytes, assuming 8-byte elements
    pbar.update(1)
    latencies.append((t_received - t_send) / 1000)  # latency in microseconds
    node.send_output("next", pa.array([]))  # acknowledge so the sender continues
    i += 1

    if i == 100:
        # First 100 packets used the default transport; switch to the CUDA phase.
        pbar.close()
        t_end_cpu = time.time()
        mean_cpu = np.array(latencies).mean()
        latencies = []
    n += 1
    if scope:
        scope.__exit__(None, None, None)

mean_cuda = np.array(latencies).mean()
pbar.close()
time.sleep(2)

print()
print("----")
print(f"Node communication duration with default dora-rs: {mean_cpu / 1000:.1f}ms")
print(f"Node communication duration with dora CUDA->CUDA: {mean_cuda / 1000:.1f}ms")
print("----")
print(f"Speed Up: {mean_cpu / mean_cuda:.0f}")

record_results(NAME, current_size, latencies)
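
For context, demo_receiver.py expects a counterpart sender node that pushes 100 packets over the default dora-rs transport and then 100 over the CUDA->CUDA path, stamps each packet's metadata with a "time" timestamp (perf_counter_ns) and a "device" string, and waits for the receiver's "next" acknowledgement before sending the next packet. That sender is not shown in this file, so the following is only a minimal sketch of the default (host-memory) half under stated assumptions: the node id "node_1", the output name "latency", and the 5,000,000-element float64 payload (roughly 40 MB) are illustrative, and the CUDA->CUDA half would additionally need the sending-side dora.cuda helpers.

#!/usr/bin/env python
"""Hypothetical sender sketch (default transport only); not part of this repository."""

import time

import numpy as np
import pyarrow as pa
from dora import Node

node = Node("node_1")  # assumed sender id
data = np.random.rand(5_000_000)  # roughly 40 MB of float64 values (assumed payload)

for _ in range(100):
    # The receiver reads "time" and "device" from each packet's metadata.
    metadata = {"time": time.perf_counter_ns(), "device": "cpu"}
    node.send_output("latency", pa.array(data), metadata)  # "latency" is an assumed output name
    # Wait for the receiver's "next" acknowledgement before sending the next packet.
    event = node.next()
    if event is None or event["type"] != "INPUT":
        break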