|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Receiver node of the dora CUDA latency benchmark.

The node consumes INPUT events, materialises each payload as a torch tensor
on the GPU, and measures the delay between the sender's timestamp
(``metadata["time"]``) and the moment the tensor is available here.

Two phases of ``MESSAGES_PER_PHASE`` messages each are reported: the first is
labelled "default dora-rs", the second "dora-rs CUDA->CUDA".  Which transfer
path is actually taken for a message depends on ``metadata["device"]``, not on
the phase counter.  After every message an empty ``next`` output is sent back
(presumably the sender waits for it before emitting the next packet -- confirm
against the sender node).
"""

import os
import time


import pyarrow as pa
from tqdm import tqdm
from dora import Node
from dora.cuda import ipc_buffer_to_ipc_handle, cudabuffer_to_torch
from helper import record_results
import torch
import numpy as np

# Number of messages measured in each of the two benchmark phases.
MESSAGES_PER_PHASE = 100

# Touch torch/CUDA and pyarrow once before anything is measured (presumably so
# that one-time initialisation cost does not land in the first sample).
torch.tensor([], device="cuda")
pa.array([])

# CUDA context used to open the IPC buffers received from the sender.
ctx = pa.cuda.Context()
node = Node("node_2")

current_size = 8  # forwarded verbatim to record_results()
i = 0  # number of INPUT events processed so far
latencies = []  # per-message latency of the current phase, in microseconds
mean_cpu = mean_cuda = 0
pbar = None  # created lazily on the first INPUT event
DEVICE = os.getenv("DEVICE", "cuda")

NAME = f"dora torch {DEVICE}"

print("")
print("Receiving 40MB packets using default dora-rs")

while True:
    event = node.next()

    # node.next() yields None once the event stream is exhausted; any
    # non-INPUT event (e.g. STOP) ends the benchmark as well.
    if event is None or event["type"] != "INPUT":
        break

    if i == 0:
        pbar = tqdm(total=MESSAGES_PER_PHASE)
    elif i == MESSAGES_PER_PHASE:
        print("vs")
        print("Receiving 40MB packets using dora-rs CUDA->CUDA")
        pbar = tqdm(total=MESSAGES_PER_PHASE)
    t_send = event["metadata"]["time"]

    if event["metadata"]["device"] != "cuda":
        # BEFORE: payload arrives in host memory and is copied onto the GPU.
        handle = event["value"].to_numpy()
        torch_tensor = torch.tensor(handle, device="cuda")
    else:
        # AFTER: payload is a CUDA IPC handle; open it and wrap it as a tensor.
        # storage needs to be spawned in the same file as where it's used.
        ipc_handle = ipc_buffer_to_ipc_handle(event["value"])
        cudabuffer = ctx.open_ipc_buffer(ipc_handle)
        torch_tensor = cudabuffer_to_torch(cudabuffer, event["metadata"])  # on cuda

    t_received = time.perf_counter_ns()

    pbar.update(1)
    # ns -> us; assumes the sender stamped metadata["time"] with
    # time.perf_counter_ns() -- confirm against the sender node.
    latencies.append((t_received - t_send) / 1000)
    node.send_output("next", pa.array([]))

    i += 1
    if i == MESSAGES_PER_PHASE:
        # End of the first phase: store its mean and start collecting afresh.
        pbar.close()
        mean_cpu = np.array(latencies).mean()
        latencies = []


# Whatever is left in `latencies` belongs to the phase that was running when
# the loop ended.  Guard the empty case to avoid numpy's "mean of empty slice".
mean_cuda = np.array(latencies).mean() if latencies else float("nan")
if pbar is not None:
    pbar.close()

time.sleep(2)

print("")
print("----")
print(f"Node communication duration with default dora-rs: {mean_cpu/1000:.1f}ms")
print(f"Node communication duration with dora CUDA->CUDA: {mean_cuda/1000:.1f}ms")

print("----")
if latencies and mean_cuda > 0:
    print(f"Speed Up: {(mean_cpu)/(mean_cuda):.0f}")
else:
    # No usable second-phase samples: a ratio would be NaN or infinite.
    print("Speed Up: n/a")
record_results(NAME, current_size, latencies)
|