#!/usr/bin/env python # -*- coding: utf-8 -*- import os import time import pyarrow as pa from tqdm import tqdm from dora import Node from dora.cuda import ipc_buffer_to_ipc_handle, cudabuffer_to_torch from helper import record_results import torch torch.tensor([], device="cuda") pa.array([]) pbar = tqdm(total=100) context = pa.cuda.Context() node = Node("node_2") current_size = 8 n = 0 i = 0 latencies = [] DEVICE = os.getenv("DEVICE", "cuda") NAME = f"dora torch {DEVICE}" ctx = pa.cuda.Context() while True: event = node.next() if event["type"] == "INPUT": t_send = event["metadata"]["time"] if event["metadata"]["device"] != "cuda": # BEFORE handle = event["value"].to_numpy() torch_tensor = torch.tensor(handle, device="cuda") else: # AFTER # storage needs to be spawned in the same file as where it's used. Don't ask me why. ipc_handle = ipc_buffer_to_ipc_handle(event["value"]) cudabuffer = ctx.open_ipc_buffer(ipc_handle) torch_tensor = cudabuffer_to_torch(cudabuffer, event["metadata"]) # on cuda else: break t_received = time.perf_counter_ns() length = len(torch_tensor) * 8 if length != current_size: if n > 0: pbar.close() pbar = tqdm(total=100) record_results(NAME, current_size, latencies) current_size = length n = 0 start = time.perf_counter_ns() latencies = [] pbar.update(1) latencies.append((t_received - t_send) / 1000) node.send_output("next", pa.array([])) n += 1 i += 1 record_results(NAME, current_size, latencies)