demo_receiver.py

#!/usr/bin/env python
"""Receiver node for the dora-rs CUDA benchmark.

Receives 100 packets of roughly 40 MB over the default dora-rs transport
(host copy), then 100 packets over the CUDA->CUDA IPC path, and reports the
mean node-to-node latency for each.
"""

import os
import time

import numpy as np
import pyarrow as pa
import torch
from dora import Node
from dora.cuda import ipc_buffer_to_ipc_handle, open_ipc_handle
from helper import record_results
from tqdm import tqdm

# Warm up CUDA and pyarrow so initialization cost is not measured.
torch.tensor([], device="cuda")
pa.array([])

node = Node("node_2")

current_size = 8
n = 0
i = 0
latencies = []
mean_cpu = mean_cuda = 0

DEVICE = os.getenv("DEVICE", "cuda")
NAME = f"dora torch {DEVICE}"

print()
print("Receiving 40MB packets using default dora-rs")

while True:
    event = node.next()
    if event["type"] == "INPUT":
        if i == 0:
            pbar = tqdm(total=100)
        elif i == 100:
            print("vs")
            print("Receiving 40MB packets using dora-rs CUDA->CUDA")
            pbar = tqdm(total=100)

        t_send = event["metadata"]["time"]

        if event["metadata"]["device"] != "cuda":
            # BEFORE: default transport, copy through host memory onto the GPU.
            handle = event["value"].to_numpy()
            scope = None
            torch_tensor = torch.tensor(handle, device="cuda")
        else:
            # AFTER: CUDA->CUDA, reopen the sender's buffer through a CUDA IPC handle.
            ipc_handle = ipc_buffer_to_ipc_handle(event["value"], event["metadata"])
            scope = open_ipc_handle(ipc_handle, event["metadata"])
            torch_tensor = scope.__enter__()
    else:
        break

    t_received = time.perf_counter_ns()
    length = len(torch_tensor) * 8  # payload size in bytes, assuming 8-byte elements
    pbar.update(1)
    latencies.append((t_received - t_send) / 1000)  # latency in microseconds
    node.send_output("next", pa.array([]))  # acknowledge so the sender continues
    i += 1

    if i == 100:
        # First 100 packets used the default transport; switch to the CUDA phase.
        pbar.close()
        t_end_cpu = time.time()
        mean_cpu = np.array(latencies).mean()
        latencies = []
    n += 1
    if scope:
        scope.__exit__(None, None, None)

mean_cuda = np.array(latencies).mean()
pbar.close()
time.sleep(2)

print()
print("----")
print(f"Node communication duration with default dora-rs: {mean_cpu / 1000:.1f}ms")
print(f"Node communication duration with dora CUDA->CUDA: {mean_cuda / 1000:.1f}ms")
print("----")
print(f"Speed Up: {mean_cpu / mean_cuda:.0f}")

record_results(NAME, current_size, latencies)
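
For context, demo_receiver.py expects a counterpart sender node that pushes 100 packets over the default dora-rs transport and then 100 over the CUDA->CUDA path, stamps each packet's metadata with a "time" timestamp (perf_counter_ns) and a "device" string, and waits for the receiver's "next" acknowledgement before sending the next packet. That sender is not shown in this file, so the following is only a minimal sketch of the default (host-memory) half under stated assumptions: the node id "node_1", the output name "latency", and the 5,000,000-element float64 payload (roughly 40 MB) are illustrative, and the CUDA->CUDA half would additionally need the sending-side dora.cuda helpers.

#!/usr/bin/env python
"""Hypothetical sender sketch (default transport only); not part of this repository."""

import time

import numpy as np
import pyarrow as pa
from dora import Node

node = Node("node_1")  # assumed sender id
data = np.random.rand(5_000_000)  # roughly 40 MB of float64 values (assumed payload)

for _ in range(100):
    # The receiver reads "time" and "device" from each packet's metadata.
    metadata = {"time": time.perf_counter_ns(), "device": "cpu"}
    node.send_output("latency", pa.array(data), metadata)  # "latency" is an assumed output name
    # Wait for the receiver's "next" acknowledgement before sending the next packet.
    event = node.next()
    if event is None or event["type"] != "INPUT":
        break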