Compare commits

...

3 Commits

5 changed files with 133 additions and 29 deletions
  1. examples/vggt/image_saver.py (+7, -2)
  2. examples/vggt/vggt-v-realsense.yaml (+78, -9)
  3. node-hub/dora-pyrealsense/dora_pyrealsense/main.py (+1, -1)
  4. node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer (+0, -1)
  5. node-hub/dora-vggt/dora_vggt/main.py (+47, -16)

examples/vggt/image_saver.py (+7, -2)

@@ -1,3 +1,5 @@
+import time
+
 from dora import Node
 
 node = Node()
@@ -7,6 +9,7 @@ i = 0
 
 LEAD_TOPIC = "vggt_depth"
 
+current_time = time.strftime("%H:%M:%S")
 for event in node:
     if event["type"] == "INPUT":
         if LEAD_TOPIC in event["id"]:
@@ -17,11 +20,13 @@ for event in node:
             height = metadata["height"]
 
             # Save to file
-            filename = f"out/{event['id']}_{i}.{encoding}"
+            filename = f"out/{current_time}_{event['id']}_{i}.{encoding}"
             with open(filename, "wb") as f:
                 f.write(storage.to_numpy())
             for key, value in index_dict.items():
-                filename = f"out/{key}_{i}.{value['metadata']['encoding']}"
+                filename = (
+                    f"out/{current_time}_{key}_{i}.{value['metadata']['encoding']}"
+                )
                 with open(filename, "wb") as f:
                     f.write(value["value"])
             i += 1


examples/vggt/vggt-v-realsense.yaml (+78, -9)

@@ -1,29 +1,98 @@
 nodes:
-  - id: camera
+  - id: camera-0
     build: pip install -e ../../node-hub/dora-pyrealsense
     path: dora-pyrealsense
     inputs:
-      tick: dora/timer/millis/100
+      tick: dora/timer/millis/300
     outputs:
       - image
       - depth
     env:
-      CAPTURE_PATH: 8
+      DEVICE_SERIAL: "243222073837"
+
+  - id: camera-1
+    build: pip install -e ../../node-hub/dora-pyrealsense
+    path: dora-pyrealsense
+    inputs:
+      tick: dora/timer/millis/300
+    outputs:
+      - image
+      - depth
+    env:
+      DEVICE_SERIAL: "243322073274"
 
   - id: dora-vggt
     build: pip install -e ../../node-hub/dora-vggt
     path: dora-vggt
     inputs:
-      image: camera/image
+      image-0: camera-0/image
+      image-1: camera-1/image
     outputs:
+      - depth-0
+      - image-0
+      - depth-1
+      - image-1
     env:
       CAMERA_HEIGHT_Y: 0.505
+      DEPTH_ENCODING: mono16
+
+  - id: rav1e-depth
+    path: dora-rav1e
+    build: cargo build -p dora-rav1e --release
+    inputs:
+      depth-0: dora-vggt/depth-0
+    outputs:
+      - depth-0
+    env:
+      ENCODING: avif
+
+  - id: rav1e-image
+    path: dora-rav1e
+    build: cargo build -p dora-rav1e --release
+    inputs:
+      image-0: dora-vggt/image-0
+    outputs:
+      - image-0
+    env:
+      ENCODING: avif
+
+  - id: camera-0-rav1e-image
+    path: dora-rav1e
+    build: cargo build -p dora-rav1e --release
+    inputs:
+      image: camera-0/image
+    outputs:
+      - depth
+      - image
+    env:
+      ENCODING: avif
+
+  - id: camera-0-rav1e-depth
+    path: dora-rav1e
+    build: cargo build -p dora-rav1e --release
+    inputs:
+      depth: camera-0/depth
+    outputs:
+      - depth
+    env:
+      ENCODING: avif
 
   - id: plot
     build: pip install dora-rerun
     path: dora-rerun
    inputs:
-      camera/image: dora-vggt/image
-      camera/depth: dora-vggt/depth
-      realsense/image: camera/image
-      realsense/depth: camera/depth
+      vggt-0/image-0: dora-vggt/image-0
+      vggt-0/depth-0: dora-vggt/depth-0
+      realsense-0/image: camera-0/image
+      realsense-0/depth: camera-0/depth
+      vggt-1/image-1: dora-vggt/image-1
+      vggt-1/depth-1: dora-vggt/depth-1
+      realsense-1/image: camera-1/image
+      realsense-1/depth: camera-1/depth
+
+  - id: bench
+    path: image_saver.py
+    inputs:
+      vggt_image: rav1e-image/image-0
+      vggt_depth: rav1e-depth/depth-0
+      camera_image: camera-0-rav1e-image/image
+      camera_depth: camera-0-rav1e-depth/depth

node-hub/dora-pyrealsense/dora_pyrealsense/main.py (+1, -1)

@@ -26,7 +26,7 @@ def main():
 
     # Serial list
     serials = [device.get_info(rs.camera_info.serial_number) for device in devices]
-    if device_serial and (device_serial in serials):
+    if device_serial and (device_serial not in serials):
        raise ConnectionError(
            f"Device with serial {device_serial} not found within: {serials}.",
        )


node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer (+0, -1)

@@ -1 +0,0 @@
-Subproject commit b2889e65cfe62571ced3ce88f00e7d80b41fee69

node-hub/dora-vggt/dora_vggt/main.py (+47, -16)

@@ -11,11 +11,13 @@ import torch
 from dora import Node
 from PIL import Image
 from vggt.models.vggt import VGGT
 from vggt.utils.geometry import unproject_depth_map_to_point_map
 from vggt.utils.load_fn import load_and_preprocess_images
 from vggt.utils.pose_enc import pose_encoding_to_extri_intri
 
-# bfloat16 is supported on Ampere GPUs (Compute Capability 8.0+)
+CAMERA_HEIGHT_Y = os.getenv("CAMERA_HEIGHT_Y", "0.115")
+
+# bfloat16 is supported on Ampere GPUs (Compute Capability 8.0+)
 dtype = torch.bfloat16
 
 # Check if cuda is available and set the device accordingly
@@ -27,7 +29,6 @@ model = VGGT.from_pretrained("facebook/VGGT-1B").to(device)
 model.eval()
 
 DEPTH_ENCODING = os.environ.get("DEPTH_ENCODING", "float64")
-# Import vecdeque
 
 
 def main():
@@ -94,28 +95,62 @@ def main():
             extrinsic, intrinsic = pose_encoding_to_extri_intri(
                 pose_enc, images.shape[-2:]
             )
-            intrinsic = intrinsic[-1][-1]
-            f_0 = intrinsic[0, 0]
-            f_1 = intrinsic[1, 1]
-            r_0 = intrinsic[0, 2]
-            r_1 = intrinsic[1, 2]
-            print(f"Extrinsic: {extrinsic}")
-            print(f"Intrinsic: {intrinsic}")
 
             # Predict Depth Maps
             depth_map, depth_conf = model.depth_head(
                 aggregated_tokens_list, images, ps_idx
             )
             print(depth_conf.max())
-            depth_map[depth_conf < 1.0] = 0.0  # Set low confidence pixels to 0
+            depth_map[depth_conf < 0.6] = 0.0  # Set low confidence pixels to 0
+
+            # Construct 3D Points from Depth Maps and Cameras
+            # which usually leads to more accurate 3D points than point map branch
+            point_map_by_unprojection = unproject_depth_map_to_point_map(
+                depth_map.squeeze(0), extrinsic.squeeze(0), intrinsic.squeeze(0)
+            )
+
+            # Get the last quartile of the 2nd axis
+            z_value = point_map_by_unprojection[0, :, :, 2]  # S, H, W, 3
+            scale_factor = 0.51
+
+            print(
+                f"Event Id: {event['id']} Scale factor: {scale_factor}, with height: {CAMERA_HEIGHT_Y} and max depth: {point_map_by_unprojection[0, :, :, 1].max()}"
+            )
+            print(
+                f" 0. all min and max depth values: {point_map_by_unprojection[0, :, :, 0].min()} / {point_map_by_unprojection[0, :, :, 0].max()}"
+            )
+            print(
+                f" 1. all min and max depth values: {point_map_by_unprojection[0, :, :, 1].min()} / {point_map_by_unprojection[0, :, :, 1].max()}"
+            )
+            print(
+                f" 2. all min and max depth values: {point_map_by_unprojection[0, :, :, 2].min()} / {point_map_by_unprojection[0, :, :, 2].max()}"
+            )
+
+            print(
+                f"Depth map before scaling: min and max: {depth_map.min()} / {depth_map.max()}"
+            )
+
+            depth_map = (
+                depth_map * scale_factor
+            )  # Scale depth map to the desired depth
+            print(
+                f"Depth map after scaling min and max in meters: {depth_map.min()} / {depth_map.max()}. Depth map shape: {depth_map.shape}"
+            )
+            depth_map = depth_map.to(torch.float64)
+
+            intrinsic = intrinsic[-1][-1]
+            f_0 = intrinsic[0, 0]
+            f_1 = intrinsic[1, 1]
+            r_0 = intrinsic[0, 2]
+            r_1 = intrinsic[1, 2]
             depth_map = depth_map[-1][-1].cpu().numpy()
 
             # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
             if DEPTH_ENCODING == "mono16":
                 depth_map = (depth_map * 1000).astype(np.uint16)
 
             node.send_output(
-                output_id="depth",
+                output_id=event["id"].replace("image", "depth"),
                 data=pa.array(depth_map.ravel()),
                 metadata={
                     "width": depth_map.shape[1],
@@ -137,13 +172,9 @@ def main():
             # reorder pixels to be in last dimension
             image = image.transpose(1, 2, 0)
 
-            print(
-                f"Image shape: {image.shape}, dtype: {image.dtype} and depth map shape: {depth_map.shape}, dtype: {depth_map.dtype}"
-            )
-
             # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
             node.send_output(
-                output_id="image",
+                output_id=event["id"],
                 data=pa.array(image.ravel()),
                 metadata={
                     "encoding": "rgb8",

