
Add vggt based URDF visualisation (#1044)

tags/v0.3.12-fix
Haixuan Xavier Tao · 6 months ago
commit dfc24300f8
5 changed files with 276 additions and 13 deletions
  1. examples/urdf/vggt/franka.yml (+70, -0)
  2. examples/urdf/vggt/kuka.yml (+68, -0)
  3. examples/urdf/vggt/so_arm101.yml (+69, -0)
  4. examples/urdf/vggt/z1.yml (+59, -0)
  5. node-hub/dora-vggt/dora_vggt/main.py (+10, -13)

examples/urdf/vggt/franka.yml (+70, -0)

@@ -0,0 +1,70 @@
nodes:
  - id: plot
    build: pip install -e ../../node-hub/dora-rerun
    path: dora-rerun
    inputs:
      jointstate_panda: pytorch_kinematics/cmd_vel
      camera/image: dora-vggt/image
      camera/depth: dora-vggt/depth
    env:
      panda_urdf: "panda_description"
      panda_transform: .5 -0. -0.1 1. 0. 0. 0.
      CAMERA_PITCH: 1.5708

  - id: gamepad
    build: pip install -e ../../node-hub/gamepad
    path: gamepad
    outputs:
      - cmd_vel
      - raw_control
    inputs:
      tick: dora/timer/millis/10
    env:
      MAX_LINEAR_SPEED: 0.01
      MAX_ANGULAR_SPEED: 0.05

  - id: pytorch_kinematics
    build: pip install -e ../../node-hub/dora-pytorch-kinematics
    path: dora-pytorch-kinematics
    inputs:
      cmd_vel: gamepad/cmd_vel
    outputs:
      - cmd_vel
    env:
      MODEL_NAME: "panda_description"
      END_EFFECTOR_LINK: "panda_link8"
      TRANSFORM: .5 -0. -0.1 1. 0. 0. 0.
      POSITION_TOLERANCE: 0.001
      ROTATION_TOLERANCE: 0.001

  - id: camera
    build: pip install -e ../../../node-hub/opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 4

  - id: camera2
    build: pip install -e ../../../node-hub/opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 6

  - id: dora-vggt
    build: pip install -e ../../../node-hub/dora-vggt
    path: dora-vggt
    inputs:
      image: camera/image
      image2: camera2/image
    outputs:
      - depth
      - image
    env:
      SCALE_FACTOR: 0.9

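The `panda_transform` and `TRANSFORM` entries above pack a static base pose into seven numbers. A minimal sketch of how such a pose could be unpacked, assuming the order is translation x y z followed by a w-x-y-z quaternion (the exact convention used by dora-rerun / dora-pytorch-kinematics is not shown in this diff):

# Hypothetical helper: split a 7-number pose string like ".5 -0. -0.1 1. 0. 0. 0."
# into a translation vector and a quaternion, assuming [x y z qw qx qy qz] order.
import numpy as np

def parse_transform(text: str):
    values = [float(v) for v in text.split()]
    assert len(values) == 7, "expected 3 translation + 4 quaternion components"
    translation = np.array(values[:3])   # x, y, z (assumed to be in metres)
    quaternion = np.array(values[3:])    # qw, qx, qy, qz (assumed order)
    return translation, quaternion

t, q = parse_transform(".5 -0. -0.1 1. 0. 0. 0.")
print(t, q)  # [ 0.5 -0.  -0.1] [1. 0. 0. 0.] -> identity rotation, offset base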
examples/urdf/vggt/kuka.yml (+68, -0)

@@ -0,0 +1,68 @@
nodes:
  - id: plot
    build: pip install -e ../../node-hub/dora-rerun
    path: dora-rerun
    inputs:
      jointstate_iiwa14_primitive_collision: pytorch_kinematics/cmd_vel
      camera/image: dora-vggt/image
      camera/depth: dora-vggt/depth
    env:
      iiwa14_primitive_collision_urdf: "iiwa14_description"
      iiwa14_primitive_collision_transform: .5 -0. -0.1 1. 0. 0. 0.
      CAMERA_PITCH: 1.5708

  - id: gamepad
    build: pip install -e ../../node-hub/gamepad
    path: gamepad
    outputs:
      - cmd_vel
      - raw_control
    inputs:
      tick: dora/timer/millis/10
    env:
      MAX_LINEAR_SPEED: 0.02
      MAX_ANGULAR_SPEED: 0.10

  - id: pytorch_kinematics
    build: pip install -e ../../node-hub/dora-pytorch-kinematics
    path: dora-pytorch-kinematics
    inputs:
      cmd_vel: gamepad/cmd_vel
    outputs:
      - cmd_vel
    env:
      MODEL_NAME: "iiwa14_description"
      END_EFFECTOR_LINK: "iiwa_link_7"
      TRANSFORM: .5 -0. -0.1 1. 0. 0. 0.

  - id: camera
    build: pip install -e ../../../node-hub/opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 4

  - id: camera2
    build: pip install -e ../../../node-hub/opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 6

  - id: dora-vggt
    build: pip install -e ../../../node-hub/dora-vggt
    path: dora-vggt
    inputs:
      image: camera/image
      image2: camera2/image
    outputs:
      - depth
      - image
    env:
      SCALE_FACTOR: 0.9

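As in the Franka example, dora-rerun consumes `dora-vggt/image` and `dora-vggt/depth` directly. A minimal sketch of a custom node consuming the same outputs; the `width` metadata field is visible in the main.py diff at the end of this commit, while `height` is an assumption here:

# Sketch of a consumer node for the dora-vggt outputs wired above.
# Assumes depth/image metadata carries "height" in addition to "width".
import numpy as np
from dora import Node

def main():
    node = Node()
    for event in node:
        if event["type"] != "INPUT":
            continue
        meta = event["metadata"]
        if "depth" in event["id"]:
            depth = event["value"].to_numpy().reshape(meta["height"], meta["width"])
            # depth is scaled by SCALE_FACTOR upstream, roughly metric
            print(f"{event['id']}: median depth {np.median(depth[depth > 0]):.3f}")
        elif "image" in event["id"]:
            img = event["value"].to_numpy().astype(np.uint8)
            img = img.reshape(meta["height"], meta["width"], 3)  # rgb8 encoding
            print(f"{event['id']}: rgb frame {img.shape}")

if __name__ == "__main__":
    main()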
examples/urdf/vggt/so_arm101.yml (+69, -0)

@@ -0,0 +1,69 @@
nodes:
  - id: plot
    build: pip install -e ../../node-hub/dora-rerun
    path: dora-rerun
    inputs:
      jointstate_so101_new_calib: pytorch_kinematics/cmd_vel
      camera/image: dora-vggt/image
      camera/depth: dora-vggt/depth
    env:
      so101_new_calib_urdf: "so_arm101_description"
      so101_new_calib_transform: .14 -0. 0.4 -.5 .5 .5 -.5

  - id: gamepad
    build: pip install -e ../../node-hub/gamepad
    path: gamepad
    outputs:
      - cmd_vel
      - raw_control
    inputs:
      tick: dora/timer/millis/10
    env:
      MAX_LINEAR_SPEED: 0.01
      MAX_ANGULAR_SPEED: 0.05

  - id: pytorch_kinematics
    build: pip install -e ../../node-hub/dora-pytorch-kinematics
    path: dora-pytorch-kinematics
    inputs:
      cmd_vel: gamepad/cmd_vel
    outputs:
      - cmd_vel
    env:
      MODEL_NAME: "so_arm101_description"
      END_EFFECTOR_LINK: "gripper"
      TRANSFORM: .14 -0. 0.4 -.5 .5 .5 -.5
      POSITION_TOLERANCE: 0.01
      ROTATION_TOLERANCE: 0.03

  - id: camera
    build: pip install -e ../../../node-hub/opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 4

  - id: camera2
    build: pip install -e ../../../node-hub/opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 6

  - id: dora-vggt
    build: pip install -e ../../../node-hub/dora-vggt
    path: dora-vggt
    inputs:
      image: camera/image
      image2: camera2/image
    outputs:
      - depth
      - image
    env:
      SCALE_FACTOR: 0.9

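These dual-camera examples hard-code `CAPTURE_PATH: 4` and `CAPTURE_PATH: 6`, which are host-specific device indices. A small sketch for checking which indices deliver frames on your machine before editing the dataflow (plain OpenCV, nothing dora-specific):

# Probe candidate camera indices so CAPTURE_PATH can be adjusted per machine.
import cv2

for index in range(10):
    cap = cv2.VideoCapture(index)
    ok, _frame = cap.read() if cap.isOpened() else (False, None)
    cap.release()
    if ok:
        print(f"index {index}: delivers frames")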
examples/urdf/vggt/z1.yml (+59, -0)

@@ -0,0 +1,59 @@
nodes:
  - id: plot
    build: pip install -e ../../../node-hub/dora-rerun
    path: dora-rerun
    inputs:
      jointstate_z1: pytorch_kinematics/cmd_vel
      camera/image: dora-vggt/image
      camera/depth: dora-vggt/depth
    env:
      z1_urdf: z1_description
      z1_transform: .5 -0.2 -0.11 1. 0. 0. 0.
      CAMERA_PITCH: 1.5708

  - id: gamepad
    build: pip install -e ../../../node-hub/gamepad
    path: gamepad
    outputs:
      - cmd_vel
      - raw_control
    inputs:
      tick: dora/timer/millis/10
    env:
      MAX_LINEAR_SPEED: 0.01
      MAX_ANGULAR_SPEED: 0.05

  - id: pytorch_kinematics
    build: pip install -e ../../../node-hub/dora-pytorch-kinematics
    path: dora-pytorch-kinematics
    inputs:
      cmd_vel: gamepad/cmd_vel
    outputs:
      - cmd_vel
    env:
      MODEL_NAME: "z1_description"
      END_EFFECTOR_LINK: "link06"
      TRANSFORM: .5 -0.2 -0.11 1. 0. 0. 0.
      POSITION_TOLERANCE: 0.001
      ROTATION_TOLERANCE: 0.001

  - id: camera
    build: pip install -e ../../../node-hub/opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 4

  - id: dora-vggt
    build: pip install -e ../../../node-hub/dora-vggt
    path: dora-vggt
    inputs:
      image: camera/image
    outputs:
      - depth
      - image
    env:
      SCALE_FACTOR: 0.88

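`SCALE_FACTOR` (0.88 here, 0.9 in the other examples) multiplies the predicted depth map in main.py, so it acts as a per-setup metric-scale correction. One plausible way to pick it, assuming you can measure the true distance to a known point in the scene, is to compare that measurement against the raw VGGT depth at the same pixel:

# Hypothetical calibration: derive SCALE_FACTOR from one known distance.
# measured_distance_m is tape-measured in the scene (assumption);
# predicted_depth_m is the raw VGGT depth at that pixel before scaling.
measured_distance_m = 0.72
predicted_depth_m = 0.80
scale_factor = measured_distance_m / predicted_depth_m
print(f"SCALE_FACTOR: {scale_factor:.2f}")  # ~0.90, in line with the values above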
node-hub/dora-vggt/dora_vggt/main.py (+10, -13)

@@ -1,8 +1,8 @@
"""TODO: Add docstring.""" """TODO: Add docstring."""

import io import io
import os import os
from collections import deque as Deque
from collections import deque



import cv2 import cv2
import numpy as np import numpy as np
@@ -14,6 +14,8 @@ from vggt.models.vggt import VGGT
from vggt.utils.load_fn import load_and_preprocess_images from vggt.utils.load_fn import load_and_preprocess_images
from vggt.utils.pose_enc import pose_encoding_to_extri_intri from vggt.utils.pose_enc import pose_encoding_to_extri_intri


SCALE_FACTOR = float(os.getenv("SCALE_FACTOR", "1"))
VGGT_NUM_IMAGES = int(os.getenv("VGGT_NUM_IMAGES", "2"))
# bfloat16 is supported on Ampere GPUs (Compute Capability 8.0+) # bfloat16 is supported on Ampere GPUs (Compute Capability 8.0+)


dtype = torch.bfloat16 dtype = torch.bfloat16
@@ -33,7 +35,7 @@ DEPTH_ENCODING = os.environ.get("DEPTH_ENCODING", "float64")
def main(): def main():
"""TODO: Add docstring.""" """TODO: Add docstring."""
node = Node() node = Node()
raw_images = Deque(maxlen=2)
raw_images = deque(maxlen=VGGT_NUM_IMAGES)


for event in node: for event in node:
if event["type"] == "INPUT": if event["type"] == "INPUT":
@@ -92,7 +94,7 @@ def main():
pose_enc = model.camera_head(aggregated_tokens_list)[-1] pose_enc = model.camera_head(aggregated_tokens_list)[-1]
# Extrinsic and intrinsic matrices, following OpenCV convention (camera from world) # Extrinsic and intrinsic matrices, following OpenCV convention (camera from world)
extrinsic, intrinsic = pose_encoding_to_extri_intri( extrinsic, intrinsic = pose_encoding_to_extri_intri(
pose_enc, images.shape[-2:]
pose_enc, images.shape[-2:],
) )
intrinsic = intrinsic[-1][-1] intrinsic = intrinsic[-1][-1]
f_0 = intrinsic[0, 0] f_0 = intrinsic[0, 0]
@@ -102,20 +104,19 @@ def main():


# Predict Depth Maps # Predict Depth Maps
depth_map, depth_conf = model.depth_head( depth_map, depth_conf = model.depth_head(
aggregated_tokens_list, images, ps_idx
aggregated_tokens_list, images, ps_idx,
) )
print(depth_conf.max())
depth_map[depth_conf < 1.0] = 0.0 # Set low confidence pixels to 0 depth_map[depth_conf < 1.0] = 0.0 # Set low confidence pixels to 0
depth_map = depth_map.to(torch.float64) depth_map = depth_map.to(torch.float64)


depth_map = depth_map[-1][-1].cpu().numpy() depth_map = depth_map[-1][-1].cpu().numpy()
depth_map = SCALE_FACTOR * depth_map
# Warning: Make sure to add my_output_id and my_input_id within the dataflow. # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
if DEPTH_ENCODING == "mono16": if DEPTH_ENCODING == "mono16":
depth_map = (depth_map * 1000).astype(np.uint16) depth_map = (depth_map * 1000).astype(np.uint16)


node.send_output( node.send_output(
output_id="depth",
output_id=event["id"].replace("image", "depth"),
data=pa.array(depth_map.ravel()), data=pa.array(depth_map.ravel()),
metadata={ metadata={
"width": depth_map.shape[1], "width": depth_map.shape[1],
@@ -137,13 +138,9 @@ def main():
# reorder pixels to be in last dimension # reorder pixels to be in last dimension
image = image.transpose(1, 2, 0) image = image.transpose(1, 2, 0)


print(
f"Image shape: {image.shape}, dtype: {image.dtype} and depth map shape: {depth_map.shape}, dtype: {depth_map.dtype}"
)

# Warning: Make sure to add my_output_id and my_input_id within the dataflow. # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
node.send_output( node.send_output(
output_id="image",
output_id=event["id"],
data=pa.array(image.ravel()), data=pa.array(image.ravel()),
metadata={ metadata={
"encoding": "rgb8", "encoding": "rgb8",


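The main.py changes replace the fixed output ids with `event["id"]` and `event["id"].replace("image", "depth")`, so each camera input fans out to its own image/depth pair, and the frame buffer becomes `deque(maxlen=VGGT_NUM_IMAGES)`. A stripped-down sketch of just that buffering and routing pattern (standalone, no model; ids match the configs above):

# Minimal illustration of the buffering and output-id routing introduced here.
from collections import deque

VGGT_NUM_IMAGES = 2
raw_images = deque(maxlen=VGGT_NUM_IMAGES)  # oldest frame is dropped automatically

def on_image(event_id, frame):
    raw_images.append(frame)
    if len(raw_images) < VGGT_NUM_IMAGES:
        return None  # not enough views buffered yet for multi-view inference
    # each incoming image event produces an image output and a depth output
    # named after the input id, so several cameras can share one dora-vggt node
    return event_id, event_id.replace("image", "depth")

print(on_image("image", "frame-a"))   # None (buffer still filling)
print(on_image("image2", "frame-b"))  # ('image2', 'depth2')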