From d6e55e1eae1ad577ea434963728914bf3570eaed Mon Sep 17 00:00:00 2001 From: haixuantao Date: Tue, 1 Jul 2025 13:51:27 +0200 Subject: [PATCH] Add vggt based environment simulation --- examples/urdf/vggt/franka.yml | 70 ++++++++++++++++++++++++++++ examples/urdf/vggt/kuka.yml | 68 +++++++++++++++++++++++++++ examples/urdf/vggt/so_arm101.yml | 69 +++++++++++++++++++++++++++ examples/urdf/vggt/z1.yml | 59 +++++++++++++++++++++++ node-hub/dora-vggt/dora_vggt/main.py | 17 +++---- 5 files changed, 273 insertions(+), 10 deletions(-) create mode 100644 examples/urdf/vggt/franka.yml create mode 100644 examples/urdf/vggt/kuka.yml create mode 100644 examples/urdf/vggt/so_arm101.yml create mode 100644 examples/urdf/vggt/z1.yml diff --git a/examples/urdf/vggt/franka.yml b/examples/urdf/vggt/franka.yml new file mode 100644 index 00000000..40a715ed --- /dev/null +++ b/examples/urdf/vggt/franka.yml @@ -0,0 +1,70 @@ +nodes: + - id: plot + build: pip install -e ../../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_panda: pytorch_kinematics/cmd_vel + camera/image: dora-vggt/image + camera/depth: dora-vggt/depth + env: + panda_urdf: "panda_description" + panda_transform: .5 -0. -0.1 1. 0. 0. 0. + CAMERA_PITCH: 1.5708 + + - id: gamepad + build: pip install -e ../../../node-hub/gamepad + path: gamepad + outputs: + - cmd_vel + - raw_control + inputs: + tick: dora/timer/millis/10 + env: + MAX_LINEAR_SPEED: 0.01 + MAX_ANGULAR_SPEED: 0.05 + + - id: pytorch_kinematics + build: pip install -e ../../../node-hub/dora-pytorch-kinematics + path: dora-pytorch-kinematics + inputs: + cmd_vel: gamepad/cmd_vel + outputs: + - cmd_vel + env: + MODEL_NAME: "panda_description" + END_EFFECTOR_LINK: "panda_link8" + TRANSFORM: .5 -0. -0.1 1. 0. 0. 0. 
+ POSITION_TOLERANCE: 0.001 + ROTATION_TOLERANCE: 0.001 + + - id: camera + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 4 + + - id: camera2 + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 6 + + - id: dora-vggt + build: pip install -e ../../../node-hub/dora-vggt + path: dora-vggt + inputs: + image: camera/image + image2: camera2/image + outputs: + - depth + - image + env: + SCALE_FACTOR: 0.9 diff --git a/examples/urdf/vggt/kuka.yml b/examples/urdf/vggt/kuka.yml new file mode 100644 index 00000000..ad4fd383 --- /dev/null +++ b/examples/urdf/vggt/kuka.yml @@ -0,0 +1,68 @@ +nodes: + - id: plot + build: pip install -e ../../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_iiwa14_primitive_collision: pytorch_kinematics/cmd_vel + camera/image: dora-vggt/image + camera/depth: dora-vggt/depth + env: + iiwa14_primitive_collision_urdf: "iiwa14_description" + iiwa14_primitive_collision_transform: .5 -0. -0.1 1. 0. 0. 0. + CAMERA_PITCH: 1.5708 + + - id: gamepad + build: pip install -e ../../../node-hub/gamepad + path: gamepad + outputs: + - cmd_vel + - raw_control + inputs: + tick: dora/timer/millis/10 + env: + MAX_LINEAR_SPEED: 0.02 + MAX_ANGULAR_SPEED: 0.10 + + - id: pytorch_kinematics + build: pip install -e ../../../node-hub/dora-pytorch-kinematics + path: dora-pytorch-kinematics + inputs: + cmd_vel: gamepad/cmd_vel + outputs: + - cmd_vel + env: + MODEL_NAME: "iiwa14_description" + END_EFFECTOR_LINK: "iiwa_link_7" + TRANSFORM: .5 -0. -0.1 1. 0. 0. 0. 
+ + - id: camera + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 4 + + - id: camera2 + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 6 + + - id: dora-vggt + build: pip install -e ../../../node-hub/dora-vggt + path: dora-vggt + inputs: + image: camera/image + image2: camera2/image + outputs: + - depth + - image + env: + SCALE_FACTOR: 0.9 diff --git a/examples/urdf/vggt/so_arm101.yml b/examples/urdf/vggt/so_arm101.yml new file mode 100644 index 00000000..ea9e878a --- /dev/null +++ b/examples/urdf/vggt/so_arm101.yml @@ -0,0 +1,69 @@ +nodes: + - id: plot + build: pip install -e ../../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_so101_new_calib: pytorch_kinematics/cmd_vel + camera/image: dora-vggt/image + camera/depth: dora-vggt/depth + env: + so101_new_calib_urdf: "so_arm101_description" + so101_new_calib_transform: .14 -0. 0.4 -.5 .5 .5 -.5 + + - id: gamepad + build: pip install -e ../../../node-hub/gamepad + path: gamepad + outputs: + - cmd_vel + - raw_control + inputs: + tick: dora/timer/millis/10 + env: + MAX_LINEAR_SPEED: 0.01 + MAX_ANGULAR_SPEED: 0.05 + + - id: pytorch_kinematics + build: pip install -e ../../../node-hub/dora-pytorch-kinematics + path: dora-pytorch-kinematics + inputs: + cmd_vel: gamepad/cmd_vel + outputs: + - cmd_vel + env: + MODEL_NAME: "so_arm101_description" + END_EFFECTOR_LINK: "gripper" + TRANSFORM: .14 -0. 
0.4 -.5 .5 .5 -.5 + POSITION_TOLERANCE: 0.01 + ROTATION_TOLERANCE: 0.03 + + - id: camera + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 4 + + - id: camera2 + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 6 + + - id: dora-vggt + build: pip install -e ../../../node-hub/dora-vggt + path: dora-vggt + inputs: + image: camera/image + image2: camera2/image + outputs: + - depth + - image + env: + SCALE_FACTOR: 0.9 diff --git a/examples/urdf/vggt/z1.yml b/examples/urdf/vggt/z1.yml new file mode 100644 index 00000000..801e1de2 --- /dev/null +++ b/examples/urdf/vggt/z1.yml @@ -0,0 +1,59 @@ +nodes: + - id: plot + build: pip install -e ../../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_z1: pytorch_kinematics/cmd_vel + camera/image: dora-vggt/image + camera/depth: dora-vggt/depth + env: + z1_urdf: z1_description + z1_transform: .5 -0.2 -0.11 1. 0. 0. 0. + CAMERA_PITCH: 1.5708 + + - id: gamepad + build: pip install -e ../../../node-hub/gamepad + path: gamepad + outputs: + - cmd_vel + - raw_control + inputs: + tick: dora/timer/millis/10 + env: + MAX_LINEAR_SPEED: 0.01 + MAX_ANGULAR_SPEED: 0.05 + + - id: pytorch_kinematics + build: pip install -e ../../../node-hub/dora-pytorch-kinematics + path: dora-pytorch-kinematics + inputs: + cmd_vel: gamepad/cmd_vel + outputs: + - cmd_vel + env: + MODEL_NAME: "z1_description" + END_EFFECTOR_LINK: "link06" + TRANSFORM: .5 -0.2 -0.11 1. 0. 0. 0. 
+ POSITION_TOLERANCE: 0.001 + ROTATION_TOLERANCE: 0.001 + + - id: camera + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 4 + + - id: dora-vggt + build: pip install -e ../../../node-hub/dora-vggt + path: dora-vggt + inputs: + image: camera/image + outputs: + - depth + - image + env: + SCALE_FACTOR: 0.88 diff --git a/node-hub/dora-vggt/dora_vggt/main.py b/node-hub/dora-vggt/dora_vggt/main.py index 7c0e24c7..9cab97b8 100644 --- a/node-hub/dora-vggt/dora_vggt/main.py +++ b/node-hub/dora-vggt/dora_vggt/main.py @@ -1,5 +1,5 @@ """TODO: Add docstring.""" - +import os import io from collections import deque as Deque @@ -13,6 +13,8 @@ from vggt.models.vggt import VGGT from vggt.utils.load_fn import load_and_preprocess_images from vggt.utils.pose_enc import pose_encoding_to_extri_intri +SCALE_FACTOR = float(os.getenv("SCALE_FACTOR", "1")) +VGGT_NUM_IMAGES = int(os.getenv("VGGT_NUM_IMAGES", "2")) # bfloat16 is supported on Ampere GPUs (Compute Capability 8.0+) dtype = torch.bfloat16 @@ -28,7 +30,7 @@ model.eval() def main(): """TODO: Add docstring.""" node = Node() - raw_images = Deque(maxlen=2) + raw_images = Deque(maxlen=VGGT_NUM_IMAGES) for event in node: if event["type"] == "INPUT": @@ -100,15 +102,14 @@ def main(): depth_map, depth_conf = model.depth_head( aggregated_tokens_list, images, ps_idx ) - print(depth_conf.max()) depth_map[depth_conf < 1.0] = 0.0 # Set low confidence pixels to 0 depth_map = depth_map.to(torch.float64) depth_map = depth_map[-1][-1].cpu().numpy() - + depth_map = SCALE_FACTOR * depth_map # Warning: Make sure to add my_output_id and my_input_id within the dataflow. 
node.send_output( - output_id="depth", + output_id=event["id"].replace("image", "depth"), data=pa.array(depth_map.ravel()), metadata={ "width": depth_map.shape[1], @@ -129,13 +130,9 @@ def main(): # reorder pixels to be in last dimension image = image.transpose(1, 2, 0) - print( - f"Image shape: {image.shape}, dtype: {image.dtype} and depth map shape: {depth_map.shape}, dtype: {depth_map.dtype}" - ) - # Warning: Make sure to add my_output_id and my_input_id within the dataflow. node.send_output( - output_id="image", + output_id=event["id"], data=pa.array(image.ravel()), metadata={ "encoding": "rgb8",