From d6e55e1eae1ad577ea434963728914bf3570eaed Mon Sep 17 00:00:00 2001 From: haixuantao Date: Tue, 1 Jul 2025 13:51:27 +0200 Subject: [PATCH] Add vggt based environment simulation --- examples/urdf/vggt/franka.yml | 70 ++++++++++++++++++++++++++++ examples/urdf/vggt/kuka.yml | 68 +++++++++++++++++++++++++++ examples/urdf/vggt/so_arm101.yml | 69 +++++++++++++++++++++++++++ examples/urdf/vggt/z1.yml | 59 +++++++++++++++++++++++ node-hub/dora-vggt/dora_vggt/main.py | 17 +++---- 5 files changed, 273 insertions(+), 10 deletions(-) create mode 100644 examples/urdf/vggt/franka.yml create mode 100644 examples/urdf/vggt/kuka.yml create mode 100644 examples/urdf/vggt/so_arm101.yml create mode 100644 examples/urdf/vggt/z1.yml diff --git a/examples/urdf/vggt/franka.yml b/examples/urdf/vggt/franka.yml new file mode 100644 index 00000000..40a715ed --- /dev/null +++ b/examples/urdf/vggt/franka.yml @@ -0,0 +1,70 @@ +nodes: + - id: plot + build: pip install -e ../../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_panda: pytorch_kinematics/cmd_vel + camera/image: dora-vggt/image + camera/depth: dora-vggt/depth + env: + panda_urdf: "panda_description" + panda_transform: .5 -0. -0.1 1. 0. 0. 0. + CAMERA_PITCH: 1.5708 + + - id: gamepad + build: pip install -e ../../../node-hub/gamepad + path: gamepad + outputs: + - cmd_vel + - raw_control + inputs: + tick: dora/timer/millis/10 + env: + MAX_LINEAR_SPEED: 0.01 + MAX_ANGULAR_SPEED: 0.05 + + - id: pytorch_kinematics + build: pip install -e ../../../node-hub/dora-pytorch-kinematics + path: dora-pytorch-kinematics + inputs: + cmd_vel: gamepad/cmd_vel + outputs: + - cmd_vel + env: + MODEL_NAME: "panda_description" + END_EFFECTOR_LINK: "panda_link8" + TRANSFORM: .5 -0. -0.1 1. 0. 0. 0. 
+ POSITION_TOLERANCE: 0.001 + ROTATION_TOLERANCE: 0.001 + + - id: camera + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 4 + + - id: camera2 + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 6 + + - id: dora-vggt + build: pip install -e ../../../node-hub/dora-vggt + path: dora-vggt + inputs: + image: camera/image + image2: camera2/image + outputs: + - depth + - image + env: + SCALE_FACTOR: 0.9 diff --git a/examples/urdf/vggt/kuka.yml b/examples/urdf/vggt/kuka.yml new file mode 100644 index 00000000..ad4fd383 --- /dev/null +++ b/examples/urdf/vggt/kuka.yml @@ -0,0 +1,68 @@ +nodes: + - id: plot + build: pip install -e ../../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_iiwa14_primitive_collision: pytorch_kinematics/cmd_vel + camera/image: dora-vggt/image + camera/depth: dora-vggt/depth + env: + iiwa14_primitive_collision_urdf: "iiwa14_description" + iiwa14_primitive_collision_transform: .5 -0. -0.1 1. 0. 0. 0. + CAMERA_PITCH: 1.5708 + + - id: gamepad + build: pip install -e ../../../node-hub/gamepad + path: gamepad + outputs: + - cmd_vel + - raw_control + inputs: + tick: dora/timer/millis/10 + env: + MAX_LINEAR_SPEED: 0.02 + MAX_ANGULAR_SPEED: 0.10 + + - id: pytorch_kinematics + build: pip install -e ../../../node-hub/dora-pytorch-kinematics + path: dora-pytorch-kinematics + inputs: + cmd_vel: gamepad/cmd_vel + outputs: + - cmd_vel + env: + MODEL_NAME: "iiwa14_description" + END_EFFECTOR_LINK: "iiwa_link_7" + TRANSFORM: .5 -0. -0.1 1. 0. 0. 0. 
+ + - id: camera + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 4 + + - id: camera2 + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 6 + + - id: dora-vggt + build: pip install -e ../../../node-hub/dora-vggt + path: dora-vggt + inputs: + image: camera/image + image2: camera2/image + outputs: + - depth + - image + env: + SCALE_FACTOR: 0.9 diff --git a/examples/urdf/vggt/so_arm101.yml b/examples/urdf/vggt/so_arm101.yml new file mode 100644 index 00000000..ea9e878a --- /dev/null +++ b/examples/urdf/vggt/so_arm101.yml @@ -0,0 +1,69 @@ +nodes: + - id: plot + build: pip install -e ../../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_so101_new_calib: pytorch_kinematics/cmd_vel + camera/image: dora-vggt/image + camera/depth: dora-vggt/depth + env: + so101_new_calib_urdf: "so_arm101_description" + so101_new_calib_transform: .14 -0. 0.4 -.5 .5 .5 -.5 + + - id: gamepad + build: pip install -e ../../../node-hub/gamepad + path: gamepad + outputs: + - cmd_vel + - raw_control + inputs: + tick: dora/timer/millis/10 + env: + MAX_LINEAR_SPEED: 0.01 + MAX_ANGULAR_SPEED: 0.05 + + - id: pytorch_kinematics + build: pip install -e ../../../node-hub/dora-pytorch-kinematics + path: dora-pytorch-kinematics + inputs: + cmd_vel: gamepad/cmd_vel + outputs: + - cmd_vel + env: + MODEL_NAME: "so_arm101_description" + END_EFFECTOR_LINK: "gripper" + TRANSFORM: .14 -0. 
0.4 -.5 .5 .5 -.5 + POSITION_TOLERANCE: 0.01 + ROTATION_TOLERANCE: 0.03 + + - id: camera + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 4 + + - id: camera2 + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 6 + + - id: dora-vggt + build: pip install -e ../../../node-hub/dora-vggt + path: dora-vggt + inputs: + image: camera/image + image2: camera2/image + outputs: + - depth + - image + env: + SCALE_FACTOR: 0.9 diff --git a/examples/urdf/vggt/z1.yml b/examples/urdf/vggt/z1.yml new file mode 100644 index 00000000..801e1de2 --- /dev/null +++ b/examples/urdf/vggt/z1.yml @@ -0,0 +1,59 @@ +nodes: + - id: plot + build: pip install -e ../../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_z1: pytorch_kinematics/cmd_vel + camera/image: dora-vggt/image + camera/depth: dora-vggt/depth + env: + z1_urdf: z1_description + z1_transform: .5 -0.2 -0.11 1. 0. 0. 0. + CAMERA_PITCH: 1.5708 + + - id: gamepad + build: pip install -e ../../../node-hub/gamepad + path: gamepad + outputs: + - cmd_vel + - raw_control + inputs: + tick: dora/timer/millis/10 + env: + MAX_LINEAR_SPEED: 0.01 + MAX_ANGULAR_SPEED: 0.05 + + - id: pytorch_kinematics + build: pip install -e ../../../node-hub/dora-pytorch-kinematics + path: dora-pytorch-kinematics + inputs: + cmd_vel: gamepad/cmd_vel + outputs: + - cmd_vel + env: + MODEL_NAME: "z1_description" + END_EFFECTOR_LINK: "link06" + TRANSFORM: .5 -0.2 -0.11 1. 0. 0. 0. 
+ POSITION_TOLERANCE: 0.001 + ROTATION_TOLERANCE: 0.001 + + - id: camera + build: pip install -e ../../../node-hub/opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 4 + + - id: dora-vggt + build: pip install -e ../../../node-hub/dora-vggt + path: dora-vggt + inputs: + image: camera/image + outputs: + - depth + - image + env: + SCALE_FACTOR: 0.88 diff --git a/node-hub/dora-vggt/dora_vggt/main.py b/node-hub/dora-vggt/dora_vggt/main.py index 7c0e24c7..9cab97b8 100644 --- a/node-hub/dora-vggt/dora_vggt/main.py +++ b/node-hub/dora-vggt/dora_vggt/main.py @@ -1,5 +1,5 @@ """TODO: Add docstring.""" - +import os import io from collections import deque as Deque @@ -13,6 +13,8 @@ from vggt.models.vggt import VGGT from vggt.utils.load_fn import load_and_preprocess_images from vggt.utils.pose_enc import pose_encoding_to_extri_intri +SCALE_FACTOR = float(os.getenv("SCALE_FACTOR", "1")) +VGGT_NUM_IMAGES = int(os.getenv("VGGT_NUM_IMAGES", "2")) # bfloat16 is supported on Ampere GPUs (Compute Capability 8.0+) dtype = torch.bfloat16 @@ -28,7 +30,7 @@ model.eval() def main(): """TODO: Add docstring.""" node = Node() - raw_images = Deque(maxlen=2) + raw_images = Deque(maxlen=VGGT_NUM_IMAGES) for event in node: if event["type"] == "INPUT": @@ -100,15 +102,14 @@ def main(): depth_map, depth_conf = model.depth_head( aggregated_tokens_list, images, ps_idx ) - print(depth_conf.max()) depth_map[depth_conf < 1.0] = 0.0 # Set low confidence pixels to 0 depth_map = depth_map.to(torch.float64) depth_map = depth_map[-1][-1].cpu().numpy() - + depth_map = SCALE_FACTOR * depth_map # Warning: Make sure to add my_output_id and my_input_id within the dataflow. 
node.send_output( - output_id="depth", + output_id=event["id"].replace("image", "depth"), data=pa.array(depth_map.ravel()), metadata={ "width": depth_map.shape[1], @@ -129,13 +130,9 @@ def main(): # reorder pixels to be in last dimension image = image.transpose(1, 2, 0) - print( - f"Image shape: {image.shape}, dtype: {image.dtype} and depth map shape: {depth_map.shape}, dtype: {depth_map.dtype}" - ) - # Warning: Make sure to add my_output_id and my_input_id within the dataflow. node.send_output( - output_id="image", + output_id=event["id"], data=pa.array(image.ravel()), metadata={ "encoding": "rgb8",