
Merge branch 'main' into add-vggt-urdf-simulator

tags/v0.3.12-fix
Haixuan Xavier Tao, 6 months ago
Parent commit: f7259c5e72
10 changed files with 274 additions and 24 deletions
  1. Cargo.lock (+2 -2)
  2. examples/so101/Readme.md (+60 -0)
  3. examples/so101/arm_gamepad_control.yml (+48 -0)
  4. examples/so101/leader_follower.yml (+33 -0)
  5. examples/vggt/depth-to-avif.yaml (+54 -0)
  6. examples/vggt/depth.dora-session.yaml (+0 -8)
  7. examples/vggt/image_saver.py (+34 -0)
  8. node-hub/dora-rav1e/Cargo.toml (+1 -1)
  9. node-hub/dora-rav1e/src/lib.rs (+23 -2)
  10. node-hub/dora-vggt/dora_vggt/main.py (+19 -11)

Cargo.lock (+2 -2)

@@ -1165,9 +1165,9 @@ dependencies = [

 [[package]]
 name = "avif-serialize"
-version = "0.8.3"
+version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "98922d6a4cfbcb08820c69d8eeccc05bb1f29bfa06b4f5b1dbfe9a868bd7608e"
+checksum = "19135c0c7a60bfee564dbe44ab5ce0557c6bf3884e5291a50be76a15640c4fbd"
 dependencies = [
  "arrayvec",
 ]


examples/so101/Readme.md (+60 -0)

@@ -0,0 +1,60 @@
## SO101 Arm Control

This example provides gamepad control and leader-follower functionality for the SO-101 robotic arm.

### Install Dependencies

Optionally, install the Python package required for Rerun visualization:

```bash
# Install the URDF loader for Rerun visualization
pip install git+https://github.com/dora-rs/rerun-loader-python-urdf
```

### Hardware Setup

1. Connect your SO-101 arm(s) to your computer via USB/serial
2. Note the serial port names (e.g., on Linux `/dev/ttyACM0`, `/dev/ttyACM1`); a port-listing sketch follows this list
3. Connect your gamepad controller
4. Update the `PORT` environment variable in the YAML files
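
If you are unsure which port each arm was assigned, a small sketch like the one below can list the candidates (it is not part of this example and assumes `pyserial` is installed):

```python
# Hypothetical helper, not part of the example: list candidate serial ports.
# Install the dependency first: pip install pyserial
from serial.tools import list_ports

for port in list_ports.comports():
    # On Linux the SO-101 controller boards typically appear as /dev/ttyACM*.
    print(port.device, "-", port.description)
```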

#### Single Arm Control (arm_gamepad_control.yml)

Control a single SO-101 arm with gamepad input and visualization:

```bash
dora build arm_gamepad_control.yml
dora run arm_gamepad_control.yml
```

#### Leader-Follower Mode (leader_follower.yml)

Use one arm as a leader to control another follower arm:

```bash
dora build leader_follower.yml
dora run leader_follower.yml
```

#### Serial Port Configuration

Update the `PORT` environment variable in the YAML files:

```yaml
env:
  PORT: /dev/ttyACM0 # Change to your actual port
```

## Troubleshooting

### Serial Connection Issues
- Check that the arm is powered on and connected
- Verify the correct serial port in the YAML configuration
- Ensure you have read/write permission on the port (e.g. `sudo chmod a+rw /dev/ttyACM0`, or add your user to the `dialout` group)

### Gamepad Not Detected
- Verify gamepad is connected and recognized by the system
- Test with `jstest /dev/input/js0` (Linux)
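
As an alternative to `jstest`, a quick Python check (assuming `pygame` is installed; this snippet is not part of the example) can confirm the system sees the controller:

```python
# Hypothetical check, not part of the example: list joysticks visible to the OS.
# Install the dependency first: pip install pygame
import pygame

pygame.init()
pygame.joystick.init()
count = pygame.joystick.get_count()
print(f"Detected {count} joystick(s)")
for idx in range(count):
    print(idx, pygame.joystick.Joystick(idx).get_name())
```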

## Safety Notes
- Always ensure the arm has sufficient clearance before operation

examples/so101/arm_gamepad_control.yml (+48 -0)

@@ -0,0 +1,48 @@
nodes:
  - id: so101
    build: pip install -e ../../node-hub/dora-rustypot
    path: dora-rustypot
    inputs:
      tick: dora/timer/millis/10
      pose: pytorch_kinematics/cmd_vel
    outputs:
      - pose
    env:
      PORT: /dev/ttyACM0
      IDS: 1 2 3 4 5

  - id: pytorch_kinematics
    build: pip install -e ../../node-hub/dora-pytorch-kinematics
    path: dora-pytorch-kinematics
    inputs:
      cmd_vel: gamepad/cmd_vel
    outputs:
      - cmd_vel
    env:
      MODEL_NAME: "so_arm101_description"
      END_EFFECTOR_LINK: "gripper"
      TRANSFORM: "0. 0. 0. 1. 0. 0. 0."
      POSITION_TOLERANCE: 0.01
      ROTATION_TOLERANCE: 0.03

  - id: gamepad
    build: pip install -e ../../node-hub/gamepad
    path: gamepad
    outputs:
      - cmd_vel
      - raw_control
    inputs:
      tick: dora/timer/millis/10
    env:
      MAX_LINEAR_SPEED: 0.01
      MAX_ANGULAR_SPEED: 0.05

  # comment below path if you don't want to visualize the arm in rerun
  - id: plot
    build: pip install -e ../../node-hub/dora-rerun
    path: dora-rerun
    inputs:
      jointstate_so101_new_calib: so101/pose
    env:
      so101_new_calib_urdf: "so_arm101_description"
      so101_new_calib_transform: "0. 0. 0. 1. 0. 0. 0."

examples/so101/leader_follower.yml (+33 -0)

@@ -0,0 +1,33 @@
nodes:
  - id: so101
    build: pip install -e ../../node-hub/dora-rustypot
    path: dora-rustypot
    inputs:
      tick: dora/timer/millis/10
      pose: leader_interface/pose
    outputs:
      - pose
    env:
      PORT: /dev/ttyACM0
      IDS: 1 2 3 4 5 6

  - id: leader_interface
    build: pip install -e ../../node-hub/dora-rustypot
    path: dora-rustypot
    inputs:
      tick: dora/timer/millis/10
    outputs:
      - pose
    env:
      PORT: /dev/ttyACM1
      IDS: 1 2 3 4 5 6

  # comment below path if you don't want to visualize the arms in rerun
  - id: plot
    build: pip install -e ../../node-hub/dora-rerun
    path: dora-rerun
    inputs:
      jointstate_so101_new_calib: so101/pose
    env:
      so101_new_calib_urdf: "so_arm101_description"
      so101_new_calib_transform: "0. 0. 0. 1. 0. 0. 0."

examples/vggt/depth-to-avif.yaml (+54 -0)

@@ -0,0 +1,54 @@
nodes:
  - id: camera
    build: pip install opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 1

  - id: dora-vggt
    build: pip install -e ../../node-hub/dora-vggt
    path: dora-vggt
    inputs:
      image: camera/image
    outputs:
      - depth
      - image
    env:
      DEPTH_ENCODING: mono16

  - id: rav1e-depth
    path: dora-rav1e
    build: cargo build -p dora-rav1e --release
    inputs:
      depth: dora-vggt/depth
    outputs:
      - depth
    env:
      ENCODING: avif

  - id: rav1e-image
    path: dora-rav1e
    build: cargo build -p dora-rav1e --release
    inputs:
      image: dora-vggt/image
    outputs:
      - image
    env:
      ENCODING: avif

  - id: bench
    path: image_saver.py
    inputs:
      camera_depth: rav1e-image/image
      vggt_depth: rav1e-depth/depth

  - id: plot
    build: pip install dora-rerun
    path: dora-rerun
    inputs:
      camera/image: dora-vggt/image
      camera/depth: dora-vggt/depth

examples/vggt/depth.dora-session.yaml (+0 -8)

@@ -1,8 +0,0 @@
build_id: 2b402c1e-e52e-45e9-86e5-236b33a77369
session_id: 275de19c-e605-4865-bc5f-2f15916bade9
git_sources: {}
local_build:
  node_working_dirs:
    camera: /Users/xaviertao/Documents/work/dora/examples/vggt
    dora-vggt: /Users/xaviertao/Documents/work/dora/examples/vggt
    plot: /Users/xaviertao/Documents/work/dora/examples/vggt

examples/vggt/image_saver.py (+34 -0)

@@ -0,0 +1,34 @@
from dora import Node

node = Node()

index_dict = {}
i = 0

LEAD_TOPIC = "vggt_depth"

for event in node:
    if event["type"] == "INPUT":
        if LEAD_TOPIC in event["id"]:
            storage = event["value"]
            metadata = event["metadata"]
            encoding = metadata["encoding"]
            width = metadata["width"]
            height = metadata["height"]

            # Save the lead frame to file
            filename = f"out/{event['id']}_{i}.{encoding}"
            with open(filename, "wb") as f:
                f.write(storage.to_numpy())
            # Flush the most recently buffered frame of every other input with the same index
            for key, value in index_dict.items():
                filename = f"out/{key}_{i}.{value['metadata']['encoding']}"
                with open(filename, "wb") as f:
                    f.write(value["value"])
            i += 1
        else:
            # Store the latest frame from the other inputs until the lead topic arrives
            index_dict[event["id"]] = {
                "type": event["type"],
                "value": event["value"].to_numpy(),
                "metadata": event["metadata"],
            }
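
Note that the saver writes into an `out/` directory it does not create. When trying this example, something along these lines (an assumption, not part of the commit) may be needed before the first frame arrives:

```python
import os

# Hypothetical setup step (not in the committed script): make sure the
# output directory used by image_saver.py exists before frames arrive.
os.makedirs("out", exist_ok=True)
```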

node-hub/dora-rav1e/Cargo.toml (+1 -1)

@@ -25,7 +25,7 @@ pyo3 = { workspace = true, features = [
"eyre",
"generate-import-lib",
], optional = true }
avif-serialize = "0.8.3"
avif-serialize = "0.8.4"


[lib]


node-hub/dora-rav1e/src/lib.rs (+23 -2)

@@ -336,7 +336,7 @@ pub fn lib_main() -> Result<()> {
             if let Some(buffer) = data.as_primitive_opt::<UInt16Type>() {
                 let mut buffer = buffer.values().to_vec();
                 if std::env::var("FILL_ZEROS")
-                    .map(|s| s != "false")
+                    .map(|s| s.to_lowercase() != "false")
                     .unwrap_or(true)
                 {
                     fill_zeros_toward_center_y_plane_in_place(&mut buffer, width, height);
@@ -370,7 +370,28 @@ pub fn lib_main() -> Result<()> {
                 let data = pkt.data;
                 match output_encoding.as_str() {
                     "avif" => {
-                        warn!("avif encoding not supported for mono16");
+                        metadata.parameters.insert(
+                            "encoding".to_string(),
+                            Parameter::String("avif".to_string()),
+                        );
+
+                        let data = avif_serialize::Aviffy::new()
+                            .full_color_range(false)
+                            .set_seq_profile(0)
+                            .set_monochrome(true)
+                            .to_vec(
+                                &data,
+                                None,
+                                enc.width as u32,
+                                enc.height as u32,
+                                enc.bit_depth as u8,
+                            );
+
+                        let arrow = data.into_arrow();
+
+                        node.send_output(id, metadata.parameters.clone(), arrow)
+                            .context("could not send output")
+                            .unwrap();
                     }
                     _ => {
                         metadata.parameters.insert(


node-hub/dora-vggt/dora_vggt/main.py (+19 -11)

@@ -3,6 +3,7 @@ import io
 import os
 from collections import deque
 
+
 import cv2
 import numpy as np
 import pyarrow as pa
@@ -19,11 +20,15 @@ VGGT_NUM_IMAGES = int(os.getenv("VGGT_NUM_IMAGES", "2"))
 
 dtype = torch.bfloat16
 
+# Check if cuda is available and set the device accordingly
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 # Initialize the model and load the pretrained weights.
 # This will automatically download the model weights the first time it's run, which may take a while.
-model = VGGT.from_pretrained("facebook/VGGT-1B").to("cuda")
+model = VGGT.from_pretrained("facebook/VGGT-1B").to(device)
 model.eval()
 
+DEPTH_ENCODING = os.environ.get("DEPTH_ENCODING", "float64")
 # Import vecdeque
 
 
@@ -34,7 +39,6 @@ def main():
 
     for event in node:
         if event["type"] == "INPUT":
-
             if "image" in event["id"]:
                 storage = event["value"]
                 metadata = event["metadata"]
@@ -82,7 +86,7 @@ def main():
                 raw_images.append(buffer)
 
                 with torch.no_grad():
-                    images = load_and_preprocess_images(raw_images).to("cuda")
+                    images = load_and_preprocess_images(raw_images).to(device)
 
                     images = images[None] # add batch dimension
                     aggregated_tokens_list, ps_idx = model.aggregator(images)
@@ -108,20 +112,24 @@ def main():
                     depth_map = depth_map[-1][-1].cpu().numpy()
                     depth_map = SCALE_FACTOR * depth_map
                     # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
+                    if DEPTH_ENCODING == "mono16":
+                        depth_map = (depth_map * 1000).astype(np.uint16)
+
                     node.send_output(
                         output_id=event["id"].replace("image", "depth"),
                         data=pa.array(depth_map.ravel()),
                         metadata={
                             "width": depth_map.shape[1],
                             "height": depth_map.shape[0],
-                            "focal": [
-                                int(f_0),
-                                int(f_1),
-                            ],
-                            "resolution": [
-                                int(r_0),
-                                int(r_1),
-                            ],
+                            "encoding": DEPTH_ENCODING,
+                            "focal": [
+                                int(f_0),
+                                int(f_1),
+                            ],
+                            "resolution": [
+                                int(r_0),
+                                int(r_1),
+                            ],
                         },
                     )
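
For reference, a downstream consumer of the `mono16` output has to undo the millimetre scaling applied above. A minimal sketch (illustrative only, not part of this commit):

```python
import numpy as np


def depth_mm_to_meters(depth_mm: np.ndarray) -> np.ndarray:
    """Convert a mono16 depth frame (millimetres) back to float metres.

    Mirrors the `depth_map * 1000` scaling applied by dora-vggt when
    DEPTH_ENCODING is "mono16".
    """
    return depth_mm.astype(np.float32) / 1000.0
```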


