diff --git a/Cargo.lock b/Cargo.lock
index e03b96b1..b5f3ecd0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1165,9 +1165,9 @@ dependencies = [
 
 [[package]]
 name = "avif-serialize"
-version = "0.8.3"
+version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "98922d6a4cfbcb08820c69d8eeccc05bb1f29bfa06b4f5b1dbfe9a868bd7608e"
+checksum = "19135c0c7a60bfee564dbe44ab5ce0557c6bf3884e5291a50be76a15640c4fbd"
 dependencies = [
  "arrayvec",
 ]
diff --git a/examples/so101/Readme.md b/examples/so101/Readme.md
new file mode 100644
index 00000000..efd18880
--- /dev/null
+++ b/examples/so101/Readme.md
@@ -0,0 +1,60 @@
+## SO101 Arm Control
+
+This example provides gamepad control and leader-follower functionality for the SO-101 robotic arm.
+
+### Install Dependencies
+
+Install the required Python packages for Rerun visualization (optional):
+
+```bash
+# Install the URDF loader for Rerun visualization
+pip install git+https://github.com/dora-rs/rerun-loader-python-urdf
+```
+
+### Hardware Setup
+
+1. Connect your SO-101 arm(s) to your computer via USB/serial
+2. Note the serial port names (e.g., on Linux `/dev/ttyACM0`, `/dev/ttyACM1`)
+3. Connect your gamepad controller
+4. Update the `PORT` environment variable in the YAML files
+
+#### Single Arm Control (arm_gamepad_control.yml)
+
+Control a single SO-101 arm with gamepad input and visualization:
+
+```bash
+dora build arm_gamepad_control.yml
+dora run arm_gamepad_control.yml
+```
+
+#### Leader-Follower Mode (leader_follower.yml)
+
+Use one arm as a leader to control another follower arm:
+
+```bash
+dora build leader_follower.yml
+dora run leader_follower.yml
+```
+
+#### Serial Port Configuration
+
+Update the `PORT` environment variable in the YAML files:
+
+```yaml
+env:
+  PORT: /dev/ttyACM0 # Change to your actual port
+```
+
+## Troubleshooting
+
+### Serial Connection Issues
+- Check that the arm is powered on and connected
+- Verify the correct serial port in the YAML configuration
+- Ensure proper permissions: `sudo chmod 666 PORT` (or add your user to the `dialout` group)
+
+### Gamepad Not Detected
+- Verify the gamepad is connected and recognized by the system
+- Test with `jstest /dev/input/js0` (Linux)
+
+## Safety Notes
+- Always ensure the arm has sufficient clearance before operation
\ No newline at end of file
diff --git a/examples/so101/arm_gamepad_control.yml b/examples/so101/arm_gamepad_control.yml
new file mode 100644
index 00000000..9ddacd55
--- /dev/null
+++ b/examples/so101/arm_gamepad_control.yml
@@ -0,0 +1,48 @@
+nodes:
+  - id: so101
+    build: pip install -e ../../node-hub/dora-rustypot
+    path: dora-rustypot
+    inputs:
+      tick: dora/timer/millis/10
+      pose: pytorch_kinematics/cmd_vel
+    outputs:
+      - pose
+    env:
+      PORT: /dev/ttyACM0
+      IDS: 1 2 3 4 5
+
+  - id: pytorch_kinematics
+    build: pip install -e ../../node-hub/dora-pytorch-kinematics
+    path: dora-pytorch-kinematics
+    inputs:
+      cmd_vel: gamepad/cmd_vel
+    outputs:
+      - cmd_vel
+    env:
+      MODEL_NAME: "so_arm101_description"
+      END_EFFECTOR_LINK: "gripper"
+      TRANSFORM: "0. 0. 0. 1. 0. 0. 0."
+ POSITION_TOLERANCE: 0.01 + ROTATION_TOLERANCE: 0.03 + + - id: gamepad + build: pip install -e ../../node-hub/gamepad + path: gamepad + outputs: + - cmd_vel + - raw_control + inputs: + tick: dora/timer/millis/10 + env: + MAX_LINEAR_SPEED: 0.01 + MAX_ANGULAR_SPEED: 0.05 + + # comment below path if you don't want to visualize the arm in rerun + - id: plot + build: pip install -e ../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_so101_new_calib: so101/pose + env: + so101_new_calib_urdf: "so_arm101_description" + so101_new_calib_transform: "0. 0. 0. 1. 0. 0. 0." diff --git a/examples/so101/leader_follower.yml b/examples/so101/leader_follower.yml new file mode 100644 index 00000000..b73ef4a3 --- /dev/null +++ b/examples/so101/leader_follower.yml @@ -0,0 +1,33 @@ +nodes: + - id: so101 + build: pip install -e ../../node-hub/dora-rustypot + path: dora-rustypot + inputs: + tick: dora/timer/millis/10 + pose: leader_interface/pose + outputs: + - pose + env: + PORT: /dev/ttyACM0 + IDS: 1 2 3 4 5 6 + + - id: leader_interface + build: pip install -e ../../node-hub/dora-rustypot + path: dora-rustypot + inputs: + tick: dora/timer/millis/10 + outputs: + - pose + env: + PORT: /dev/ttyACM1 + IDS: 1 2 3 4 5 6 + + # comment below path if you don't want to visualize the arms in rerun + - id: plot + build: pip install -e ../../node-hub/dora-rerun + path: dora-rerun + inputs: + jointstate_so101_new_calib: so101/pose + env: + so101_new_calib_urdf: "so_arm101_description" + so101_new_calib_transform: "0. 0. 0. 1. 0. 0. 0." \ No newline at end of file diff --git a/examples/vggt/depth-to-avif.yaml b/examples/vggt/depth-to-avif.yaml new file mode 100644 index 00000000..6db92ac3 --- /dev/null +++ b/examples/vggt/depth-to-avif.yaml @@ -0,0 +1,54 @@ +nodes: + - id: camera + build: pip install opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/100 + outputs: + - image + env: + CAPTURE_PATH: 1 + + - id: dora-vggt + build: pip install -e ../../node-hub/dora-vggt + path: dora-vggt + inputs: + image: camera/image + outputs: + - depth + - image + env: + DEPTH_ENCODING: mono16 + + - id: rav1e-depth + path: dora-rav1e + build: cargo build -p dora-rav1e --release + inputs: + depth: dora-vggt/depth + outputs: + - depth + env: + ENCODING: avif + + - id: rav1e-image + path: dora-rav1e + build: cargo build -p dora-rav1e --release + inputs: + image: dora-vggt/image + outputs: + - image + env: + ENCODING: avif + + - id: bench + path: image_saver.py + inputs: + camera_depth: rav1e-image/image + vggt_depth: rav1e-depth/depth + + - id: plot + build: pip install dora-rerun + path: dora-rerun + inputs: + camera/image: dora-vggt/image + camera/depth: dora-vggt/depth diff --git a/examples/vggt/depth.dora-session.yaml b/examples/vggt/depth.dora-session.yaml deleted file mode 100644 index 13428f1b..00000000 --- a/examples/vggt/depth.dora-session.yaml +++ /dev/null @@ -1,8 +0,0 @@ -build_id: 2b402c1e-e52e-45e9-86e5-236b33a77369 -session_id: 275de19c-e605-4865-bc5f-2f15916bade9 -git_sources: {} -local_build: - node_working_dirs: - camera: /Users/xaviertao/Documents/work/dora/examples/vggt - dora-vggt: /Users/xaviertao/Documents/work/dora/examples/vggt - plot: /Users/xaviertao/Documents/work/dora/examples/vggt diff --git a/examples/vggt/image_saver.py b/examples/vggt/image_saver.py new file mode 100644 index 00000000..5552d3ba --- /dev/null +++ b/examples/vggt/image_saver.py @@ -0,0 +1,34 @@ +from dora import Node + +node = Node() + +index_dict = {} +i = 0 + +LEAD_TOPIC = "vggt_depth" + +for 
event in node: + if event["type"] == "INPUT": + if LEAD_TOPIC in event["id"]: + storage = event["value"] + metadata = event["metadata"] + encoding = metadata["encoding"] + width = metadata["width"] + height = metadata["height"] + + # Save to file + filename = f"out/{event['id']}_{i}.{encoding}" + with open(filename, "wb") as f: + f.write(storage.to_numpy()) + for key, value in index_dict.items(): + filename = f"out/{key}_{i}.{value['metadata']['encoding']}" + with open(filename, "wb") as f: + f.write(value["value"]) + i += 1 + else: + # Store the event in the index dictionary + index_dict[event["id"]] = { + "type": event["type"], + "value": event["value"].to_numpy(), + "metadata": event["metadata"], + } diff --git a/node-hub/dora-rav1e/Cargo.toml b/node-hub/dora-rav1e/Cargo.toml index 30a39070..e69302e1 100644 --- a/node-hub/dora-rav1e/Cargo.toml +++ b/node-hub/dora-rav1e/Cargo.toml @@ -25,7 +25,7 @@ pyo3 = { workspace = true, features = [ "eyre", "generate-import-lib", ], optional = true } -avif-serialize = "0.8.3" +avif-serialize = "0.8.4" [lib] diff --git a/node-hub/dora-rav1e/src/lib.rs b/node-hub/dora-rav1e/src/lib.rs index 22e43180..68280155 100644 --- a/node-hub/dora-rav1e/src/lib.rs +++ b/node-hub/dora-rav1e/src/lib.rs @@ -336,7 +336,7 @@ pub fn lib_main() -> Result<()> { if let Some(buffer) = data.as_primitive_opt::() { let mut buffer = buffer.values().to_vec(); if std::env::var("FILL_ZEROS") - .map(|s| s != "false") + .map(|s| s.to_lowercase() != "false") .unwrap_or(true) { fill_zeros_toward_center_y_plane_in_place(&mut buffer, width, height); @@ -370,7 +370,28 @@ pub fn lib_main() -> Result<()> { let data = pkt.data; match output_encoding.as_str() { "avif" => { - warn!("avif encoding not supported for mono16"); + metadata.parameters.insert( + "encoding".to_string(), + Parameter::String("avif".to_string()), + ); + + let data = avif_serialize::Aviffy::new() + .full_color_range(false) + .set_seq_profile(0) + .set_monochrome(true) + .to_vec( + &data, + None, + enc.width as u32, + enc.height as u32, + enc.bit_depth as u8, + ); + + let arrow = data.into_arrow(); + + node.send_output(id, metadata.parameters.clone(), arrow) + .context("could not send output") + .unwrap(); } _ => { metadata.parameters.insert( diff --git a/node-hub/dora-vggt/dora_vggt/main.py b/node-hub/dora-vggt/dora_vggt/main.py index e35d174c..000898e0 100644 --- a/node-hub/dora-vggt/dora_vggt/main.py +++ b/node-hub/dora-vggt/dora_vggt/main.py @@ -3,6 +3,7 @@ import io import os from collections import deque + import cv2 import numpy as np import pyarrow as pa @@ -19,11 +20,15 @@ VGGT_NUM_IMAGES = int(os.getenv("VGGT_NUM_IMAGES", "2")) dtype = torch.bfloat16 +# Check if cuda is available and set the device accordingly +device = "cuda" if torch.cuda.is_available() else "cpu" + # Initialize the model and load the pretrained weights. # This will automatically download the model weights the first time it's run, which may take a while. 
-model = VGGT.from_pretrained("facebook/VGGT-1B").to("cuda") +model = VGGT.from_pretrained("facebook/VGGT-1B").to(device) model.eval() +DEPTH_ENCODING = os.environ.get("DEPTH_ENCODING", "float64") # Import vecdeque @@ -34,7 +39,6 @@ def main(): for event in node: if event["type"] == "INPUT": - if "image" in event["id"]: storage = event["value"] metadata = event["metadata"] @@ -82,7 +86,7 @@ def main(): raw_images.append(buffer) with torch.no_grad(): - images = load_and_preprocess_images(raw_images).to("cuda") + images = load_and_preprocess_images(raw_images).to(device) images = images[None] # add batch dimension aggregated_tokens_list, ps_idx = model.aggregator(images) @@ -108,20 +112,24 @@ def main(): depth_map = depth_map[-1][-1].cpu().numpy() depth_map = SCALE_FACTOR * depth_map # Warning: Make sure to add my_output_id and my_input_id within the dataflow. + if DEPTH_ENCODING == "mono16": + depth_map = (depth_map * 1000).astype(np.uint16) + node.send_output( output_id=event["id"].replace("image", "depth"), data=pa.array(depth_map.ravel()), metadata={ "width": depth_map.shape[1], "height": depth_map.shape[0], - "focal": [ - int(f_0), - int(f_1), - ], - "resolution": [ - int(r_0), - int(r_1), - ], + "encoding": DEPTH_ENCODING, + "focal": [ + int(f_0), + int(f_1), + ], + "resolution": [ + int(r_0), + int(r_1), + ], }, )
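Editor's note: the mono16 path introduced above stores depth as unsigned 16-bit millimeters (meters × 1000) in dora-vggt, which dora-rav1e then compresses as a monochrome AVIF. Below is a minimal sketch of that scaling convention and its round-trip, assuming only numpy; the array names are illustrative and not part of the dataflow.

```python
import numpy as np

# Illustrative depth values in meters, as produced by the VGGT depth head.
depth_m = np.array([[0.5, 1.25], [2.0, 65.0]], dtype=np.float32)

# DEPTH_ENCODING=mono16 convention: meters -> millimeters stored as uint16.
# Values are truncated to whole millimeters; depths beyond ~65.535 m would wrap.
depth_mm = (depth_m * 1000).astype(np.uint16)

# A consumer of the mono16 stream recovers meters by dividing by 1000.
decoded_m = depth_mm.astype(np.float32) / 1000.0

assert np.allclose(decoded_m, depth_m)
```

The trade-off of this convention is 1 mm quantization and a ceiling of about 65.5 m, which is typically ample for indoor scenes.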