diff --git a/Cargo.lock b/Cargo.lock
index e3bf55bf..5cd2486e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1165,9 +1165,9 @@ dependencies = [
 
 [[package]]
 name = "avif-serialize"
-version = "0.8.3"
+version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "98922d6a4cfbcb08820c69d8eeccc05bb1f29bfa06b4f5b1dbfe9a868bd7608e"
+checksum = "19135c0c7a60bfee564dbe44ab5ce0557c6bf3884e5291a50be76a15640c4fbd"
 dependencies = [
  "arrayvec",
 ]
diff --git a/examples/vggt/depth-to-avif.yaml b/examples/vggt/depth-to-avif.yaml
new file mode 100644
index 00000000..6db92ac3
--- /dev/null
+++ b/examples/vggt/depth-to-avif.yaml
@@ -0,0 +1,54 @@
+nodes:
+  - id: camera
+    build: pip install opencv-video-capture
+    path: opencv-video-capture
+    inputs:
+      tick: dora/timer/millis/100
+    outputs:
+      - image
+    env:
+      CAPTURE_PATH: 1
+
+  - id: dora-vggt
+    build: pip install -e ../../node-hub/dora-vggt
+    path: dora-vggt
+    inputs:
+      image: camera/image
+    outputs:
+      - depth
+      - image
+    env:
+      DEPTH_ENCODING: mono16
+
+  - id: rav1e-depth
+    path: dora-rav1e
+    build: cargo build -p dora-rav1e --release
+    inputs:
+      depth: dora-vggt/depth
+    outputs:
+      - depth
+    env:
+      ENCODING: avif
+
+  - id: rav1e-image
+    path: dora-rav1e
+    build: cargo build -p dora-rav1e --release
+    inputs:
+      image: dora-vggt/image
+    outputs:
+      - image
+    env:
+      ENCODING: avif
+
+  - id: bench
+    path: image_saver.py
+    inputs:
+      camera_depth: rav1e-image/image
+      vggt_depth: rav1e-depth/depth
+
+  - id: plot
+    build: pip install dora-rerun
+    path: dora-rerun
+    inputs:
+      camera/image: dora-vggt/image
+      camera/depth: dora-vggt/depth
diff --git a/examples/vggt/depth.dora-session.yaml b/examples/vggt/depth.dora-session.yaml
deleted file mode 100644
index 13428f1b..00000000
--- a/examples/vggt/depth.dora-session.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-build_id: 2b402c1e-e52e-45e9-86e5-236b33a77369
-session_id: 275de19c-e605-4865-bc5f-2f15916bade9
-git_sources: {}
-local_build:
-  node_working_dirs:
-    camera: /Users/xaviertao/Documents/work/dora/examples/vggt
-    dora-vggt: /Users/xaviertao/Documents/work/dora/examples/vggt
-    plot: /Users/xaviertao/Documents/work/dora/examples/vggt
diff --git a/examples/vggt/image_saver.py b/examples/vggt/image_saver.py
new file mode 100644
index 00000000..5552d3ba
--- /dev/null
+++ b/examples/vggt/image_saver.py
@@ -0,0 +1,34 @@
+from dora import Node
+
+node = Node()
+
+index_dict = {}
+i = 0
+
+LEAD_TOPIC = "vggt_depth"
+
+for event in node:
+    if event["type"] == "INPUT":
+        if LEAD_TOPIC in event["id"]:
+            storage = event["value"]
+            metadata = event["metadata"]
+            encoding = metadata["encoding"]
+            width = metadata["width"]
+            height = metadata["height"]
+
+            # Save to file
+            filename = f"out/{event['id']}_{i}.{encoding}"
+            with open(filename, "wb") as f:
+                f.write(storage.to_numpy())
+            for key, value in index_dict.items():
+                filename = f"out/{key}_{i}.{value['metadata']['encoding']}"
+                with open(filename, "wb") as f:
+                    f.write(value["value"])
+            i += 1
+        else:
+            # Store the event in the index dictionary
+            index_dict[event["id"]] = {
+                "type": event["type"],
+                "value": event["value"].to_numpy(),
+                "metadata": event["metadata"],
+            }
diff --git a/node-hub/dora-rav1e/Cargo.toml b/node-hub/dora-rav1e/Cargo.toml
index c2e35cd2..5dcb6b73 100644
--- a/node-hub/dora-rav1e/Cargo.toml
+++ b/node-hub/dora-rav1e/Cargo.toml
@@ -25,7 +25,7 @@ pyo3 = { workspace = true, features = [
     "eyre",
     "generate-import-lib",
 ], optional = true }
-avif-serialize = "0.8.3"
+avif-serialize = "0.8.4"
 
 
 [lib]
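The `bench` node in `depth-to-avif.yaml` routes both AVIF streams into `image_saver.py`, which dumps every encoded frame into `out/`. As a quick way to compare encoded depth and image sizes, here is a minimal sketch — not part of this PR — that only assumes the `out/{input_id}_{frame_index}.{encoding}` naming used by `image_saver.py`:

```python
# Hypothetical helper: summarize the AVIF files written by image_saver.py.
from collections import defaultdict
from pathlib import Path

sizes = defaultdict(list)
for path in Path("out").glob("*.avif"):
    # image_saver.py names files {input_id}_{frame_index}.{encoding}
    input_id = path.stem.rsplit("_", 1)[0]
    sizes[input_id].append(path.stat().st_size)

for input_id, byte_counts in sorted(sizes.items()):
    mean_kib = sum(byte_counts) / len(byte_counts) / 1024
    print(f"{input_id}: {len(byte_counts)} frames, {mean_kib:.1f} KiB/frame")
```

Note that `image_saver.py` opens files under `out/` without creating the directory, so `out/` must exist before the dataflow is launched.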
diff --git a/node-hub/dora-rav1e/src/lib.rs b/node-hub/dora-rav1e/src/lib.rs
index 22e43180..68280155 100644
--- a/node-hub/dora-rav1e/src/lib.rs
+++ b/node-hub/dora-rav1e/src/lib.rs
@@ -336,7 +336,7 @@ pub fn lib_main() -> Result<()> {
                 if let Some(buffer) = data.as_primitive_opt::<UInt16Type>() {
                     let mut buffer = buffer.values().to_vec();
                     if std::env::var("FILL_ZEROS")
-                        .map(|s| s != "false")
+                        .map(|s| s.to_lowercase() != "false")
                         .unwrap_or(true)
                     {
                         fill_zeros_toward_center_y_plane_in_place(&mut buffer, width, height);
@@ -370,7 +370,28 @@ pub fn lib_main() -> Result<()> {
                     let data = pkt.data;
                     match output_encoding.as_str() {
                         "avif" => {
-                            warn!("avif encoding not supported for mono16");
+                            metadata.parameters.insert(
+                                "encoding".to_string(),
+                                Parameter::String("avif".to_string()),
+                            );
+
+                            let data = avif_serialize::Aviffy::new()
+                                .full_color_range(false)
+                                .set_seq_profile(0)
+                                .set_monochrome(true)
+                                .to_vec(
+                                    &data,
+                                    None,
+                                    enc.width as u32,
+                                    enc.height as u32,
+                                    enc.bit_depth as u8,
+                                );
+
+                            let arrow = data.into_arrow();
+
+                            node.send_output(id, metadata.parameters.clone(), arrow)
+                                .context("could not send output")
+                                .unwrap();
                         }
                         _ => {
                             metadata.parameters.insert(
diff --git a/node-hub/dora-vggt/dora_vggt/main.py b/node-hub/dora-vggt/dora_vggt/main.py
index 7c0e24c7..500e665d 100644
--- a/node-hub/dora-vggt/dora_vggt/main.py
+++ b/node-hub/dora-vggt/dora_vggt/main.py
@@ -1,6 +1,7 @@
 """TODO: Add docstring."""
 
 import io
+import os
 from collections import deque as Deque
 
 import cv2
@@ -17,11 +18,15 @@ from vggt.utils.pose_enc import pose_encoding_to_extri_intri
 
 dtype = torch.bfloat16
 
+# Check if cuda is available and set the device accordingly
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 # Initialize the model and load the pretrained weights.
 # This will automatically download the model weights the first time it's run, which may take a while.
-model = VGGT.from_pretrained("facebook/VGGT-1B").to("cuda")
+model = VGGT.from_pretrained("facebook/VGGT-1B").to(device)
 model.eval()
 
+DEPTH_ENCODING = os.environ.get("DEPTH_ENCODING", "float64")
 
 # Import vecdeque
@@ -32,7 +37,6 @@ def main():
 
     for event in node:
         if event["type"] == "INPUT":
-
             if "image" in event["id"]:
                 storage = event["value"]
                 metadata = event["metadata"]
@@ -80,7 +84,7 @@ def main():
                     raw_images.append(buffer)
 
                 with torch.no_grad():
-                    images = load_and_preprocess_images(raw_images).to("cuda")
+                    images = load_and_preprocess_images(raw_images).to(device)
                     images = images[None]  # add batch dimension
                     aggregated_tokens_list, ps_idx = model.aggregator(images)
 
@@ -107,20 +111,24 @@ def main():
                 depth_map = depth_map[-1][-1].cpu().numpy()
 
                 # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
+                if DEPTH_ENCODING == "mono16":
+                    depth_map = (depth_map * 1000).astype(np.uint16)
+
                 node.send_output(
                     output_id="depth",
                     data=pa.array(depth_map.ravel()),
                     metadata={
                         "width": depth_map.shape[1],
                         "height": depth_map.shape[0],
-                            "focal": [
-                                int(f_0),
-                                int(f_1),
-                            ],
-                            "resolution": [
-                                int(r_0),
-                                int(r_1),
-                            ],
+                        "encoding": DEPTH_ENCODING,
+                        "focal": [
+                            int(f_0),
+                            int(f_1),
+                        ],
+                        "resolution": [
+                            int(r_0),
+                            int(r_1),
+                        ],
                     },
                 )
 
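With `DEPTH_ENCODING=mono16`, `dora-vggt` packs depth as millimeters in `uint16` before handing the plane to `dora-rav1e`, so a downstream consumer has to undo that scaling. A minimal sketch of the inverse mapping follows; the helper name is illustrative and not part of this PR:

```python
# Hypothetical consumer-side helper: invert the mono16 packing done in
# dora_vggt/main.py, which stores depth in millimeters as uint16.
import numpy as np

def mono16_to_meters(depth_mm: np.ndarray) -> np.ndarray:
    """Convert a uint16 millimeter depth map back to float32 meters."""
    return depth_mm.astype(np.float32) / 1000.0

# uint16 caps the representable depth at 65.535 m with 1 mm resolution,
# the trade-off for getting a codec-friendly 16-bit monochrome plane.
depth_mm = np.array([[500, 1500], [65535, 0]], dtype=np.uint16)
print(mono16_to_meters(depth_mm))  # 0.5 m, 1.5 m, 65.535 m, 0.0 m
```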