This makes it possible to use rav1e to encode a monochrome depth image so it can be stored or shared as a standalone AVIF file, which makes it easier to work with depth images outside of dora-rs.
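For consumers outside dora-rs, the saved file is a plain AVIF that any decoder can open. A minimal decode-side sketch, assuming pillow-avif-plugin is installed (decoder support for high-bit-depth monochrome varies) and using the `out/vggt_depth_0.avif` filename that the bench node below writes; the division by 1000 undoes the millimeter scaling applied by dora-vggt's mono16 path:

```python
# Minimal sketch: read a standalone AVIF depth frame back into meters.
# Assumes pillow-avif-plugin; high-bit-depth mono support varies by decoder.
import numpy as np
import pillow_avif  # noqa: F401 -- registers the AVIF decoder with Pillow
from PIL import Image

img = Image.open("out/vggt_depth_0.avif")
depth_mm = np.asarray(img).astype(np.uint16)   # monochrome plane, millimeters
depth_m = depth_mm.astype(np.float32) / 1000.0  # back to meters
print(depth_m.shape, float(depth_m.max()))
```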
@@ -1165,9 +1165,9 @@ dependencies = [

[[package]]
name = "avif-serialize"
-version = "0.8.3"
+version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "98922d6a4cfbcb08820c69d8eeccc05bb1f29bfa06b4f5b1dbfe9a868bd7608e"
+checksum = "19135c0c7a60bfee564dbe44ab5ce0557c6bf3884e5291a50be76a15640c4fbd"
dependencies = [
 "arrayvec",
]
@@ -0,0 +1,54 @@
nodes:
  - id: camera
    build: pip install opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 1

  - id: dora-vggt
    build: pip install -e ../../node-hub/dora-vggt
    path: dora-vggt
    inputs:
      image: camera/image
    outputs:
      - depth
      - image
    env:
      DEPTH_ENCODING: mono16

  - id: rav1e-depth
    path: dora-rav1e
    build: cargo build -p dora-rav1e --release
    inputs:
      depth: dora-vggt/depth
    outputs:
      - depth
    env:
      ENCODING: avif

  - id: rav1e-image
    path: dora-rav1e
    build: cargo build -p dora-rav1e --release
    inputs:
      image: dora-vggt/image
    outputs:
      - image
    env:
      ENCODING: avif

  - id: bench
    path: image_saver.py
    inputs:
      camera_depth: rav1e-image/image
      vggt_depth: rav1e-depth/depth

  - id: plot
    build: pip install dora-rerun
    path: dora-rerun
    inputs:
      camera/image: dora-vggt/image
      camera/depth: dora-vggt/depth
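Each `inputs:` entry wires a node to an upstream output via a `node-id/output-id` reference. A quick sketch to print that wiring, assuming PyYAML and that the graph above is saved as `dataflow.yml` (placeholder name):

```python
# Print the edges of the dataflow declared above (source -> node/input).
# Assumes PyYAML; "dataflow.yml" is a placeholder for the file above.
import yaml

with open("dataflow.yml") as f:
    graph = yaml.safe_load(f)

for node in graph["nodes"]:
    for input_id, source in (node.get("inputs") or {}).items():
        print(f"{source} -> {node['id']}/{input_id}")
```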
@@ -1,8 +0,0 @@
build_id: 2b402c1e-e52e-45e9-86e5-236b33a77369
session_id: 275de19c-e605-4865-bc5f-2f15916bade9
git_sources: {}
local_build:
  node_working_dirs:
    camera: /Users/xaviertao/Documents/work/dora/examples/vggt
    dora-vggt: /Users/xaviertao/Documents/work/dora/examples/vggt
    plot: /Users/xaviertao/Documents/work/dora/examples/vggt
@@ -0,0 +1,34 @@
"""Save encoded frames to disk, batching all inputs on the lead topic."""

import os

from dora import Node

node = Node()

# Latest event per non-lead input, flushed whenever the lead topic arrives,
# so that files saved under the same index belong to the same frame.
index_dict = {}
i = 0
LEAD_TOPIC = "vggt_depth"

os.makedirs("out", exist_ok=True)

for event in node:
    if event["type"] == "INPUT":
        if LEAD_TOPIC in event["id"]:
            storage = event["value"]
            metadata = event["metadata"]
            encoding = metadata["encoding"]
            # Save the lead frame, then flush the buffered inputs under the same index.
            filename = f"out/{event['id']}_{i}.{encoding}"
            with open(filename, "wb") as f:
                f.write(storage.to_numpy().tobytes())
            for key, value in index_dict.items():
                filename = f"out/{key}_{i}.{value['metadata']['encoding']}"
                with open(filename, "wb") as f:
                    f.write(value["value"].tobytes())
            i += 1
        else:
            # Buffer the most recent event for every other input.
            index_dict[event["id"]] = {
                "type": event["type"],
                "value": event["value"].to_numpy(),
                "metadata": event["metadata"],
            }
@@ -25,7 +25,7 @@ pyo3 = { workspace = true, features = [
    "eyre",
    "generate-import-lib",
], optional = true }
-avif-serialize = "0.8.3"
+avif-serialize = "0.8.4"

[lib]
@@ -336,7 +336,7 @@ pub fn lib_main() -> Result<()> {
                if let Some(buffer) = data.as_primitive_opt::<UInt16Type>() {
                    let mut buffer = buffer.values().to_vec();
                    if std::env::var("FILL_ZEROS")
-                        .map(|s| s != "false")
+                        .map(|s| s.to_lowercase() != "false")
                        .unwrap_or(true)
                    {
                        fill_zeros_toward_center_y_plane_in_place(&mut buffer, width, height);
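`fill_zeros_toward_center_y_plane_in_place` patches invalid (zero) depth samples before encoding. The crate's implementation is not shown in this diff; the sketch below is an illustrative Python reimplementation of the idea suggested by the name, sweeping each row from both edges toward the center and carrying the last valid sample forward:

```python
# Illustrative sketch of zero-filling a depth (luma) plane: for each row,
# sweep from both edges toward the center, replacing zeros with the last
# valid sample seen. NOT dora-rav1e's actual implementation.
import numpy as np

def fill_zeros_toward_center(plane: np.ndarray) -> None:
    """In-place fill of zero pixels, row by row, edges toward center."""
    height, width = plane.shape
    mid = width // 2
    for y in range(height):
        last = 0
        for x in range(mid):  # left edge -> center
            if plane[y, x] == 0:
                plane[y, x] = last
            else:
                last = plane[y, x]
        last = 0
        for x in range(width - 1, mid - 1, -1):  # right edge -> center
            if plane[y, x] == 0:
                plane[y, x] = last
            else:
                last = plane[y, x]

depth = np.array([[0, 5, 0, 0, 7, 0]], dtype=np.uint16)
fill_zeros_toward_center(depth)
print(depth)  # [[0 5 5 7 7 0]]: interior zeros inherit their outward neighbor
```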
@@ -370,7 +370,28 @@ pub fn lib_main() -> Result<()> {
                    let data = pkt.data;
                    match output_encoding.as_str() {
                        "avif" => {
-                            warn!("avif encoding not supported for mono16");
+                            metadata.parameters.insert(
+                                "encoding".to_string(),
+                                Parameter::String("avif".to_string()),
+                            );
+                            let data = avif_serialize::Aviffy::new()
+                                .full_color_range(false)
+                                .set_seq_profile(0)
+                                .set_monochrome(true)
+                                .to_vec(
+                                    &data,
+                                    None,
+                                    enc.width as u32,
+                                    enc.height as u32,
+                                    enc.bit_depth as u8,
+                                );
+                            let arrow = data.into_arrow();
+                            node.send_output(id, metadata.parameters.clone(), arrow)
+                                .context("could not send output")
+                                .unwrap();
                        }
                        _ => {
                            metadata.parameters.insert(
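One quick sanity check on the serialized output: a standalone AVIF begins with an ISOBMFF `ftyp` box whose major brand is `avif`, so the first bytes of a saved file can be verified directly (the path matches what image_saver.py writes):

```python
# Check that a file produced by the pipeline is a standalone AVIF:
# bytes 4..8 must be the box type b"ftyp" and bytes 8..12 the brand b"avif".
with open("out/vggt_depth_0.avif", "rb") as f:
    header = f.read(12)

assert header[4:8] == b"ftyp", "not an ISOBMFF file"
assert header[8:12] == b"avif", f"unexpected brand: {header[8:12]!r}"
print("valid standalone AVIF header")
```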
@@ -1,6 +1,7 @@
 """TODO: Add docstring."""

 import io
+import os

 from collections import deque as Deque

 import cv2
@@ -17,11 +18,15 @@ from vggt.utils.pose_enc import pose_encoding_to_extri_intri
 dtype = torch.bfloat16

+# Check if cuda is available and set the device accordingly
+device = "cuda" if torch.cuda.is_available() else "cpu"

 # Initialize the model and load the pretrained weights.
 # This will automatically download the model weights the first time it's run, which may take a while.
-model = VGGT.from_pretrained("facebook/VGGT-1B").to("cuda")
+model = VGGT.from_pretrained("facebook/VGGT-1B").to(device)
 model.eval()

+DEPTH_ENCODING = os.environ.get("DEPTH_ENCODING", "float64")

 # Import vecdeque
@@ -32,7 +37,6 @@ def main():
    for event in node:
        if event["type"] == "INPUT":
            if "image" in event["id"]:
                storage = event["value"]
                metadata = event["metadata"]
@@ -80,7 +84,7 @@ def main():
                raw_images.append(buffer)

                with torch.no_grad():
-                    images = load_and_preprocess_images(raw_images).to("cuda")
+                    images = load_and_preprocess_images(raw_images).to(device)
                    images = images[None]  # add batch dimension
                    aggregated_tokens_list, ps_idx = model.aggregator(images)
@@ -107,20 +111,24 @@ def main():
                    depth_map = depth_map[-1][-1].cpu().numpy()

                    # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
+                    if DEPTH_ENCODING == "mono16":
+                        # Store depth in millimeters so it fits a 16-bit plane.
+                        depth_map = (depth_map * 1000).astype(np.uint16)
                    node.send_output(
                        output_id="depth",
                        data=pa.array(depth_map.ravel()),
                        metadata={
                            "width": depth_map.shape[1],
                            "height": depth_map.shape[0],
-                            "focal": [
-                                int(f_0),
-                                int(f_1),
-                            ],
-                            "resolution": [
-                                int(r_0),
-                                int(r_1),
-                            ],
+                            "encoding": DEPTH_ENCODING,
+                            "focal": [
+                                int(f_0),
+                                int(f_1),
+                            ],
+                            "resolution": [
+                                int(r_0),
+                                int(r_1),
+                            ],
                        },
                    )
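With the `encoding`, `focal`, and `resolution` fields in place, a downstream consumer can rebuild metric 3D points from a mono16 depth map. A minimal sketch, assuming a pinhole model with the principal point at the image center and focal lengths in depth-map pixel units (neither assumption is carried in the metadata):

```python
# Back-project a mono16 depth map (millimeters) to 3D points in meters.
# Assumptions: principal point at the image center; focal lengths (fx, fy)
# are in the same pixel units as the depth map -- neither is in the metadata.
import numpy as np

def depth_to_points(depth_mm: np.ndarray, fx: float, fy: float) -> np.ndarray:
    h, w = depth_mm.shape
    z = depth_mm.astype(np.float32) / 1000.0  # millimeters -> meters
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - w / 2.0) * z / fx
    y = (v - h / 2.0) * z / fy
    return np.stack([x, y, z], axis=-1)  # shape (H, W, 3)
```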