@@ -1165,9 +1165,9 @@ dependencies = [

 [[package]]
 name = "avif-serialize"
-version = "0.8.3"
+version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "98922d6a4cfbcb08820c69d8eeccc05bb1f29bfa06b4f5b1dbfe9a868bd7608e"
+checksum = "19135c0c7a60bfee564dbe44ab5ce0557c6bf3884e5291a50be76a15640c4fbd"
 dependencies = [
  "arrayvec",
 ]
@@ -0,0 +1,54 @@
+nodes:
+  - id: camera
+    build: pip install opencv-video-capture
+    path: opencv-video-capture
+    inputs:
+      tick: dora/timer/millis/100
+    outputs:
+      - image
+    env:
+      CAPTURE_PATH: 1
+
+  - id: dora-vggt
+    build: pip install -e ../../node-hub/dora-vggt
+    path: dora-vggt
+    inputs:
+      image: camera/image
+    outputs:
+      - depth
+      - image
+    env:
+      DEPTH_ENCODING: mono16
+
+  - id: rav1e-depth
+    path: dora-rav1e
+    build: cargo build -p dora-rav1e --release
+    inputs:
+      depth: dora-vggt/depth
+    outputs:
+      - depth
+    env:
+      ENCODING: avif
+
+  - id: rav1e-image
+    path: dora-rav1e
+    build: cargo build -p dora-rav1e --release
+    inputs:
+      image: dora-vggt/image
+    outputs:
+      - image
+    env:
+      ENCODING: avif
+
+  - id: bench
+    path: image_saver.py
+    inputs:
+      camera_depth: rav1e-image/image
+      vggt_depth: rav1e-depth/depth
+
+  - id: plot
+    build: pip install dora-rerun
+    path: dora-rerun
+    inputs:
+      camera/image: dora-vggt/image
+      camera/depth: dora-vggt/depth
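
Note (editorial, not part of the diff): each rav1e output above carries a complete serialized AVIF file per frame, not a raw pixel buffer. A minimal sketch of a downstream consumer, using the same dora Python API as image_saver.py below; the assumption that the encoder nodes forward the upstream width/height parameters is mine, hence the defensive .get() lookups:

from dora import Node

node = Node()
for event in node:
    if event["type"] == "INPUT":
        meta = event["metadata"]
        if meta.get("encoding") == "avif":
            # The payload is a serialized AVIF file, not raw pixels.
            avif_bytes = event["value"].to_numpy().tobytes()
            print(event["id"], len(avif_bytes), "bytes,",
                  meta.get("width"), "x", meta.get("height"))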
@@ -1,8 +0,0 @@
-build_id: 2b402c1e-e52e-45e9-86e5-236b33a77369
-session_id: 275de19c-e605-4865-bc5f-2f15916bade9
-git_sources: {}
-local_build:
-  node_working_dirs:
-    camera: /Users/xaviertao/Documents/work/dora/examples/vggt
-    dora-vggt: /Users/xaviertao/Documents/work/dora/examples/vggt
-    plot: /Users/xaviertao/Documents/work/dora/examples/vggt
@@ -0,0 +1,36 @@
+"""Save each aligned set of encoded frames to the out/ directory."""
+import os
+
+from dora import Node
+
+node = Node()
+
+index_dict = {}
+i = 0
+LEAD_TOPIC = "vggt_depth"
+
+os.makedirs("out", exist_ok=True)  # ensure the output directory exists
+
+for event in node:
+    if event["type"] == "INPUT":
+        if LEAD_TOPIC in event["id"]:
+            storage = event["value"]
+            metadata = event["metadata"]
+            encoding = metadata["encoding"]
+            # Save the lead frame to file.
+            filename = f"out/{event['id']}_{i}.{encoding}"
+            with open(filename, "wb") as f:
+                f.write(storage.to_numpy().tobytes())
+            # Flush the latest buffered frame of every other input alongside it.
+            for key, value in index_dict.items():
+                filename = f"out/{key}_{i}.{value['metadata']['encoding']}"
+                with open(filename, "wb") as f:
+                    f.write(value["value"].tobytes())
+            i += 1
+        else:
+            # Buffer the latest value of each non-lead input.
+            index_dict[event["id"]] = {
+                "type": event["type"],
+                "value": event["value"].to_numpy(),
+                "metadata": event["metadata"],
+            }
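
image_saver.py implements a latest-value join: every non-lead input is buffered, and each arrival of the lead topic (vggt_depth) flushes one aligned snapshot to disk. The same pattern factored into a reusable generator; the helper name is illustrative, not part of this PR:

def latest_value_join(events, lead_id):
    """Yield (lead_event, snapshot) each time the lead topic fires."""
    latest = {}
    for event in events:
        if event["type"] != "INPUT":
            continue
        if lead_id in event["id"]:
            yield event, dict(latest)  # shallow copy of the buffered inputs
        else:
            latest[event["id"]] = event  # keep only the newest frame per input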
@@ -25,7 +25,7 @@ pyo3 = { workspace = true, features = [
     "eyre",
     "generate-import-lib",
 ], optional = true }

-avif-serialize = "0.8.3"
+avif-serialize = "0.8.4"

 [lib]
@@ -336,7 +336,7 @@ pub fn lib_main() -> Result<()> {
             if let Some(buffer) = data.as_primitive_opt::<UInt16Type>() {
                 let mut buffer = buffer.values().to_vec();
                 if std::env::var("FILL_ZEROS")
-                    .map(|s| s != "false")
+                    .map(|s| s.to_lowercase() != "false")
                     .unwrap_or(true)
                 {
                     fill_zeros_toward_center_y_plane_in_place(&mut buffer, width, height);
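
The Rust helper fill_zeros_toward_center_y_plane_in_place is not shown in this diff, so its exact behavior is unknown here. Going only by the name, a rough Python guess at the idea (zero depth samples replaced by carrying the nearest valid row value toward the vertical center) might look like the sketch below; treat it as an illustration of the concept, not the actual implementation:

import numpy as np

def fill_zeros_toward_center_y(depth):
    # Hypothetical semantics inferred from the function name only.
    out = depth.copy()
    h = out.shape[0]
    center = h // 2
    for y in range(1, center):              # top half: carry values downward
        mask = out[y] == 0
        out[y][mask] = out[y - 1][mask]
    for y in range(h - 2, center - 1, -1):  # bottom half: carry values upward
        mask = out[y] == 0
        out[y][mask] = out[y + 1][mask]
    return out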
@@ -370,7 +370,28 @@ pub fn lib_main() -> Result<()> {
                     let data = pkt.data;
                     match output_encoding.as_str() {
                         "avif" => {
-                            warn!("avif encoding not supported for mono16");
+                            metadata.parameters.insert(
+                                "encoding".to_string(),
+                                Parameter::String("avif".to_string()),
+                            );
+                            // Wrap the encoded AV1 payload in a monochrome AVIF container.
+                            let data = avif_serialize::Aviffy::new()
+                                .full_color_range(false)
+                                .set_seq_profile(0)
+                                .set_monochrome(true)
+                                .to_vec(
+                                    &data,
+                                    None,
+                                    enc.width as u32,
+                                    enc.height as u32,
+                                    enc.bit_depth as u8,
+                                );
+                            let arrow = data.into_arrow();
+                            node.send_output(id, metadata.parameters.clone(), arrow)
+                                .context("could not send output")
+                                .unwrap();
                         }
                         _ => {
                             metadata.parameters.insert(
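
With this change the rav1e node wraps mono16 AV1 payloads in an AVIF container instead of warning. AVIF is ISOBMFF underneath, so buffers saved by the bench node can be sanity-checked from Python without a decoder by inspecting the ftyp box and its major brand (avif for stills, avis for sequences); the filename below just follows image_saver.py's naming pattern:

def looks_like_avif(buf: bytes) -> bool:
    # ISOBMFF layout: 4-byte box size, box type "ftyp", then the major brand.
    return len(buf) >= 12 and buf[4:8] == b"ftyp" and buf[8:12] in (b"avif", b"avis")

with open("out/vggt_depth_0.avif", "rb") as f:
    print(looks_like_avif(f.read()))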
@@ -1,6 +1,7 @@
 """TODO: Add docstring."""

 import io
+import os
 from collections import deque as Deque

 import cv2
@@ -17,11 +18,15 @@ from vggt.utils.pose_enc import pose_encoding_to_extri_intri
 dtype = torch.bfloat16

 # Check if cuda is available and set the device accordingly
 device = "cuda" if torch.cuda.is_available() else "cpu"

-model = VGGT.from_pretrained("facebook/VGGT-1B").to("cuda")
+# Initialize the model and load the pretrained weights.
+# This will automatically download the model weights the first time it's run, which may take a while.
+model = VGGT.from_pretrained("facebook/VGGT-1B").to(device)
 model.eval()

+DEPTH_ENCODING = os.environ.get("DEPTH_ENCODING", "float64")
+
 # Import vecdeque
@@ -32,7 +37,6 @@ def main():
     for event in node:
         if event["type"] == "INPUT":
             if "image" in event["id"]:
                 storage = event["value"]
                 metadata = event["metadata"]
@@ -80,7 +84,7 @@
                     raw_images.append(buffer)

                 with torch.no_grad():
-                    images = load_and_preprocess_images(raw_images).to("cuda")
+                    images = load_and_preprocess_images(raw_images).to(device)
                     images = images[None]  # add batch dimension

                     aggregated_tokens_list, ps_idx = model.aggregator(images)
@@ -107,20 +111,24 @@
                 depth_map = depth_map[-1][-1].cpu().numpy()

                 # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
+                if DEPTH_ENCODING == "mono16":
+                    # Quantize depth from meters to millimeters in a 16-bit plane.
+                    depth_map = (depth_map * 1000).astype(np.uint16)
                 node.send_output(
                     output_id="depth",
                     data=pa.array(depth_map.ravel()),
                     metadata={
                         "width": depth_map.shape[1],
                         "height": depth_map.shape[0],
-                        "focal": [
-                            int(f_0),
-                            int(f_1),
-                        ],
-                        "resolution": [
-                            int(r_0),
-                            int(r_1),
-                        ],
+                        "encoding": DEPTH_ENCODING,
+                        "focal": [
+                            int(f_0),
+                            int(f_1),
+                        ],
+                        "resolution": [
+                            int(r_0),
+                            int(r_1),
+                        ],
                     },
                 )
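
The mono16 path stores depth as millimeters in a uint16 plane, so precision is 1 mm and the representable range tops out at 65.535 m; depths beyond that overflow the plain .astype(np.uint16) cast above rather than saturate. A quick numpy round-trip makes the trade-off concrete:

import numpy as np

depth_m = np.array([[0.0125, 1.5, 65.0]])  # meters, as produced by the model
mm = (depth_m * 1000).astype(np.uint16)    # same quantization as above
restored = mm.astype(np.float64) / 1000.0
print(mm)        # [[   12  1500 65000]]
print(restored)  # [[ 0.012  1.5   65.   ]] -- millimeter precision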