| @@ -1165,9 +1165,9 @@ dependencies = [ | |||||
| [[package]] | [[package]] | ||||
| name = "avif-serialize" | name = "avif-serialize" | ||||
| version = "0.8.3" | |||||
| version = "0.8.4" | |||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "98922d6a4cfbcb08820c69d8eeccc05bb1f29bfa06b4f5b1dbfe9a868bd7608e" | |||||
| checksum = "19135c0c7a60bfee564dbe44ab5ce0557c6bf3884e5291a50be76a15640c4fbd" | |||||
| dependencies = [ | dependencies = [ | ||||
| "arrayvec", | "arrayvec", | ||||
| ] | ] | ||||
| @@ -0,0 +1,54 @@ | |||||
| nodes: | |||||
| - id: camera | |||||
| build: pip install opencv-video-capture | |||||
| path: opencv-video-capture | |||||
| inputs: | |||||
| tick: dora/timer/millis/100 | |||||
| outputs: | |||||
| - image | |||||
| env: | |||||
| CAPTURE_PATH: 1 | |||||
| - id: dora-vggt | |||||
| build: pip install -e ../../node-hub/dora-vggt | |||||
| path: dora-vggt | |||||
| inputs: | |||||
| image: camera/image | |||||
| outputs: | |||||
| - depth | |||||
| - image | |||||
| env: | |||||
| DEPTH_ENCODING: mono16 | |||||
| - id: rav1e-depth | |||||
| path: dora-rav1e | |||||
| build: cargo build -p dora-rav1e --release | |||||
| inputs: | |||||
| depth: dora-vggt/depth | |||||
| outputs: | |||||
| - depth | |||||
| env: | |||||
| ENCODING: avif | |||||
| - id: rav1e-image | |||||
| path: dora-rav1e | |||||
| build: cargo build -p dora-rav1e --release | |||||
| inputs: | |||||
| image: dora-vggt/image | |||||
| outputs: | |||||
| - image | |||||
| env: | |||||
| ENCODING: avif | |||||
| - id: bench | |||||
| path: image_saver.py | |||||
| inputs: | |||||
| camera_depth: rav1e-image/image | |||||
| vggt_depth: rav1e-depth/depth | |||||
| - id: plot | |||||
| build: pip install dora-rerun | |||||
| path: dora-rerun | |||||
| inputs: | |||||
| camera/image: dora-vggt/image | |||||
| camera/depth: dora-vggt/depth | |||||
| @@ -1,8 +0,0 @@ | |||||
| build_id: 2b402c1e-e52e-45e9-86e5-236b33a77369 | |||||
| session_id: 275de19c-e605-4865-bc5f-2f15916bade9 | |||||
| git_sources: {} | |||||
| local_build: | |||||
| node_working_dirs: | |||||
| camera: /Users/xaviertao/Documents/work/dora/examples/vggt | |||||
| dora-vggt: /Users/xaviertao/Documents/work/dora/examples/vggt | |||||
| plot: /Users/xaviertao/Documents/work/dora/examples/vggt | |||||
| @@ -0,0 +1,34 @@ | |||||
"""Dora node that dumps incoming frames to disk for offline inspection.

Every input whose id contains ``LEAD_TOPIC`` triggers a snapshot: the lead
payload is written to disk, followed by the most recent payload of every
other input seen so far. All files of one snapshot share the same index.
Inputs that are not the lead topic are only cached, never written on arrival.
"""

import os

# Substring looked up in the input id; a match makes that input the trigger.
LEAD_TOPIC = "vggt_depth"
# Directory receiving the dumped files (relative to the working directory).
OUTPUT_DIR = "out"


def save_frame(out_dir, input_id, index, encoding, payload):
    """Write one payload to ``<out_dir>/<input_id>_<index>.<encoding>``.

    Args:
        out_dir: Existing directory to write into.
        input_id: Id of the input the payload came from.
        index: Snapshot counter used in the file name.
        encoding: Value of the ``encoding`` metadata key, used as extension.
        payload: Bytes-like object (anything ``file.write`` accepts).

    Returns:
        The path of the file that was written.
    """
    filename = os.path.join(out_dir, f"{input_id}_{index}.{encoding}")
    with open(filename, "wb") as f:
        f.write(payload)
    return filename


def main(node=None, out_dir=OUTPUT_DIR):
    """Consume events and write one snapshot per lead-topic input.

    Args:
        node: Iterable of event dicts. Defaults to a fresh ``dora.Node()``;
            an explicit iterable lets the loop run without the dora runtime.
        out_dir: Directory for the dumped files; created if missing.

    Returns:
        The number of snapshots written.

    Raises:
        KeyError: If an INPUT event's metadata has no ``encoding`` entry at
            the time it is written to disk.
    """
    if node is None:
        # Imported lazily so the module can be loaded without dora installed.
        from dora import Node

        node = Node()

    # The original script assumed the directory already existed and crashed
    # with FileNotFoundError on the first write otherwise.
    os.makedirs(out_dir, exist_ok=True)

    latest = {}  # input id -> last event seen on that (non-lead) input
    index = 0
    for event in node:
        if event["type"] != "INPUT":
            continue

        input_id = event["id"]
        metadata = event["metadata"]
        payload = event["value"].to_numpy()

        if LEAD_TOPIC in input_id:
            save_frame(out_dir, input_id, index, metadata["encoding"], payload)
            # Pair the lead frame with the latest frame of every other input.
            for key, cached in latest.items():
                save_frame(
                    out_dir,
                    key,
                    index,
                    cached["metadata"]["encoding"],
                    cached["value"],
                )
            index += 1
        else:
            # Store the event in the index dictionary
            latest[input_id] = {
                "type": event["type"],
                "value": payload,
                "metadata": metadata,
            }
    return index


if __name__ == "__main__":
    main()
| @@ -25,7 +25,7 @@ pyo3 = { workspace = true, features = [ | |||||
| "eyre", | "eyre", | ||||
| "generate-import-lib", | "generate-import-lib", | ||||
| ], optional = true } | ], optional = true } | ||||
| avif-serialize = "0.8.3" | |||||
| avif-serialize = "0.8.4" | |||||
| [lib] | [lib] | ||||
| @@ -336,7 +336,7 @@ pub fn lib_main() -> Result<()> { | |||||
| if let Some(buffer) = data.as_primitive_opt::<UInt16Type>() { | if let Some(buffer) = data.as_primitive_opt::<UInt16Type>() { | ||||
| let mut buffer = buffer.values().to_vec(); | let mut buffer = buffer.values().to_vec(); | ||||
| if std::env::var("FILL_ZEROS") | if std::env::var("FILL_ZEROS") | ||||
| .map(|s| s != "false") | |||||
| .map(|s| s.to_lowercase() != "false") | |||||
| .unwrap_or(true) | .unwrap_or(true) | ||||
| { | { | ||||
| fill_zeros_toward_center_y_plane_in_place(&mut buffer, width, height); | fill_zeros_toward_center_y_plane_in_place(&mut buffer, width, height); | ||||
| @@ -370,7 +370,28 @@ pub fn lib_main() -> Result<()> { | |||||
| let data = pkt.data; | let data = pkt.data; | ||||
| match output_encoding.as_str() { | match output_encoding.as_str() { | ||||
| "avif" => { | "avif" => { | ||||
| warn!("avif encoding not supported for mono16"); | |||||
| metadata.parameters.insert( | |||||
| "encoding".to_string(), | |||||
| Parameter::String("avif".to_string()), | |||||
| ); | |||||
| let data = avif_serialize::Aviffy::new() | |||||
| .full_color_range(false) | |||||
| .set_seq_profile(0) | |||||
| .set_monochrome(true) | |||||
| .to_vec( | |||||
| &data, | |||||
| None, | |||||
| enc.width as u32, | |||||
| enc.height as u32, | |||||
| enc.bit_depth as u8, | |||||
| ); | |||||
| let arrow = data.into_arrow(); | |||||
| node.send_output(id, metadata.parameters.clone(), arrow) | |||||
| .context("could not send output") | |||||
| .unwrap(); | |||||
| } | } | ||||
| _ => { | _ => { | ||||
| metadata.parameters.insert( | metadata.parameters.insert( | ||||
| @@ -1,6 +1,7 @@ | |||||
| """TODO: Add docstring.""" | """TODO: Add docstring.""" | ||||
| import io | import io | ||||
| import os | |||||
| from collections import deque as Deque | from collections import deque as Deque | ||||
| import cv2 | import cv2 | ||||
| @@ -17,11 +18,15 @@ from vggt.utils.pose_enc import pose_encoding_to_extri_intri | |||||
| dtype = torch.bfloat16 | dtype = torch.bfloat16 | ||||
| # Check if cuda is available and set the device accordingly | |||||
| device = "cuda" if torch.cuda.is_available() else "cpu" | |||||
| # Initialize the model and load the pretrained weights. | # Initialize the model and load the pretrained weights. | ||||
| # This will automatically download the model weights the first time it's run, which may take a while. | # This will automatically download the model weights the first time it's run, which may take a while. | ||||
| model = VGGT.from_pretrained("facebook/VGGT-1B").to("cuda") | |||||
| model = VGGT.from_pretrained("facebook/VGGT-1B").to(device) | |||||
| model.eval() | model.eval() | ||||
| DEPTH_ENCODING = os.environ.get("DEPTH_ENCODING", "float64") | |||||
| # Import vecdeque | # Import vecdeque | ||||
| @@ -32,7 +37,6 @@ def main(): | |||||
| for event in node: | for event in node: | ||||
| if event["type"] == "INPUT": | if event["type"] == "INPUT": | ||||
| if "image" in event["id"]: | if "image" in event["id"]: | ||||
| storage = event["value"] | storage = event["value"] | ||||
| metadata = event["metadata"] | metadata = event["metadata"] | ||||
| @@ -80,7 +84,7 @@ def main(): | |||||
| raw_images.append(buffer) | raw_images.append(buffer) | ||||
| with torch.no_grad(): | with torch.no_grad(): | ||||
| images = load_and_preprocess_images(raw_images).to("cuda") | |||||
| images = load_and_preprocess_images(raw_images).to(device) | |||||
| images = images[None] # add batch dimension | images = images[None] # add batch dimension | ||||
| aggregated_tokens_list, ps_idx = model.aggregator(images) | aggregated_tokens_list, ps_idx = model.aggregator(images) | ||||
| @@ -107,20 +111,24 @@ def main(): | |||||
| depth_map = depth_map[-1][-1].cpu().numpy() | depth_map = depth_map[-1][-1].cpu().numpy() | ||||
| # Warning: Make sure to add my_output_id and my_input_id within the dataflow. | # Warning: Make sure to add my_output_id and my_input_id within the dataflow. | ||||
| if DEPTH_ENCODING == "mono16": | |||||
| depth_map = (depth_map * 1000).astype(np.uint16) | |||||
| node.send_output( | node.send_output( | ||||
| output_id="depth", | output_id="depth", | ||||
| data=pa.array(depth_map.ravel()), | data=pa.array(depth_map.ravel()), | ||||
| metadata={ | metadata={ | ||||
| "width": depth_map.shape[1], | "width": depth_map.shape[1], | ||||
| "height": depth_map.shape[0], | "height": depth_map.shape[0], | ||||
| "focal": [ | |||||
| int(f_0), | |||||
| int(f_1), | |||||
| ], | |||||
| "resolution": [ | |||||
| int(r_0), | |||||
| int(r_1), | |||||
| ], | |||||
| "encoding": DEPTH_ENCODING, | |||||
| "focal": [ | |||||
| int(f_0), | |||||
| int(f_1), | |||||
| ], | |||||
| "resolution": [ | |||||
| int(r_0), | |||||
| int(r_1), | |||||
| ], | |||||
| }, | }, | ||||
| ) | ) | ||||