Browse Source

Adding monochrome encoding and examples

tags/v0.3.12
haixuantao 7 months ago
parent
commit
2cef9eb626
7 changed files with 133 additions and 24 deletions
  1. +2
    -2
      Cargo.lock
  2. +54
    -0
      examples/vggt/depth-to-avif.yaml
  3. +0
    -8
      examples/vggt/depth.dora-session.yaml
  4. +34
    -0
      examples/vggt/image_saver.py
  5. +1
    -1
      node-hub/dora-rav1e/Cargo.toml
  6. +23
    -2
      node-hub/dora-rav1e/src/lib.rs
  7. +19
    -11
      node-hub/dora-vggt/dora_vggt/main.py

+ 2
- 2
Cargo.lock View File

@@ -1165,9 +1165,9 @@ dependencies = [

[[package]]
name = "avif-serialize"
version = "0.8.3"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98922d6a4cfbcb08820c69d8eeccc05bb1f29bfa06b4f5b1dbfe9a868bd7608e"
checksum = "19135c0c7a60bfee564dbe44ab5ce0557c6bf3884e5291a50be76a15640c4fbd"
dependencies = [
"arrayvec",
]


+ 54
- 0
examples/vggt/depth-to-avif.yaml View File

@@ -0,0 +1,54 @@
# Dataflow: camera -> VGGT depth estimation -> AV1F (monochrome AVIF)
# encoding of both the RGB image and the mono16 depth map, with the
# encoded frames saved to disk and the raw streams visualized in rerun.
#
# NOTE(review): indentation reconstructed from a whitespace-mangled
# source; structure follows the standard dora dataflow schema.
nodes:
  - id: camera
    build: pip install opencv-video-capture
    path: opencv-video-capture
    inputs:
      # 10 Hz capture tick.
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 1

  - id: dora-vggt
    build: pip install -e ../../node-hub/dora-vggt
    path: dora-vggt
    inputs:
      image: camera/image
    outputs:
      - depth
      - image
    env:
      # Emit depth as millimeter-scaled uint16 so rav1e can encode it
      # as a monochrome stream.
      DEPTH_ENCODING: mono16

  - id: rav1e-depth
    path: dora-rav1e
    build: cargo build -p dora-rav1e --release
    inputs:
      depth: dora-vggt/depth
    outputs:
      - depth
    env:
      ENCODING: avif

  - id: rav1e-image
    path: dora-rav1e
    build: cargo build -p dora-rav1e --release
    inputs:
      image: dora-vggt/image
    outputs:
      - image
    env:
      ENCODING: avif

  - id: bench
    path: image_saver.py
    inputs:
      # NOTE(review): `camera_depth` is wired to the encoded *image*
      # stream and `vggt_depth` to the encoded depth stream — the first
      # input name looks misleading; confirm intent upstream.
      camera_depth: rav1e-image/image
      vggt_depth: rav1e-depth/depth

  - id: plot
    build: pip install dora-rerun
    path: dora-rerun
    inputs:
      camera/image: dora-vggt/image
      camera/depth: dora-vggt/depth

+ 0
- 8
examples/vggt/depth.dora-session.yaml View File

@@ -1,8 +0,0 @@
# Machine-generated dora session file (removed by this commit — such
# files record a local build and should not be committed).
#
# NOTE(review): indentation reconstructed from a whitespace-mangled
# source; whether `node_working_dirs` nests under `local_build` cannot
# be confirmed from here — verify against the dora session schema.
build_id: 2b402c1e-e52e-45e9-86e5-236b33a77369
session_id: 275de19c-e605-4865-bc5f-2f15916bade9
git_sources: {}
local_build:
node_working_dirs:
  camera: /Users/xaviertao/Documents/work/dora/examples/vggt
  dora-vggt: /Users/xaviertao/Documents/work/dora/examples/vggt
  plot: /Users/xaviertao/Documents/work/dora/examples/vggt

+ 34
- 0
examples/vggt/image_saver.py View File

@@ -0,0 +1,34 @@
"""Save incoming encoded frames under ``out/``.

Caches the most recent payload of every non-lead input and flushes the
whole cache to disk each time a frame arrives on the lead topic, so
files sharing the same index ``i`` belong to (roughly) the same
capture instant.
"""

import os

from dora import Node

node = Node()

# Latest cached event per non-lead input id; flushed on every lead frame.
index_dict = {}
# Monotonically increasing frame index used in output filenames.
i = 0

# Any input id containing this substring triggers a flush to disk.
LEAD_TOPIC = "vggt_depth"

# Fix: the original assumed `out/` already existed and crashed with
# FileNotFoundError on the first write otherwise.
os.makedirs("out", exist_ok=True)

for event in node:
    if event["type"] == "INPUT":
        if LEAD_TOPIC in event["id"]:
            storage = event["value"]
            metadata = event["metadata"]
            # File extension mirrors the transport encoding (e.g. avif).
            encoding = metadata["encoding"]

            # Write the lead frame itself.
            filename = f"out/{event['id']}_{i}.{encoding}"
            with open(filename, "wb") as f:
                f.write(storage.to_numpy())

            # Flush every cached non-lead frame with the same index.
            # NOTE(review): entries are never cleared after a flush, so
            # a topic that stops publishing keeps re-writing its last
            # frame — presumably intentional "latest value" semantics;
            # confirm.
            for key, value in index_dict.items():
                filename = f"out/{key}_{i}.{value['metadata']['encoding']}"
                with open(filename, "wb") as f:
                    f.write(value["value"])
            i += 1
        else:
            # Cache the latest payload for this topic until the next
            # lead frame triggers a flush.
            index_dict[event["id"]] = {
                "type": event["type"],
                "value": event["value"].to_numpy(),
                "metadata": event["metadata"],
            }

+ 1
- 1
node-hub/dora-rav1e/Cargo.toml View File

@@ -25,7 +25,7 @@ pyo3 = { workspace = true, features = [
"eyre",
"generate-import-lib",
], optional = true }
avif-serialize = "0.8.3"
avif-serialize = "0.8.4"


[lib]


+ 23
- 2
node-hub/dora-rav1e/src/lib.rs View File

@@ -336,7 +336,7 @@ pub fn lib_main() -> Result<()> {
if let Some(buffer) = data.as_primitive_opt::<UInt16Type>() {
let mut buffer = buffer.values().to_vec();
if std::env::var("FILL_ZEROS")
.map(|s| s != "false")
.map(|s| s.to_lowercase() != "false")
.unwrap_or(true)
{
fill_zeros_toward_center_y_plane_in_place(&mut buffer, width, height);
@@ -370,7 +370,28 @@ pub fn lib_main() -> Result<()> {
let data = pkt.data;
match output_encoding.as_str() {
"avif" => {
warn!("avif encoding not supported for mono16");
metadata.parameters.insert(
"encoding".to_string(),
Parameter::String("avif".to_string()),
);

let data = avif_serialize::Aviffy::new()
.full_color_range(false)
.set_seq_profile(0)
.set_monochrome(true)
.to_vec(
&data,
None,
enc.width as u32,
enc.height as u32,
enc.bit_depth as u8,
);

let arrow = data.into_arrow();

node.send_output(id, metadata.parameters.clone(), arrow)
.context("could not send output")
.unwrap();
}
_ => {
metadata.parameters.insert(


+ 19
- 11
node-hub/dora-vggt/dora_vggt/main.py View File

@@ -1,6 +1,7 @@
"""TODO: Add docstring."""

import io
import os
from collections import deque as Deque

import cv2
@@ -17,11 +18,15 @@ from vggt.utils.pose_enc import pose_encoding_to_extri_intri

dtype = torch.bfloat16

# Check if cuda is available and set the device accordingly
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize the model and load the pretrained weights.
# This will automatically download the model weights the first time it's run, which may take a while.
model = VGGT.from_pretrained("facebook/VGGT-1B").to("cuda")
model = VGGT.from_pretrained("facebook/VGGT-1B").to(device)
model.eval()

DEPTH_ENCODING = os.environ.get("DEPTH_ENCODING", "float64")
# Import vecdeque


@@ -32,7 +37,6 @@ def main():

for event in node:
if event["type"] == "INPUT":

if "image" in event["id"]:
storage = event["value"]
metadata = event["metadata"]
@@ -80,7 +84,7 @@ def main():
raw_images.append(buffer)

with torch.no_grad():
images = load_and_preprocess_images(raw_images).to("cuda")
images = load_and_preprocess_images(raw_images).to(device)

images = images[None] # add batch dimension
aggregated_tokens_list, ps_idx = model.aggregator(images)
@@ -107,20 +111,24 @@ def main():
depth_map = depth_map[-1][-1].cpu().numpy()

# Warning: Make sure to add my_output_id and my_input_id within the dataflow.
if DEPTH_ENCODING == "mono16":
depth_map = (depth_map * 1000).astype(np.uint16)

node.send_output(
output_id="depth",
data=pa.array(depth_map.ravel()),
metadata={
"width": depth_map.shape[1],
"height": depth_map.shape[0],
"focal": [
int(f_0),
int(f_1),
],
"resolution": [
int(r_0),
int(r_1),
],
"encoding": DEPTH_ENCODING,
"focal": [
int(f_0),
int(f_1),
],
"resolution": [
int(r_0),
int(r_1),
],
},
)



Loading…
Cancel
Save