
Refactor the rerun example to use metadata from both the image and bbox definitions, making our inputs more generic.

Rewrite the README documentation to reflect the metadata changes.
tags/v0.3.6-rc0
haixuanTao, 1 year ago. Parent commit: dc0d2f2515
13 changed files with 343 additions and 386 deletions

1. examples/rerun-viewer/dataflow.yml (+34, -44)
2. examples/rerun-viewer/object_detection.py (+0, -45)
3. examples/rerun-viewer/plot.py (+0, -90)
4. examples/rerun-viewer/run.rs (+13, -17)
5. examples/rerun-viewer/webcam.py (+0, -56)
6. node-hub/dora-rerun/README.md (+6, -6)
7. node-hub/dora-rerun/src/main.rs (+116, -56)
8. node-hub/opencv-plot/README.md (+31, -16)
9. node-hub/opencv-plot/opencv_plot/main.py (+42, -11)
10. node-hub/opencv-video-capture/README.md (+27, -13)
11. node-hub/opencv-video-capture/opencv_video_capture/main.py (+13, -2)
12. node-hub/ultralytics-yolo/README.md (+41, -26)
13. node-hub/ultralytics-yolo/ultralytics_yolo/main.py (+20, -4)
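
The core of the change: image geometry and bbox layout now travel as per-message Arrow metadata (`width`, `height`, `encoding`, `format`) instead of per-node `IMAGE_WIDTH`/`IMAGE_HEIGHT` environment variables, so a consumer works at any camera resolution. A minimal sketch of a consumer under the new contract, using the dora Python API as the nodes below do (the node wiring itself is assumed, not part of the commit):

```python
import numpy as np
from dora import Node

node = Node()

for event in node:
    if event["type"] == "INPUT" and event["id"] == "image":
        metadata = event["metadata"]
        width = metadata["width"]
        height = metadata["height"]
        encoding = metadata["encoding"]  # "bgr8" or "rgb8"

        # The geometry comes from the message itself, so no
        # IMAGE_WIDTH/IMAGE_HEIGHT env configuration is needed.
        frame = (
            event["value"]
            .to_numpy()
            .astype(np.uint8)
            .reshape((height, width, 3))
        )
```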

examples/rerun-viewer/dataflow.yml (+34, -44)

@@ -1,48 +1,38 @@
 nodes:
-  - id: webcam
-    custom:
-      source: ./webcam.py
-      inputs:
-        tick:
-          source: dora/timer/millis/10
-          queue_size: 1000
-      outputs:
-        - image
-        - text
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+  - id: camera
+    build: pip install ../../node-hub/opencv-video-capture
+    path: opencv-video-capture
+    inputs:
+      tick: dora/timer/millis/20
+    outputs:
+      - image
+    env:
+      CAPTURE_PATH: 0
+      IMAGE_WIDTH: 640
+      IMAGE_HEIGHT: 480
+      ENCODING: rgb8
 
-  - id: object_detection
-    custom:
-      source: ./object_detection.py
-      inputs:
-        image: webcam/image
-      outputs:
-        - bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+  - id: object-detection
+    build: pip install -e ../../node-hub/ultralytics-yolo
+    path: ultralytics-yolo
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+    outputs:
+      - bbox
+    env:
+      MODEL: yolov8n.pt
+      FORMAT: xywh
 
   - id: rerun
-    custom:
-      source: dora-rerun
-      inputs:
-        image: webcam/image
-        text: webcam/text
-        boxes2d: object_detection/bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
-        IMAGE_DEPTH: 3
-
-  - id: matplotlib
-    custom:
-      source: ./plot.py
-      inputs:
-        image: webcam/image
-        bbox: object_detection/bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+    build: cargo build -p dora-rerun --release
+    path: dora-rerun
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+      boxes2d: object-detection/bbox
+    env:
+      RERUN_FLUSH_TICK_SECS: "0.001"
+      RERUN_MEMORY_LIMIT: 25%
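
Each node reads its `env:` block at startup. A sketch of how the capture node's configuration above is consumed, mirroring opencv-video-capture/main.py further down (the default values here are assumptions):

```python
import os

# Values come from the `env:` section of the dataflow YAML.
capture_path = os.getenv("CAPTURE_PATH", "0")
image_width = int(os.getenv("IMAGE_WIDTH", "640"))
image_height = int(os.getenv("IMAGE_HEIGHT", "480"))
encoding = os.getenv("ENCODING", "bgr8")  # "bgr8" or "rgb8"

# A numeric CAPTURE_PATH selects a camera index rather than a file or URL.
if capture_path.isnumeric():
    capture_path = int(capture_path)
```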

examples/rerun-viewer/object_detection.py (+0, -45)

@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import cv2
-import numpy as np
-from ultralytics import YOLO
-
-from dora import Node
-import pyarrow as pa
-
-model = YOLO("yolov8n.pt")
-
-node = Node()
-
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-
-for event in node:
-    event_type = event["type"]
-    if event_type == "INPUT":
-        event_id = event["id"]
-        if event_id == "image":
-            print("[object detection] received image input")
-            image = event["value"].to_numpy().reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 3))
-
-            frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
-            results = model(frame)  # includes NMS
-            # Process results
-            boxes = np.array(results[0].boxes.xywh.cpu())
-            conf = np.array(results[0].boxes.conf.cpu())
-            label = np.array(results[0].boxes.cls.cpu())
-            # concatenate them together
-            arrays = np.concatenate((boxes, conf[:, None], label[:, None]), axis=1)
-
-            node.send_output("bbox", pa.array(arrays.ravel()), event["metadata"])
-        else:
-            print("[object detection] ignoring unexpected input:", event_id)
-    elif event_type == "STOP":
-        print("[object detection] received stop")
-    elif event_type == "ERROR":
-        print("[object detection] error: ", event["error"])
-    else:
-        print("[object detection] received unexpected event:", event_type)

examples/rerun-viewer/plot.py (+0, -90)

@@ -1,90 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-from dora import Node
-from dora import DoraStatus
-
-import cv2
-import numpy as np
-
-CI = os.environ.get("CI")
-
-font = cv2.FONT_HERSHEY_SIMPLEX
-
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-
-
-class Plotter:
-    """
-    Plot image and bounding box
-    """
-
-    def __init__(self):
-        self.image = []
-        self.bboxs = []
-
-    def on_input(
-        self,
-        dora_input,
-    ) -> DoraStatus:
-        """
-        Put image and bounding box on cv2 window.
-
-        Args:
-            dora_input["id"] (str): Id of the dora_input declared in the yaml configuration
-            dora_input["value"] (arrow array): message of the dora_input
-        """
-        if dora_input["id"] == "image":
-            image = (
-                dora_input["value"].to_numpy().reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 3))
-            )
-
-            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            self.image = image
-
-        elif dora_input["id"] == "bbox" and len(self.image) != 0:
-            bboxs = dora_input["value"].to_numpy()
-            self.bboxs = np.reshape(bboxs, (-1, 6))
-            for bbox in self.bboxs:
-                [
-                    x,
-                    y,
-                    w,
-                    h,
-                    confidence,
-                    label,
-                ] = bbox
-                cv2.rectangle(
-                    self.image,
-                    (int(x - w / 2), int(y - h / 2)),
-                    (int(x + w / 2), int(y + h / 2)),
-                    (0, 255, 0),
-                    2,
-                )
-
-        if CI != "true":
-            cv2.imshow("frame", self.image)
-            if cv2.waitKey(1) & 0xFF == ord("q"):
-                return DoraStatus.STOP
-
-        return DoraStatus.CONTINUE
-
-
-plotter = Plotter()
-node = Node()
-
-for event in node:
-    event_type = event["type"]
-    if event_type == "INPUT":
-        status = plotter.on_input(event)
-        if status == DoraStatus.CONTINUE:
-            pass
-        elif status == DoraStatus.STOP:
-            print("plotter returned stop status")
-            break
-    elif event_type == "STOP":
-        print("received stop")
-    else:
-        print("received unexpected event:", event_type)

examples/rerun-viewer/run.rs (+13, -17)

@@ -1,5 +1,4 @@
 use dora_core::{get_pip_path, get_python_path, run};
-use dora_download::download_file;
 use dora_tracing::set_up_tracing;
 use eyre::{bail, ContextCompat, WrapErr};
 use std::path::Path;
@@ -51,20 +50,13 @@ async fn main() -> eyre::Result<()> {
         );
     }
 
-    run(
-        get_python_path().context("Could not get pip binary")?,
-        &["-m", "pip", "install", "--upgrade", "pip"],
-        None,
-    )
-    .await
-    .context("failed to install pip")?;
     run(
         get_pip_path().context("Could not get pip binary")?,
-        &["install", "-r", "requirements.txt"],
-        None,
+        &["install", "maturin"],
+        Some(venv),
     )
     .await
-    .context("pip install failed")?;
+    .context("pip install maturin failed")?;
 
     run(
         "maturin",
@@ -73,12 +65,6 @@ async fn main() -> eyre::Result<()> {
     )
     .await
     .context("maturin develop failed")?;
-    download_file(
-        "https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt",
-        Path::new("yolov8n.pt"),
-    )
-    .await
-    .context("Could not download weights.")?;
 
     let dataflow = Path::new("dataflow.yml");
     run_dataflow(dataflow).await?;
@@ -88,6 +74,16 @@ async fn main() -> eyre::Result<()> {
 
 async fn run_dataflow(dataflow: &Path) -> eyre::Result<()> {
     let cargo = std::env::var("CARGO").unwrap();
+
+    // First build the dataflow (install requirements)
+    let mut cmd = tokio::process::Command::new(&cargo);
+    cmd.arg("run");
+    cmd.arg("--package").arg("dora-cli");
+    cmd.arg("--").arg("build").arg(dataflow);
+    if !cmd.status().await?.success() {
+        bail!("failed to build dataflow");
+    };
+
     let mut cmd = tokio::process::Command::new(&cargo);
     cmd.arg("run");
     cmd.arg("--package").arg("dora-cli");


examples/rerun-viewer/webcam.py (+0, -56)

@@ -1,56 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import time
-import numpy as np
-import cv2
-
-from dora import Node
-import pyarrow as pa
-
-node = Node()
-
-IMAGE_INDEX = int(os.getenv("IMAGE_INDEX", 0))
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-video_capture = cv2.VideoCapture(IMAGE_INDEX)
-video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, IMAGE_WIDTH)
-video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, IMAGE_HEIGHT)
-font = cv2.FONT_HERSHEY_SIMPLEX
-
-start = time.time()
-
-# Run for 20 seconds
-while time.time() - start < 1000:
-    # Wait next dora_input
-    event = node.next()
-    if event is None:
-        break
-
-    event_type = event["type"]
-    if event_type == "INPUT":
-        ret, frame = video_capture.read()
-        if not ret:
-            frame = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=np.uint8)
-            cv2.putText(
-                frame,
-                "No Webcam was found at index %d" % (IMAGE_INDEX),
-                (int(30), int(30)),
-                font,
-                0.75,
-                (255, 255, 255),
-                2,
-                1,
-            )
-        if len(frame) != IMAGE_HEIGHT * IMAGE_WIDTH * 3:
-            print("frame size is not correct")
-            frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
-
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        node.send_output(
-            "image",
-            pa.array(frame.ravel()),
-            event["metadata"],
-        )
-        node.send_output("text", pa.array([f"send image at: {time.time()}"]))

node-hub/dora-rerun/README.md (+6, -6)

@@ -25,15 +25,15 @@ cargo install --git https://github.com/dora-rs/dora dora-rerun
     text: webcam/text
     boxes2d: object_detection/bbox
   envs:
-    IMAGE_WIDTH: 960
-    IMAGE_HEIGHT: 540
-    IMAGE_DEPTH: 3
+    RERUN_MEMORY_LIMIT: 25%
 ```
 
+## Input definition
+
+- image: UInt8Array + metadata { "width": int, "height": int, "encoding": str }
+- boxes2D: StructArray + metadata { "format": str }
+- text: StringArray
+
 ## Configurations
 
-- IMAGE_WIDTH: Image width in pixels
-- IMAGE_HEIGHT: Image height in pixels
-- IMAGE_DEPTH: Image depth
 - RERUN_MEMORY_LIMIT: Rerun memory limit
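
A sketch of a producer honoring this input definition from Python; the struct field names (`bbox`, `conf`, `labels`) follow the ultralytics-yolo node in this commit, and the values are placeholders:

```python
import numpy as np
import pyarrow as pa
from dora import Node

node = Node()

# image: UInt8Array + metadata {"width", "height", "encoding"}
frame = np.zeros((480, 640, 3), dtype=np.uint8)
node.send_output(
    "image",
    pa.array(frame.ravel()),
    {"width": 640, "height": 480, "encoding": "rgb8"},
)

# boxes2d: StructArray + metadata {"format": "xywh" or "xyxy"}
bbox = {
    "bbox": np.array([320.0, 240.0, 100.0, 80.0], dtype=np.float32),
    "conf": np.array([0.9], dtype=np.float32),
    "labels": ["person"],
}
node.send_output("boxes2d", pa.array([bbox]), {"format": "xywh"})
```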

node-hub/dora-rerun/src/main.rs (+116, -56)

@@ -3,12 +3,15 @@
 use std::env::VarError;
 
 use dora_node_api::{
-    arrow::array::{Float32Array, StringArray, UInt8Array},
-    DoraNode, Event,
+    arrow::{
+        array::{AsArray, StringArray, StructArray, UInt8Array},
+        datatypes::Float32Type,
+    },
+    DoraNode, Event, Parameter,
 };
-use eyre::{eyre, Context, Result};
+use eyre::{eyre, Context, ContextCompat, Result};
 use rerun::{
-    external::re_types::ArrowBuffer, SpawnOptions, TensorBuffer, TensorData, TensorDimension,
+    external::re_types::ArrowBuffer, SpawnOptions, TensorBuffer, TensorData, TensorDimension, Text,
 };
 
 fn main() -> Result<()> {
@@ -39,60 +42,65 @@ fn main() -> Result<()> {
         .context("Could not spawn rerun visualization")?;
 
     while let Some(event) = events.recv() {
-        if let Event::Input {
-            id,
-            data,
-            metadata: _,
-        } = event
-        {
+        if let Event::Input { id, data, metadata } = event {
             if id.as_str().contains("image") {
+                let height =
+                    if let Some(Parameter::Integer(height)) = metadata.parameters.get("height") {
+                        height
+                    } else {
+                        &480
+                    };
+                let width =
+                    if let Some(Parameter::Integer(width)) = metadata.parameters.get("width") {
+                        width
+                    } else {
+                        &640
+                    };
+                let encoding = if let Some(Parameter::String(encoding)) =
+                    metadata.parameters.get("encoding")
+                {
+                    encoding
+                } else {
+                    "bgr8"
+                };
+                let channels = if encoding == "bgr8" { 3 } else { 3 };
+
                 let shape = vec![
                     TensorDimension {
                         name: Some("height".into()),
-                        size: std::env::var(format!("{}_HEIGHT", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_HEIGHT env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_HEIGHT",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: *height as u64,
                     },
                     TensorDimension {
                         name: Some("width".into()),
-                        size: std::env::var(format!("{}_WIDTH", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_WIDTH env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_WIDTH",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: *width as u64,
                     },
                     TensorDimension {
                         name: Some("depth".into()),
-                        size: std::env::var(format!("{}_DEPTH", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_DEPTH env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_DEPTH",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: channels as u64,
                     },
                 ];
 
-                let buffer: UInt8Array = data.to_data().into();
-                let buffer: &[u8] = buffer.values();
-                let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
-                let tensordata = TensorData::new(shape.clone(), buffer);
-                let image = rerun::Image::new(tensordata);
+                let image = if encoding == "bgr8" {
+                    let buffer: &UInt8Array = data.as_any().downcast_ref().unwrap();
+                    let buffer: &[u8] = buffer.values();
+
+                    // Transpose values from BGR to RGB
+                    let buffer: Vec<u8> =
+                        buffer.chunks(3).flat_map(|x| [x[2], x[1], x[0]]).collect();
+                    let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
+                    let tensordata = TensorData::new(shape.clone(), buffer);
+
+                    rerun::Image::new(tensordata)
+                } else if encoding == "rgb8" {
+                    let buffer: &UInt8Array = data.as_any().downcast_ref().unwrap();
+                    let buffer: &[u8] = buffer.values();
+                    let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
+                    let tensordata = TensorData::new(shape.clone(), buffer);
+
+                    rerun::Image::new(tensordata)
+                } else {
+                    unimplemented!("We haven't worked on additional encodings.")
+                };
 
                 rec.log(id.as_str(), &image)
                     .context("could not log image")?;
@@ -107,21 +115,73 @@ fn main() -> Result<()> {
                     }
                 })?;
             } else if id.as_str().contains("boxes2d") {
-                let buffer: Float32Array = data.to_data().into();
-                let buffer: &[f32] = buffer.values();
+                let bbox_struct: StructArray = data.to_data().into();
+                let format =
+                    if let Some(Parameter::String(format)) = metadata.parameters.get("format") {
+                        format
+                    } else {
+                        "xyxy"
+                    };
+
+                // Cast Bbox
+                let bbox_buffer = bbox_struct
+                    .column_by_name("bbox")
+                    .context("Did not find bbox field within bbox struct")?;
+                let bbox = bbox_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize bbox as list")?
+                    .values();
+                let bbox = bbox
+                    .as_primitive_opt::<Float32Type>()
+                    .context("Could not deserialize bbox values as float32")?
+                    .values();
+
+                // Cast Labels
+                let labels_buffer = bbox_struct
+                    .column_by_name("labels")
+                    .context("Did not find labels field within bbox struct")?;
+                let labels = labels_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize labels as list")?
+                    .values();
+                let labels = labels
+                    .as_string_opt::<i32>()
+                    .context("Could not deserialize labels as string")?;
+                let labels: Vec<Text> = labels.iter().map(|x| Text::from(x.unwrap())).collect();
+
+                // Cast confidence
+                let conf_buffer = bbox_struct
+                    .column_by_name("conf")
+                    .context("Did not find conf field within bbox struct")?;
+                let conf = conf_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize conf as list")?
+                    .values();
+                let _conf = conf
+                    .as_primitive_opt::<Float32Type>()
+                    .context("Could not deserialize conf as float32")?;
 
                 let mut centers = vec![];
                 let mut sizes = vec![];
-                let mut classes = vec![];
-                buffer.chunks(6).for_each(|block| {
-                    if let [x, y, w, h, _conf, cls] = block {
-                        centers.push((*x, *y));
-                        sizes.push((*w, *h));
-                        classes.push(*cls as u16);
-                    }
-                });
 
+                if format == "xywh" {
+                    bbox.chunks(4).for_each(|block| {
+                        if let [x, y, w, h] = block {
+                            centers.push((*x, *y));
+                            sizes.push((*w, *h));
+                        }
+                    });
+                } else if format == "xyxy" {
+                    bbox.chunks(4).for_each(|block| {
+                        if let [min_x, min_y, max_x, max_y] = block {
+                            centers.push(((max_x + min_x) / 2., (max_y + min_y) / 2.));
+                            sizes.push(((max_x - min_x), (max_y - min_y)));
+                        }
+                    });
+                }
                 rec.log(
                     id.as_str(),
-                    &rerun::Boxes2D::from_centers_and_sizes(centers, sizes).with_class_ids(classes),
+                    &rerun::Boxes2D::from_centers_and_sizes(centers, sizes).with_labels(labels),
                 )
                 .wrap_err("Could not log Boxes2D")?;
             }
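
For reference, the center/size conversion the new boxes2d branch performs, written as a numpy sketch (equivalent math only, not the node's actual code):

```python
import numpy as np

def to_centers_and_sizes(bbox: np.ndarray, fmt: str):
    """bbox: (N, 4) array; fmt: "xywh" or "xyxy", per the "format" metadata."""
    if fmt == "xywh":
        # Already center x, center y, width, height.
        return bbox[:, :2], bbox[:, 2:]
    if fmt == "xyxy":
        centers = (bbox[:, :2] + bbox[:, 2:]) / 2.0
        sizes = bbox[:, 2:] - bbox[:, :2]
        return centers, sizes
    raise ValueError(f"unsupported bbox format: {fmt}")
```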


node-hub/opencv-plot/README.md (+31, -16)

@@ -23,21 +23,36 @@ This node is used to plot a text and a list of bbox on a base image (ideal for o
 - `image`: Arrow array containing the base image
 
 ```python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": bytes,
-    "data": np.array  # flattened image data
-}
+## Image data
+image_data: UInt8Array  # Example: pa.array(img.ravel())
+metadata = {
+    "width": 640,
+    "height": 480,
+    "encoding": str,  # bgr8, rgb8
+}
 
-encoded_image = pa.array([image])
+## Example
+node.send_output(
+    "image", image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+)
 
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+## Decoding
+storage = event["value"]
+
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```
 
 - `bbox`: an arrow array containing the bounding boxes, confidence scores, and class names of the detected objects
@@ -47,15 +62,15 @@ decoded_image = {
 bbox: {
     "bbox": np.array,  # flattened array of bounding boxes
     "conf": np.array,  # flat array of confidence scores
-    "names": np.array,  # flat array of class names
+    "labels": np.array,  # flat array of class names
 }
 
-encoded_bbox = pa.array([bbox])
+encoded_bbox = pa.array([bbox])  # sent with metadata {"format": "xyxy"}
 
 decoded_bbox = {
-    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 3),
+    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 4),
     "conf": encoded_bbox[0]["conf"].values.to_numpy(),
-    "names": encoded_bbox[0]["names"].values.to_numpy(zero_copy_only=False),
+    "labels": encoded_bbox[0]["labels"].values.to_numpy(zero_copy_only=False),
 }
 ```
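
Since the plot draws in xyxy, an incoming xywh payload has to be converted first; a numpy sketch of the normalization the node applies (names are illustrative):

```python
import numpy as np

def to_xyxy(bbox: np.ndarray, fmt: str) -> np.ndarray:
    """Normalize (N, 4) boxes to xyxy, following the "format" metadata."""
    if fmt == "xyxy":
        return bbox
    if fmt == "xywh":
        x, y, w, h = bbox.T  # xywh is center x, center y, width, height
        return np.stack([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=1)
    raise RuntimeError(f"Unsupported bbox format: {fmt}")
```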



node-hub/opencv-plot/opencv_plot/main.py (+42, -11)

@@ -16,7 +16,7 @@ class Plot:
     bboxes: dict = {
         "bbox": np.array([]),
         "conf": np.array([]),
-        "names": np.array([]),
+        "labels": np.array([]),
     }
 
     text: str = ""
@@ -26,7 +26,7 @@ class Plot:
 
 
 def plot_frame(plot):
-    for bbox in zip(plot.bboxes["bbox"], plot.bboxes["conf"], plot.bboxes["names"]):
+    for bbox in zip(plot.bboxes["bbox"], plot.bboxes["conf"], plot.bboxes["labels"]):
         [
             [min_x, min_y, max_x, max_y],
             confidence,
@@ -139,26 +139,57 @@ def main():
                 if encoding == "bgr8":
                     channels = 3
                     storage_type = np.uint8
+                    plot.frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                        .copy()  # Copy so that we can add annotations on the image
+                    )
+                elif encoding == "rgb8":
+                    channels = 3
+                    storage_type = np.uint8
+                    frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                    )
+
+                    plot.frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+                else:
+                    raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
-                plot.frame = (
-                    storage.to_numpy()
-                    .astype(storage_type)
-                    .reshape((height, width, channels))
-                    .copy()  # Copy So that we can add annotation on the image
-                )
-
                 plot_frame(plot)
                 if not RUNNER_CI:
                     if cv2.waitKey(1) & 0xFF == ord("q"):
                         break
             elif event_id == "bbox":
                 arrow_bbox = event["value"][0]
+                bbox_format = event["metadata"]["format"]
+
+                if bbox_format == "xyxy":
+                    bbox = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
+                elif bbox_format == "xywh":
+                    original_bbox = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
+                    bbox = np.array(
+                        [
+                            (
+                                x - w / 2,
+                                y - h / 2,
+                                x + w / 2,
+                                y + h / 2,
+                            )
+                            for [x, y, w, h] in original_bbox
+                        ]
+                    )
+                else:
+                    raise RuntimeError(f"Unsupported bbox format: {bbox_format}")
+
                 plot.bboxes = {
-                    "bbox": arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4),
+                    "bbox": bbox,
                     "conf": arrow_bbox["conf"].values.to_numpy(),
-                    "names": arrow_bbox["names"].values.to_numpy(zero_copy_only=False),
+                    "labels": arrow_bbox["labels"].values.to_numpy(
+                        zero_copy_only=False
+                    ),
                 }
             elif event_id == "text":
                 plot.text = event["value"][0].as_py()

node-hub/opencv-video-capture/README.md (+27, -13)

@@ -29,22 +29,36 @@ This node is used to capture video from a camera using OpenCV.
 - `image`: an arrow array containing the captured image
 
 ```Python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": str,
-    "data": np.array  # flattened image data
-}
+## Image data
+image_data: UInt8Array  # Example: pa.array(img.ravel())
+metadata = {
+    "width": 640,
+    "height": 480,
+    "encoding": str,  # bgr8, rgb8
+}
 
-encoded_image = pa.array([image])
+## Example
+node.send_output(
+    "image", image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+)
 
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+## Decoding
+storage = event["value"]
+
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```
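
Putting both halves together, a minimal capture loop in the style of this node (a sketch: the tick input and black-frame fallback mirror the node's behavior, but this is not its exact code):

```python
import cv2
import numpy as np
import pyarrow as pa
from dora import Node

node = Node()
video_capture = cv2.VideoCapture(0)  # CAPTURE_PATH

for event in node:
    if event["type"] == "INPUT" and event["id"] == "tick":
        ret, frame = video_capture.read()
        if not ret:
            # Fall back to a black frame if the camera read fails.
            frame = np.zeros((480, 640, 3), dtype=np.uint8)

        metadata = event["metadata"]
        metadata["width"] = int(frame.shape[1])
        metadata["height"] = int(frame.shape[0])
        metadata["encoding"] = "bgr8"  # OpenCV frames are BGR by default

        node.send_output("image", pa.array(frame.ravel()), metadata)
```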

## License


node-hub/opencv-video-capture/opencv_video_capture/main.py (+13, -2)

@@ -50,6 +50,7 @@ def main():
     args = parser.parse_args()
 
     video_capture_path = os.getenv("CAPTURE_PATH", args.path)
+    encoding = os.getenv("ENCODING", "bgr8")
 
     if isinstance(video_capture_path, str) and video_capture_path.isnumeric():
         video_capture_path = int(video_capture_path)
@@ -102,15 +103,25 @@ def main():
                 )
 
                 # resize the frame
-                if image_width is not None and image_height is not None:
+                if (
+                    image_width is not None
+                    and image_height is not None
+                    and (
+                        frame.shape[1] != image_width or frame.shape[0] != image_height
+                    )
+                ):
                     frame = cv2.resize(frame, (image_width, image_height))
 
+                # Get the right encoding
+                if encoding == "rgb8":
+                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
                 storage = pa.array(frame.ravel())
 
                 metadata = event["metadata"]
                 metadata["width"] = int(frame.shape[1])
                 metadata["height"] = int(frame.shape[0])
-                metadata["encoding"] = "bgr8"
+                metadata["encoding"] = encoding
 
                 node.send_output("image", storage, metadata)



node-hub/ultralytics-yolo/README.md (+41, -26)

@@ -5,16 +5,16 @@ This node is used to detect objects in images using YOLOv8.
 # YAML
 
 ```yaml
-- id: object_detection
-  build: pip install ../../node-hub/ultralytics-yolo
-  path: ultralytics-yolo
-  inputs:
-    image: webcam/image
-  outputs:
-    - bbox
-  env:
-    MODEL: yolov5n.pt
+  - id: object_detection
+    build: pip install ../../node-hub/ultralytics-yolo
+    path: ultralytics-yolo
+    inputs:
+      image: webcam/image
+    outputs:
+      - bbox
+    env:
+      MODEL: yolov5n.pt
 ```
 
 # Inputs
@@ -22,21 +22,36 @@ This node is used to detect objects in images using YOLOv8.
 
 - `image`: Arrow array containing the base image
 
 ```python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": str,
-    "data": np.array  # flattened image data
-}
+## Image data
+image_data: UInt8Array  # Example: pa.array(img.ravel())
+metadata = {
+    "width": 640,
+    "height": 480,
+    "encoding": str,  # bgr8, rgb8
+}
 
-encoded_image = pa.array([image])
+## Example
+node.send_output(
+    "image", image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+)
 
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+## Decoding
+storage = event["value"]
+
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 
 ```
 
@@ -49,15 +64,15 @@ decoded_image = {
 bbox: {
     "bbox": np.array,  # flattened array of bounding boxes
     "conf": np.array,  # flat array of confidence scores
-    "names": np.array,  # flat array of class names
+    "labels": np.array,  # flat array of class names
 }
 
-encoded_bbox = pa.array([bbox])
+encoded_bbox = pa.array([bbox])  # sent with metadata {"format": "xyxy"}
 
 decoded_bbox = {
-    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 3),
+    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 4),
     "conf": encoded_bbox[0]["conf"].values.to_numpy(),
-    "names": encoded_bbox[0]["names"].values.to_numpy(zero_copy_only=False),
+    "labels": encoded_bbox[0]["labels"].values.to_numpy(zero_copy_only=False),
 }
 ```
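
On the consumer side, the `format` metadata set by this node tells downstream code how to interpret the four box values; a sketch of decoding inside an event loop (the node wiring is assumed):

```python
import numpy as np
from dora import Node

node = Node()

for event in node:
    if event["type"] == "INPUT" and event["id"] == "bbox":
        arrow_bbox = event["value"][0]
        bbox_format = event["metadata"]["format"]  # "xyxy" or "xywh"

        boxes = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
        conf = arrow_bbox["conf"].values.to_numpy()
        labels = arrow_bbox["labels"].values.to_numpy(zero_copy_only=False)
```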



node-hub/ultralytics-yolo/ultralytics_yolo/main.py (+20, -4)

@@ -32,6 +32,7 @@ def main():
     args = parser.parse_args()
 
     model_path = os.getenv("MODEL", args.model)
+    bbox_format = os.getenv("FORMAT", "xyxy")
 
     model = YOLO(model_path)
     node = Node(args.name)
@@ -54,6 +55,9 @@ def main():
                 if encoding == "bgr8":
                     channels = 3
                     storage_type = np.uint8
+                elif encoding == "rgb8":
+                    channels = 3
+                    storage_type = np.uint8
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
@@ -64,12 +68,20 @@ def main():
                 )
                 if encoding == "bgr8":
                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
+                elif encoding == "rgb8":
+                    pass
+                else:
+                    raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
                 results = model(frame, verbose=False)  # includes NMS
 
-                bboxes = np.array(results[0].boxes.xyxy.cpu())
+                if bbox_format == "xyxy":
+                    bboxes = np.array(results[0].boxes.xyxy.cpu())
+                elif bbox_format == "xywh":
+                    bboxes = np.array(results[0].boxes.xywh.cpu())
+                else:
+                    raise RuntimeError(f"Unsupported bbox format: {bbox_format}")
+
                 conf = np.array(results[0].boxes.conf.cpu())
                 labels = np.array(results[0].boxes.cls.cpu())
 
@@ -78,13 +90,17 @@ def main():
                 bbox = {
                     "bbox": bboxes.ravel(),
                     "conf": conf,
-                    "names": names,
+                    "labels": names,
                 }
+                bbox = pa.array([bbox])
 
+                metadata = event["metadata"]
+                metadata["format"] = bbox_format
+
                 node.send_output(
                     "bbox",
-                    pa.array([bbox]),
-                    event["metadata"],
+                    bbox,
+                    metadata,
                 )
 
             elif event_type == "ERROR":

