diff --git a/examples/rerun-viewer/dataflow.yml b/examples/rerun-viewer/dataflow.yml
index 5e179f02..1a484807 100644
--- a/examples/rerun-viewer/dataflow.yml
+++ b/examples/rerun-viewer/dataflow.yml
@@ -1,48 +1,38 @@
 nodes:
-  - id: webcam
-    custom:
-      source: ./webcam.py
-      inputs:
-        tick:
-          source: dora/timer/millis/10
-          queue_size: 1000
-      outputs:
-        - image
-        - text
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+  - id: camera
+    build: pip install ../../node-hub/opencv-video-capture
+    path: opencv-video-capture
+    inputs:
+      tick: dora/timer/millis/20
+    outputs:
+      - image
+    env:
+      CAPTURE_PATH: 0
+      IMAGE_WIDTH: 640
+      IMAGE_HEIGHT: 480
+      ENCODING: rgb8
 
-  - id: object_detection
-    custom:
-      source: ./object_detection.py
-      inputs:
-        image: webcam/image
-      outputs:
-        - bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+  - id: object-detection
+    build: pip install -e ../../node-hub/ultralytics-yolo
+    path: ultralytics-yolo
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+    outputs:
+      - bbox
+    env:
+      MODEL: yolov8n.pt
+      FORMAT: xywh
 
   - id: rerun
-    custom:
-      source: dora-rerun
-      inputs:
-        image: webcam/image
-        text: webcam/text
-        boxes2d: object_detection/bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
-        IMAGE_DEPTH: 3
-
-  - id: matplotlib
-    custom:
-      source: ./plot.py
-      inputs:
-        image: webcam/image
-        bbox: object_detection/bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
\ No newline at end of file
+    build: cargo build -p dora-rerun --release
+    path: dora-rerun
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+      boxes2d: object-detection/bbox
+    env:
+      RERUN_FLUSH_TICK_SECS: "0.001"
+      RERUN_MEMORY_LIMIT: 25%
diff --git a/examples/rerun-viewer/object_detection.py b/examples/rerun-viewer/object_detection.py
deleted file mode 100755
index 2c606be9..00000000
--- a/examples/rerun-viewer/object_detection.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import cv2
-import numpy as np
-from ultralytics import YOLO
-
-from dora import Node
-import pyarrow as pa
-
-model = YOLO("yolov8n.pt")
-
-node = Node()
-
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-
-for event in node:
-    event_type = event["type"]
-    if event_type == "INPUT":
-        event_id = event["id"]
-        if event_id == "image":
-            print("[object detection] received image input")
-            image = event["value"].to_numpy().reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 3))
-
-            frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
-            results = model(frame)  # includes NMS
-            # Process results
-            boxes = np.array(results[0].boxes.xywh.cpu())
-            conf = np.array(results[0].boxes.conf.cpu())
-            label = np.array(results[0].boxes.cls.cpu())
-            # concatenate them together
-            arrays = np.concatenate((boxes, conf[:, None], label[:, None]), axis=1)
-
-            node.send_output("bbox", pa.array(arrays.ravel()), event["metadata"])
-        else:
-            print("[object detection] ignoring unexpected input:", event_id)
-    elif event_type == "STOP":
-        print("[object detection] received stop")
-    elif event_type == "ERROR":
-        print("[object detection] error: ", event["error"])
-    else:
-        print("[object detection] received unexpected event:", event_type)
diff --git a/examples/rerun-viewer/plot.py b/examples/rerun-viewer/plot.py
deleted file mode 100755
index d6ec8389..00000000
--- a/examples/rerun-viewer/plot.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-
-from dora import Node
-from dora import DoraStatus
-
-import cv2
-import numpy as np
-
-CI = os.environ.get("CI")
-
-font = cv2.FONT_HERSHEY_SIMPLEX
-
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-
-
-class Plotter:
-    """
-    Plot image and bounding box
-    """
-
-    def __init__(self):
-        self.image = []
-        self.bboxs = []
-
-    def on_input(
-        self,
-        dora_input,
-    ) -> DoraStatus:
-        """
-        Put image and bounding box on cv2 window.
-
-        Args:
-            dora_input["id"] (str): Id of the dora_input declared in the yaml configuration
-            dora_input["value"] (arrow array): message of the dora_input
-        """
-        if dora_input["id"] == "image":
-            image = (
-                dora_input["value"].to_numpy().reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 3))
-            )
-
-            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            self.image = image
-
-        elif dora_input["id"] == "bbox" and len(self.image) != 0:
-            bboxs = dora_input["value"].to_numpy()
-            self.bboxs = np.reshape(bboxs, (-1, 6))
-            for bbox in self.bboxs:
-                [
-                    x,
-                    y,
-                    w,
-                    h,
-                    confidence,
-                    label,
-                ] = bbox
-                cv2.rectangle(
-                    self.image,
-                    (int(x - w / 2), int(y - h / 2)),
-                    (int(x + w / 2), int(y + h / 2)),
-                    (0, 255, 0),
-                    2,
-                )
-
-        if CI != "true":
-            cv2.imshow("frame", self.image)
-            if cv2.waitKey(1) & 0xFF == ord("q"):
-                return DoraStatus.STOP
-
-        return DoraStatus.CONTINUE
-
-
-plotter = Plotter()
-node = Node()
-
-for event in node:
-    event_type = event["type"]
-    if event_type == "INPUT":
-        status = plotter.on_input(event)
-        if status == DoraStatus.CONTINUE:
-            pass
-        elif status == DoraStatus.STOP:
-            print("plotter returned stop status")
-            break
-    elif event_type == "STOP":
-        print("received stop")
-    else:
-        print("received unexpected event:", event_type)
diff --git a/examples/rerun-viewer/run.rs b/examples/rerun-viewer/run.rs
index a14b553f..b575234d 100644
--- a/examples/rerun-viewer/run.rs
+++ b/examples/rerun-viewer/run.rs
@@ -1,5 +1,4 @@
 use dora_core::{get_pip_path, get_python_path, run};
-use dora_download::download_file;
 use dora_tracing::set_up_tracing;
 use eyre::{bail, ContextCompat, WrapErr};
 use std::path::Path;
@@ -51,20 +50,13 @@ async fn main() -> eyre::Result<()> {
         );
     }
 
-    run(
-        get_python_path().context("Could not get pip binary")?,
-        &["-m", "pip", "install", "--upgrade", "pip"],
-        None,
-    )
-    .await
-    .context("failed to install pip")?;
     run(
         get_pip_path().context("Could not get pip binary")?,
-        &["install", "-r", "requirements.txt"],
-        None,
+        &["install", "maturin"],
+        Some(venv),
     )
     .await
-    .context("pip install failed")?;
+    .context("pip install maturin failed")?;
 
     run(
         "maturin",
@@ -73,12 +65,6 @@ async fn main() -> eyre::Result<()> {
     )
     .await
     .context("maturin develop failed")?;
-    download_file(
-        "https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt",
-        Path::new("yolov8n.pt"),
-    )
-    .await
-    .context("Could not download weights.")?;
 
     let dataflow = Path::new("dataflow.yml");
     run_dataflow(dataflow).await?;
@@ -88,6 +74,16 @@ async fn main() -> eyre::Result<()> {
 
 async fn run_dataflow(dataflow: &Path) -> eyre::Result<()> {
     let cargo = std::env::var("CARGO").unwrap();
+
+    // First build the dataflow (install requirements)
+    let mut cmd = tokio::process::Command::new(&cargo);
+    cmd.arg("run");
+    cmd.arg("--package").arg("dora-cli");
+    cmd.arg("--").arg("build").arg(dataflow);
+    if !cmd.status().await?.success() {
+        bail!("failed to run dataflow");
+    };
+
     let mut cmd = tokio::process::Command::new(&cargo);
     cmd.arg("run");
     cmd.arg("--package").arg("dora-cli");
diff --git a/examples/rerun-viewer/webcam.py b/examples/rerun-viewer/webcam.py
deleted file mode 100755
index 33a7950d..00000000
--- a/examples/rerun-viewer/webcam.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import time
-
-import numpy as np
-import cv2
-
-from dora import Node
-import pyarrow as pa
-
-node = Node()
-
-IMAGE_INDEX = int(os.getenv("IMAGE_INDEX", 0))
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-video_capture = cv2.VideoCapture(IMAGE_INDEX)
-video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, IMAGE_WIDTH)
-video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, IMAGE_HEIGHT)
-font = cv2.FONT_HERSHEY_SIMPLEX
-
-start = time.time()
-
-# Run for 20 seconds
-while time.time() - start < 1000:
-    # Wait next dora_input
-    event = node.next()
-    if event is None:
-        break
-
-    event_type = event["type"]
-    if event_type == "INPUT":
-        ret, frame = video_capture.read()
-        if not ret:
-            frame = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=np.uint8)
-            cv2.putText(
-                frame,
-                "No Webcam was found at index %d" % (IMAGE_INDEX),
-                (int(30), int(30)),
-                font,
-                0.75,
-                (255, 255, 255),
-                2,
-                1,
-            )
-        if len(frame) != IMAGE_HEIGHT * IMAGE_WIDTH * 3:
-            print("frame size is not correct")
-            frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
-
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        node.send_output(
-            "image",
-            pa.array(frame.ravel()),
-            event["metadata"],
-        )
-        node.send_output("text", pa.array([f"send image at: {time.time()}"]))
diff --git a/node-hub/dora-rerun/README.md b/node-hub/dora-rerun/README.md
index 75973851..83c736e7 100644
--- a/node-hub/dora-rerun/README.md
+++ b/node-hub/dora-rerun/README.md
@@ -25,15 +25,15 @@ cargo install --git https://github.com/dora-rs/dora dora-rerun
       text: webcam/text
       boxes2d: object_detection/bbox
     envs:
-      IMAGE_WIDTH: 960
-      IMAGE_HEIGHT: 540
-      IMAGE_DEPTH: 3
       RERUN_MEMORY_LIMIT: 25%
 ```
 
+## Input definition
+
+- image: UInt8Array + metadata { "width": int, "height": int, "encoding": str }
+- boxes2D: StructArray + metadata { "format": str }
+- text: StringArray
+
 ## Configurations
 
-- IMAGE_WIDTH: Image width in pixels
-- IMAGE_HEIGHT: Image height in pixels
-- IMAGE_DEPTH: Image depth
 - RERUN_MEMORY_LIMIT: Rerun memory limit
diff --git a/node-hub/dora-rerun/src/main.rs b/node-hub/dora-rerun/src/main.rs
index 3bf8c231..d092e17e 100644
--- a/node-hub/dora-rerun/src/main.rs
+++ b/node-hub/dora-rerun/src/main.rs
@@ -3,12 +3,15 @@
 use std::env::VarError;
 
 use dora_node_api::{
-    arrow::array::{Float32Array, StringArray, UInt8Array},
-    DoraNode, Event,
+    arrow::{
+        array::{AsArray, StringArray, StructArray, UInt8Array},
+        datatypes::Float32Type,
+    },
+    DoraNode, Event, Parameter,
 };
-use eyre::{eyre, Context, Result};
+use eyre::{eyre, Context, ContextCompat, Result};
 use rerun::{
-    external::re_types::ArrowBuffer, SpawnOptions, TensorBuffer, TensorData, TensorDimension,
+    external::re_types::ArrowBuffer, SpawnOptions, TensorBuffer, TensorData, TensorDimension, Text,
 };
 
 fn main() -> Result<()> {
@@ -39,60 +42,65 @@ fn main() -> Result<()> {
         .context("Could not spawn rerun visualization")?;
 
     while let Some(event) = events.recv() {
-        if let Event::Input {
-            id,
-            data,
-            metadata: _,
-        } = event
-        {
+        if let Event::Input { id, data, metadata } = event {
             if id.as_str().contains("image") {
+                let height =
+                    if let Some(Parameter::Integer(height)) = metadata.parameters.get("height") {
+                        height
+                    } else {
+                        &480
+                    };
+                let width =
+                    if let Some(Parameter::Integer(width)) = metadata.parameters.get("width") {
+                        width
+                    } else {
+                        &640
+                    };
+                let encoding = if let Some(Parameter::String(encoding)) =
+                    metadata.parameters.get("encoding")
+                {
+                    encoding
+                } else {
+                    "bgr8"
+                };
+                let channels = if encoding == "bgr8" { 3 } else { 3 };
+
                 let shape = vec![
                     TensorDimension {
                         name: Some("height".into()),
-                        size: std::env::var(format!("{}_HEIGHT", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_HEIGHT env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_HEIGHT",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: *height as u64,
                     },
                     TensorDimension {
                         name: Some("width".into()),
-                        size: std::env::var(format!("{}_WIDTH", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_WIDTH env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_WIDTH",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: *width as u64,
                     },
                     TensorDimension {
                         name: Some("depth".into()),
-                        size: std::env::var(format!("{}_DEPTH", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_DEPTH env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_DEPTH",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: channels as u64,
                     },
                 ];
 
-                let buffer: UInt8Array = data.to_data().into();
-                let buffer: &[u8] = buffer.values();
-                let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
-                let tensordata = TensorData::new(shape.clone(), buffer);
-                let image = rerun::Image::new(tensordata);
+                let image = if encoding == "bgr8" {
+                    let buffer: &UInt8Array = data.as_any().downcast_ref().unwrap();
+                    let buffer: &[u8] = buffer.values();
+
+                    // Transpose values from BGR to RGB
+                    let buffer: Vec<u8> =
+                        buffer.chunks(3).flat_map(|x| [x[2], x[1], x[0]]).collect();
+                    let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
+                    let tensordata = TensorData::new(shape.clone(), buffer);
+
+                    rerun::Image::new(tensordata)
+                } else if encoding == "rgb8" {
+                    let buffer: &UInt8Array = data.as_any().downcast_ref().unwrap();
+                    let buffer: &[u8] = buffer.values();
+                    let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
+                    let tensordata = TensorData::new(shape.clone(), buffer);
+
+                    rerun::Image::new(tensordata)
+                } else {
+                    unimplemented!("We haven't worked on additional encodings.")
+                };
 
                 rec.log(id.as_str(), &image)
                     .context("could not log image")?;
@@ -107,21 +115,73 @@ fn main() -> Result<()> {
                     }
                 })?;
             } else if id.as_str().contains("boxes2d") {
-                let buffer: Float32Array = data.to_data().into();
-                let buffer: &[f32] = buffer.values();
+                let bbox_struct: StructArray = data.to_data().into();
+                let format =
+                    if let Some(Parameter::String(format)) = metadata.parameters.get("format") {
+                        format
+                    } else {
+                        "xyxy"
+                    };
+
+                // Cast Bbox
+                let bbox_buffer = bbox_struct
+                    .column_by_name("bbox")
+                    .context("Did not find bbox field within bbox struct")?;
+                let bbox = bbox_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize bbox as list")?
+                    .values();
+                let bbox = bbox
+                    .as_primitive_opt::<Float32Type>()
+                    .context("Could not get bbox value as list")?
+                    .values();
+
+                // Cast Labels
+                let labels_buffer = bbox_struct
+                    .column_by_name("labels")
+                    .context("Did not find labels field within bbox struct")?;
+                let labels = labels_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize labels as list")?
+                    .values();
+                let labels = labels
+                    .as_string_opt::<i32>()
+                    .context("Could not deserialize labels as string")?;
+                let labels: Vec<Text> = labels.iter().map(|x| Text::from(x.unwrap())).collect();
+
+                // Cast confidence
+                let conf_buffer = bbox_struct
+                    .column_by_name("conf")
+                    .context("Did not find conf field within bbox struct")?;
+                let conf = conf_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize conf as list")?
+                    .values();
+                let _conf = conf
+                    .as_primitive_opt::<Float32Type>()
+                    .context("Could not deserialize conf as float")?;
+
                 let mut centers = vec![];
                 let mut sizes = vec![];
-                let mut classes = vec![];
-                buffer.chunks(6).for_each(|block| {
-                    if let [x, y, w, h, _conf, cls] = block {
-                        centers.push((*x, *y));
-                        sizes.push((*w, *h));
-                        classes.push(*cls as u16);
-                    }
-                });
+
+                if format == "xywh" {
+                    bbox.chunks(4).for_each(|block| {
+                        if let [x, y, w, h] = block {
+                            centers.push((*x, *y));
+                            sizes.push((*w, *h));
+                        }
+                    });
+                } else if format == "xyxy" {
+                    bbox.chunks(4).for_each(|block| {
+                        if let [min_x, min_y, max_x, max_y] = block {
+                            centers.push(((max_x + min_x) / 2., (max_y + min_y) / 2.));
+                            sizes.push(((max_x - min_x), (max_y - min_y)));
+                        }
+                    });
+                }
 
                 rec.log(
                     id.as_str(),
-                    &rerun::Boxes2D::from_centers_and_sizes(centers, sizes).with_class_ids(classes),
+                    &rerun::Boxes2D::from_centers_and_sizes(centers, sizes).with_labels(labels),
                 )
                 .wrap_err("Could not log Boxes2D")?;
             }
diff --git a/node-hub/opencv-plot/README.md b/node-hub/opencv-plot/README.md
index ee77c86c..0eeb4fa3 100644
--- a/node-hub/opencv-plot/README.md
+++ b/node-hub/opencv-plot/README.md
@@ -23,21 +23,36 @@ This node is used to plot a text and a list of bbox on a base image (ideal for o
 - `image`: Arrow array containing the base image
 
 ```python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": bytes,
-    "data": np.array # flattened image data
+## Image data
+image_data: UInt8Array # Example: pa.array(img.ravel())
+metadata = {
+  "width": 640,
+  "height": 480,
+  "encoding": str, # bgr8, rgb8
 }
 
-encoded_image = pa.array([image])
+## Example
+node.send_output(
+    image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+    )
 
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+## Decoding
+storage = event["value"]
+
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```
 
 - `bbox`: an arrow array containing the bounding boxes, confidence scores, and class names of the detected objects
 
@@ -47,15 +62,15 @@
 ```python
 bbox: {
     "bbox": np.array, # flattened array of bounding boxes
    "conf": np.array, # flat array of confidence scores
-    "names": np.array, # flat array of class names
+    "labels": np.array, # flat array of class names
 }
 
-encoded_bbox = pa.array([bbox])
+encoded_bbox = pa.array([bbox], {"format": "xyxy"})
 
 decoded_bbox = {
-    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 3),
+    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 4),
     "conf": encoded_bbox[0]["conf"].values.to_numpy(),
-    "names": encoded_bbox[0]["names"].values.to_numpy(zero_copy_only=False),
+    "labels": encoded_bbox[0]["labels"].values.to_numpy(zero_copy_only=False),
 }
 ```
diff --git a/node-hub/opencv-plot/opencv_plot/main.py b/node-hub/opencv-plot/opencv_plot/main.py
index 7d8af16e..4dd7adca 100644
--- a/node-hub/opencv-plot/opencv_plot/main.py
+++ b/node-hub/opencv-plot/opencv_plot/main.py
@@ -16,7 +16,7 @@ class Plot:
     bboxes: dict = {
         "bbox": np.array([]),
         "conf": np.array([]),
-        "names": np.array([]),
+        "labels": np.array([]),
     }
 
     text: str = ""
@@ -26,7 +26,7 @@ class Plot:
 
 
 def plot_frame(plot):
-    for bbox in zip(plot.bboxes["bbox"], plot.bboxes["conf"], plot.bboxes["names"]):
+    for bbox in zip(plot.bboxes["bbox"], plot.bboxes["conf"], plot.bboxes["labels"]):
         [
             [min_x, min_y, max_x, max_y],
             confidence,
@@ -139,26 +139,57 @@ def main():
                    if encoding == "bgr8":
                        channels = 3
                        storage_type = np.uint8
+                        plot.frame = (
+                            storage.to_numpy()
+                            .astype(storage_type)
+                            .reshape((height, width, channels))
+                            .copy()  # Copy So that we can add annotation on the image
+                        )
+                    elif encoding == "rgb8":
+                        channels = 3
+                        storage_type = np.uint8
+                        frame = (
+                            storage.to_numpy()
+                            .astype(storage_type)
+                            .reshape((height, width, channels))
+                        )
+
+                        plot.frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                     else:
                         raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
-                    plot.frame = (
-                        storage.to_numpy()
-                        .astype(storage_type)
-                        .reshape((height, width, channels))
-                        .copy()  # Copy So that we can add annotation on the image
-                    )
-
                     plot_frame(plot)
                     if not RUNNER_CI:
                         if cv2.waitKey(1) & 0xFF == ord("q"):
                             break
 
                 elif event_id == "bbox":
                     arrow_bbox = event["value"][0]
+                    bbox_format = event["metadata"]["format"]
+
+                    if bbox_format == "xyxy":
+                        bbox = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
+                    elif bbox_format == "xywh":
+                        original_bbox = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
+                        bbox = np.array(
+                            [
+                                (
+                                    x - w / 2,
+                                    y - h / 2,
+                                    x + w / 2,
+                                    y + h / 2,
+                                )
+                                for [x, y, w, h] in original_bbox
+                            ]
+                        )
+                    else:
+                        raise RuntimeError(f"Unsupported bbox format: {bbox_format}")
+
                     plot.bboxes = {
-                        "bbox": arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4),
+                        "bbox": bbox,
                         "conf": arrow_bbox["conf"].values.to_numpy(),
-                        "names": arrow_bbox["names"].values.to_numpy(zero_copy_only=False),
+                        "labels": arrow_bbox["labels"].values.to_numpy(
+                            zero_copy_only=False
+                        ),
                     }
                 elif event_id == "text":
                     plot.text = event["value"][0].as_py()
diff --git a/node-hub/opencv-video-capture/README.md b/node-hub/opencv-video-capture/README.md
index 923b9400..f7a6c230 100644
--- a/node-hub/opencv-video-capture/README.md
+++ b/node-hub/opencv-video-capture/README.md
@@ -29,22 +29,36 @@ This node is used to capture video from a camera using OpenCV.
 - `image`: an arrow array containing the captured image
 
 ```Python
-
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": str,
-    "data": np.array # flattened image data
+## Image data
+image_data: UInt8Array # Example: pa.array(img.ravel())
+metadata = {
+  "width": 640,
+  "height": 480,
+  "encoding": str, # bgr8, rgb8
 }
 
-encoded_image = pa.array([image])
+## Example
+node.send_output(
+    image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+    )
 
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+## Decoding
+storage = event["value"]
+
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```
 
 ## License
diff --git a/node-hub/opencv-video-capture/opencv_video_capture/main.py b/node-hub/opencv-video-capture/opencv_video_capture/main.py
index d9dee5e1..0f4e29ab 100644
--- a/node-hub/opencv-video-capture/opencv_video_capture/main.py
+++ b/node-hub/opencv-video-capture/opencv_video_capture/main.py
@@ -50,6 +50,7 @@ def main():
     args = parser.parse_args()
 
     video_capture_path = os.getenv("CAPTURE_PATH", args.path)
+    encoding = os.getenv("ENCODING", "bgr8")
 
     if isinstance(video_capture_path, str) and video_capture_path.isnumeric():
         video_capture_path = int(video_capture_path)
@@ -102,15 +103,25 @@ def main():
                 )
 
                 # resize the frame
-                if image_width is not None and image_height is not None:
+                if (
+                    image_width is not None
+                    and image_height is not None
+                    and (
+                        frame.shape[1] != image_width or frame.shape[0] != image_height
+                    )
+                ):
                     frame = cv2.resize(frame, (image_width, image_height))
 
+                # Get the right encoding
+                if encoding == "rgb8":
+                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
                 storage = pa.array(frame.ravel())
 
                 metadata = event["metadata"]
                 metadata["width"] = int(frame.shape[1])
                 metadata["height"] = int(frame.shape[0])
-                metadata["encoding"] = "bgr8"
+                metadata["encoding"] = encoding
 
                 node.send_output("image", storage, metadata)
diff --git a/node-hub/ultralytics-yolo/README.md b/node-hub/ultralytics-yolo/README.md
index fae47540..56703531 100644
--- a/node-hub/ultralytics-yolo/README.md
+++ b/node-hub/ultralytics-yolo/README.md
@@ -5,16 +5,16 @@ This node is used to detect objects in images using YOLOv8.
 # YAML
 
 ```yaml
-  - id: object_detection
-    build: pip install ../../node-hub/ultralytics-yolo
-    path: ultralytics-yolo
-    inputs:
-      image: webcam/image
-
-    outputs:
-      - bbox
-    env:
-      MODEL: yolov5n.pt
+- id: object_detection
+  build: pip install ../../node-hub/ultralytics-yolo
+  path: ultralytics-yolo
+  inputs:
+    image: webcam/image
+
+  outputs:
+    - bbox
+  env:
+    MODEL: yolov5n.pt
 ```
 
 # Inputs
@@ -22,21 +22,36 @@
 - `image`: Arrow array containing the base image
 
 ```python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": str,
-    "data": np.array # flattened image data
+## Image data
+image_data: UInt8Array # Example: pa.array(img.ravel())
+metadata = {
+  "width": 640,
+  "height": 480,
+  "encoding": str, # bgr8, rgb8
 }
 
-encoded_image = pa.array([image])
+## Example
+node.send_output(
+    image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+    )
 
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+## Decoding
+storage = event["value"]
+
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```
 
@@ -49,15 +64,15 @@ decoded_image = {
 ```python
 bbox: {
     "bbox": np.array, # flattened array of bounding boxes
     "conf": np.array, # flat array of confidence scores
-    "names": np.array, # flat array of class names
+    "labels": np.array, # flat array of class names
 }
 
-encoded_bbox = pa.array([bbox])
+encoded_bbox = pa.array([bbox], {"format": "xyxy"})
 
 decoded_bbox = {
-    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 3),
+    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 4),
     "conf": encoded_bbox[0]["conf"].values.to_numpy(),
-    "names": encoded_bbox[0]["names"].values.to_numpy(zero_copy_only=False),
+    "labels": encoded_bbox[0]["labels"].values.to_numpy(zero_copy_only=False),
 }
 ```
diff --git a/node-hub/ultralytics-yolo/ultralytics_yolo/main.py b/node-hub/ultralytics-yolo/ultralytics_yolo/main.py
index 23ca85b0..42ef980f 100644
--- a/node-hub/ultralytics-yolo/ultralytics_yolo/main.py
+++ b/node-hub/ultralytics-yolo/ultralytics_yolo/main.py
@@ -32,6 +32,7 @@ def main():
     args = parser.parse_args()
 
     model_path = os.getenv("MODEL", args.model)
+    bbox_format = os.getenv("FORMAT", "xyxy")
 
     model = YOLO(model_path)
     node = Node(args.name)
@@ -54,6 +55,9 @@ def main():
             if encoding == "bgr8":
                 channels = 3
                 storage_type = np.uint8
+            elif encoding == "rgb8":
+                channels = 3
+                storage_type = np.uint8
             else:
                 raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
@@ -64,12 +68,20 @@ def main():
             )
             if encoding == "bgr8":
                 frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
+            elif encoding == "rgb8":
+                pass
             else:
                 raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
             results = model(frame, verbose=False)  # includes NMS
 
-            bboxes = np.array(results[0].boxes.xyxy.cpu())
+            if bbox_format == "xyxy":
+                bboxes = np.array(results[0].boxes.xyxy.cpu())
+            elif bbox_format == "xywh":
+                bboxes = np.array(results[0].boxes.xywh.cpu())
+            else:
+                raise RuntimeError(f"Unsupported bbox format: {bbox_format}")
+
             conf = np.array(results[0].boxes.conf.cpu())
             labels = np.array(results[0].boxes.cls.cpu())
 
@@ -78,13 +90,17 @@ def main():
             bbox = {
                 "bbox": bboxes.ravel(),
                 "conf": conf,
-                "names": names,
+                "labels": names,
             }
+            bbox = pa.array([bbox])
+
+            metadata = event["metadata"]
+            metadata["format"] = bbox_format
 
             node.send_output(
                 "bbox",
-                pa.array([bbox]),
-                event["metadata"],
+                bbox,
+                metadata,
             )
 
         elif event_type == "ERROR":
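
With this change, `bbox` messages become a one-element Arrow StructArray with `bbox`, `conf`, and `labels` fields, and the box layout travels in the `format` metadata entry instead of being hard-coded. A minimal sketch of a downstream Python consumer, assuming the `dora` event API used in the files above; the `xywh`-to-corners conversion mirrors the one in `opencv_plot/main.py`:

```python
import numpy as np
from dora import Node

node = Node()

for event in node:
    if event["type"] == "INPUT" and event["id"] == "bbox":
        # The sender wraps one struct in a single-element array: pa.array([bbox])
        arrow_bbox = event["value"][0]
        bbox_format = event["metadata"]["format"]  # "xyxy" or "xywh"

        bbox = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
        conf = arrow_bbox["conf"].values.to_numpy()
        labels = arrow_bbox["labels"].values.to_numpy(zero_copy_only=False)

        if bbox_format == "xywh":
            # Convert center/size boxes to corner coordinates
            x, y, w, h = bbox.T
            bbox = np.stack([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=1)

        for (min_x, min_y, max_x, max_y), c, label in zip(bbox, conf, labels):
            print(f"{label} ({c:.2f}): ({min_x}, {min_y}) -> ({max_x}, {max_y})")
```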