
Refactor the rerun example to use metadata from both the image and bbox definitions, making our inputs more generic.

Rewrite the README documentation to reflect the metadata changes.
tags/v0.3.6-rc0
haixuanTao, 1 year ago. Parent commit: dc0d2f2515
13 changed files with 343 additions and 386 deletions

1. examples/rerun-viewer/dataflow.yml (+34, -44)
2. examples/rerun-viewer/object_detection.py (+0, -45)
3. examples/rerun-viewer/plot.py (+0, -90)
4. examples/rerun-viewer/run.rs (+13, -17)
5. examples/rerun-viewer/webcam.py (+0, -56)
6. node-hub/dora-rerun/README.md (+6, -6)
7. node-hub/dora-rerun/src/main.rs (+116, -56)
8. node-hub/opencv-plot/README.md (+31, -16)
9. node-hub/opencv-plot/opencv_plot/main.py (+42, -11)
10. node-hub/opencv-video-capture/README.md (+27, -13)
11. node-hub/opencv-video-capture/opencv_video_capture/main.py (+13, -2)
12. node-hub/ultralytics-yolo/README.md (+41, -26)
13. node-hub/ultralytics-yolo/ultralytics_yolo/main.py (+20, -4)
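
The core of the change: image geometry and bbox layout now travel as per-message Arrow metadata (`width`, `height`, `encoding`, `format`) instead of per-node `IMAGE_WIDTH`/`IMAGE_HEIGHT` environment variables, so a consumer works at any camera resolution. A minimal sketch of a consumer under the new contract, using the dora Python API as the nodes below do (the node wiring itself is assumed, not part of the commit):

```python
import numpy as np
from dora import Node

node = Node()

for event in node:
    if event["type"] == "INPUT" and event["id"] == "image":
        metadata = event["metadata"]
        width = metadata["width"]
        height = metadata["height"]
        encoding = metadata["encoding"]  # "bgr8" or "rgb8"

        # The geometry comes from the message itself, so no
        # IMAGE_WIDTH/IMAGE_HEIGHT env configuration is needed.
        frame = (
            event["value"]
            .to_numpy()
            .astype(np.uint8)
            .reshape((height, width, 3))
        )
```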

examples/rerun-viewer/dataflow.yml (+34, -44)

@@ -1,48 +1,38 @@
 nodes:
-  - id: webcam
-    custom:
-      source: ./webcam.py
-      inputs:
-        tick:
-          source: dora/timer/millis/10
-          queue_size: 1000
-      outputs:
-        - image
-        - text
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+  - id: camera
+    build: pip install ../../node-hub/opencv-video-capture
+    path: opencv-video-capture
+    inputs:
+      tick: dora/timer/millis/20
+    outputs:
+      - image
+    env:
+      CAPTURE_PATH: 0
+      IMAGE_WIDTH: 640
+      IMAGE_HEIGHT: 480
+      ENCODING: rgb8
 
-  - id: object_detection
-    custom:
-      source: ./object_detection.py
-      inputs:
-        image: webcam/image
-      outputs:
-        - bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+  - id: object-detection
+    build: pip install -e ../../node-hub/ultralytics-yolo
+    path: ultralytics-yolo
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+    outputs:
+      - bbox
+    env:
+      MODEL: yolov8n.pt
+      FORMAT: xywh
 
   - id: rerun
-    custom:
-      source: dora-rerun
-      inputs:
-        image: webcam/image
-        text: webcam/text
-        boxes2d: object_detection/bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
-        IMAGE_DEPTH: 3
-
-  - id: matplotlib
-    custom:
-      source: ./plot.py
-      inputs:
-        image: webcam/image
-        bbox: object_detection/bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+    build: cargo build -p dora-rerun --release
+    path: dora-rerun
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+      boxes2d: object-detection/bbox
+    env:
+      RERUN_FLUSH_TICK_SECS: "0.001"
+      RERUN_MEMORY_LIMIT: 25%
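
Each node reads its `env:` block at startup. A sketch of how the capture node's configuration above is consumed, mirroring opencv-video-capture/main.py further down (the default values here are assumptions):

```python
import os

# Values come from the `env:` section of the dataflow YAML.
capture_path = os.getenv("CAPTURE_PATH", "0")
image_width = int(os.getenv("IMAGE_WIDTH", "640"))
image_height = int(os.getenv("IMAGE_HEIGHT", "480"))
encoding = os.getenv("ENCODING", "bgr8")  # "bgr8" or "rgb8"

# A numeric CAPTURE_PATH selects a camera index rather than a file or URL.
if capture_path.isnumeric():
    capture_path = int(capture_path)
```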

examples/rerun-viewer/object_detection.py (+0, -45)

@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import cv2
-import numpy as np
-from ultralytics import YOLO
-
-from dora import Node
-import pyarrow as pa
-
-model = YOLO("yolov8n.pt")
-
-node = Node()
-
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-
-for event in node:
-    event_type = event["type"]
-    if event_type == "INPUT":
-        event_id = event["id"]
-        if event_id == "image":
-            print("[object detection] received image input")
-            image = event["value"].to_numpy().reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 3))
-
-            frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
-            results = model(frame)  # includes NMS
-            # Process results
-            boxes = np.array(results[0].boxes.xywh.cpu())
-            conf = np.array(results[0].boxes.conf.cpu())
-            label = np.array(results[0].boxes.cls.cpu())
-            # concatenate them together
-            arrays = np.concatenate((boxes, conf[:, None], label[:, None]), axis=1)
-
-            node.send_output("bbox", pa.array(arrays.ravel()), event["metadata"])
-        else:
-            print("[object detection] ignoring unexpected input:", event_id)
-    elif event_type == "STOP":
-        print("[object detection] received stop")
-    elif event_type == "ERROR":
-        print("[object detection] error: ", event["error"])
-    else:
-        print("[object detection] received unexpected event:", event_type)

examples/rerun-viewer/plot.py (+0, -90)

@@ -1,90 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-from dora import Node
-from dora import DoraStatus
-
-import cv2
-import numpy as np
-
-CI = os.environ.get("CI")
-
-font = cv2.FONT_HERSHEY_SIMPLEX
-
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-
-
-class Plotter:
-    """
-    Plot image and bounding box
-    """
-
-    def __init__(self):
-        self.image = []
-        self.bboxs = []
-
-    def on_input(
-        self,
-        dora_input,
-    ) -> DoraStatus:
-        """
-        Put image and bounding box on cv2 window.
-
-        Args:
-            dora_input["id"] (str): Id of the dora_input declared in the yaml configuration
-            dora_input["value"] (arrow array): message of the dora_input
-        """
-        if dora_input["id"] == "image":
-            image = (
-                dora_input["value"].to_numpy().reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 3))
-            )
-
-            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            self.image = image
-
-        elif dora_input["id"] == "bbox" and len(self.image) != 0:
-            bboxs = dora_input["value"].to_numpy()
-            self.bboxs = np.reshape(bboxs, (-1, 6))
-            for bbox in self.bboxs:
-                [
-                    x,
-                    y,
-                    w,
-                    h,
-                    confidence,
-                    label,
-                ] = bbox
-                cv2.rectangle(
-                    self.image,
-                    (int(x - w / 2), int(y - h / 2)),
-                    (int(x + w / 2), int(y + h / 2)),
-                    (0, 255, 0),
-                    2,
-                )
-
-        if CI != "true":
-            cv2.imshow("frame", self.image)
-            if cv2.waitKey(1) & 0xFF == ord("q"):
-                return DoraStatus.STOP
-
-        return DoraStatus.CONTINUE
-
-
-plotter = Plotter()
-node = Node()
-
-for event in node:
-    event_type = event["type"]
-    if event_type == "INPUT":
-        status = plotter.on_input(event)
-        if status == DoraStatus.CONTINUE:
-            pass
-        elif status == DoraStatus.STOP:
-            print("plotter returned stop status")
-            break
-    elif event_type == "STOP":
-        print("received stop")
-    else:
-        print("received unexpected event:", event_type)

examples/rerun-viewer/run.rs (+13, -17)

@@ -1,5 +1,4 @@
 use dora_core::{get_pip_path, get_python_path, run};
-use dora_download::download_file;
 use dora_tracing::set_up_tracing;
 use eyre::{bail, ContextCompat, WrapErr};
 use std::path::Path;
@@ -51,20 +50,13 @@ async fn main() -> eyre::Result<()> {
         );
     }
 
-    run(
-        get_python_path().context("Could not get pip binary")?,
-        &["-m", "pip", "install", "--upgrade", "pip"],
-        None,
-    )
-    .await
-    .context("failed to install pip")?;
     run(
         get_pip_path().context("Could not get pip binary")?,
-        &["install", "-r", "requirements.txt"],
-        None,
+        &["install", "maturin"],
+        Some(venv),
     )
     .await
-    .context("pip install failed")?;
+    .context("pip install maturin failed")?;
 
     run(
         "maturin",
@@ -73,12 +65,6 @@ async fn main() -> eyre::Result<()> {
     )
     .await
     .context("maturin develop failed")?;
-    download_file(
-        "https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt",
-        Path::new("yolov8n.pt"),
-    )
-    .await
-    .context("Could not download weights.")?;
 
     let dataflow = Path::new("dataflow.yml");
     run_dataflow(dataflow).await?;
@@ -88,6 +74,16 @@ async fn main() -> eyre::Result<()> {
 
 async fn run_dataflow(dataflow: &Path) -> eyre::Result<()> {
     let cargo = std::env::var("CARGO").unwrap();
+
+    // First build the dataflow (install requirements)
+    let mut cmd = tokio::process::Command::new(&cargo);
+    cmd.arg("run");
+    cmd.arg("--package").arg("dora-cli");
+    cmd.arg("--").arg("build").arg(dataflow);
+    if !cmd.status().await?.success() {
+        bail!("failed to build dataflow");
+    };
+
     let mut cmd = tokio::process::Command::new(&cargo);
     cmd.arg("run");
     cmd.arg("--package").arg("dora-cli");


examples/rerun-viewer/webcam.py (+0, -56)

@@ -1,56 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import time
-import numpy as np
-import cv2
-
-from dora import Node
-import pyarrow as pa
-
-node = Node()
-
-IMAGE_INDEX = int(os.getenv("IMAGE_INDEX", 0))
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-video_capture = cv2.VideoCapture(IMAGE_INDEX)
-video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, IMAGE_WIDTH)
-video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, IMAGE_HEIGHT)
-font = cv2.FONT_HERSHEY_SIMPLEX
-
-start = time.time()
-
-# Run for 20 seconds
-while time.time() - start < 1000:
-    # Wait next dora_input
-    event = node.next()
-    if event is None:
-        break
-
-    event_type = event["type"]
-    if event_type == "INPUT":
-        ret, frame = video_capture.read()
-        if not ret:
-            frame = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=np.uint8)
-            cv2.putText(
-                frame,
-                "No Webcam was found at index %d" % (IMAGE_INDEX),
-                (int(30), int(30)),
-                font,
-                0.75,
-                (255, 255, 255),
-                2,
-                1,
-            )
-        if len(frame) != IMAGE_HEIGHT * IMAGE_WIDTH * 3:
-            print("frame size is not correct")
-            frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
-
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        node.send_output(
-            "image",
-            pa.array(frame.ravel()),
-            event["metadata"],
-        )
-        node.send_output("text", pa.array([f"send image at: {time.time()}"]))

node-hub/dora-rerun/README.md (+6, -6)

@@ -25,15 +25,15 @@ cargo install --git https://github.com/dora-rs/dora dora-rerun
     text: webcam/text
     boxes2d: object_detection/bbox
   envs:
-    IMAGE_WIDTH: 960
-    IMAGE_HEIGHT: 540
-    IMAGE_DEPTH: 3
+    RERUN_MEMORY_LIMIT: 25%
 ```
 
+## Input definition
+
+- image: UInt8Array + metadata { "width": int, "height": int, "encoding": str }
+- boxes2D: StructArray + metadata { "format": str }
+- text: StringArray
+
 ## Configurations
 
-- IMAGE_WIDTH: Image width in pixels
-- IMAGE_HEIGHT: Image height in pixels
-- IMAGE_DEPTH: Image depth
 - RERUN_MEMORY_LIMIT: Rerun memory limit
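
A sketch of a producer honoring this input definition from Python; the struct field names (`bbox`, `conf`, `labels`) follow the ultralytics-yolo node in this commit, and the values are placeholders:

```python
import numpy as np
import pyarrow as pa
from dora import Node

node = Node()

# image: UInt8Array + metadata {"width", "height", "encoding"}
frame = np.zeros((480, 640, 3), dtype=np.uint8)
node.send_output(
    "image",
    pa.array(frame.ravel()),
    {"width": 640, "height": 480, "encoding": "rgb8"},
)

# boxes2d: StructArray + metadata {"format": "xywh" or "xyxy"}
bbox = {
    "bbox": np.array([320.0, 240.0, 100.0, 80.0], dtype=np.float32),
    "conf": np.array([0.9], dtype=np.float32),
    "labels": ["person"],
}
node.send_output("boxes2d", pa.array([bbox]), {"format": "xywh"})
```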

node-hub/dora-rerun/src/main.rs (+116, -56)

@@ -3,12 +3,15 @@
 use std::env::VarError;
 
 use dora_node_api::{
-    arrow::array::{Float32Array, StringArray, UInt8Array},
-    DoraNode, Event,
+    arrow::{
+        array::{AsArray, StringArray, StructArray, UInt8Array},
+        datatypes::Float32Type,
+    },
+    DoraNode, Event, Parameter,
 };
-use eyre::{eyre, Context, Result};
+use eyre::{eyre, Context, ContextCompat, Result};
 use rerun::{
-    external::re_types::ArrowBuffer, SpawnOptions, TensorBuffer, TensorData, TensorDimension,
+    external::re_types::ArrowBuffer, SpawnOptions, TensorBuffer, TensorData, TensorDimension, Text,
 };
 
 fn main() -> Result<()> {
@@ -39,60 +42,65 @@ fn main() -> Result<()> {
         .context("Could not spawn rerun visualization")?;
 
     while let Some(event) = events.recv() {
-        if let Event::Input {
-            id,
-            data,
-            metadata: _,
-        } = event
-        {
+        if let Event::Input { id, data, metadata } = event {
             if id.as_str().contains("image") {
+                let height =
+                    if let Some(Parameter::Integer(height)) = metadata.parameters.get("height") {
+                        height
+                    } else {
+                        &480
+                    };
+                let width =
+                    if let Some(Parameter::Integer(width)) = metadata.parameters.get("width") {
+                        width
+                    } else {
+                        &640
+                    };
+                let encoding = if let Some(Parameter::String(encoding)) =
+                    metadata.parameters.get("encoding")
+                {
+                    encoding
+                } else {
+                    "bgr8"
+                };
+                let channels = if encoding == "bgr8" { 3 } else { 3 };
+
                 let shape = vec![
                     TensorDimension {
                         name: Some("height".into()),
-                        size: std::env::var(format!("{}_HEIGHT", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_HEIGHT env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_HEIGHT",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: *height as u64,
                     },
                     TensorDimension {
                         name: Some("width".into()),
-                        size: std::env::var(format!("{}_WIDTH", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_WIDTH env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_WIDTH",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: *width as u64,
                     },
                     TensorDimension {
                         name: Some("depth".into()),
-                        size: std::env::var(format!("{}_DEPTH", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_DEPTH env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_DEPTH",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: channels as u64,
                     },
                 ];
 
-                let buffer: UInt8Array = data.to_data().into();
-                let buffer: &[u8] = buffer.values();
-                let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
-                let tensordata = TensorData::new(shape.clone(), buffer);
-                let image = rerun::Image::new(tensordata);
+                let image = if encoding == "bgr8" {
+                    let buffer: &UInt8Array = data.as_any().downcast_ref().unwrap();
+                    let buffer: &[u8] = buffer.values();
+
+                    // Transpose values from BGR to RGB
+                    let buffer: Vec<u8> =
+                        buffer.chunks(3).flat_map(|x| [x[2], x[1], x[0]]).collect();
+                    let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
+                    let tensordata = TensorData::new(shape.clone(), buffer);
+
+                    rerun::Image::new(tensordata)
+                } else if encoding == "rgb8" {
+                    let buffer: &UInt8Array = data.as_any().downcast_ref().unwrap();
+                    let buffer: &[u8] = buffer.values();
+                    let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
+                    let tensordata = TensorData::new(shape.clone(), buffer);
+
+                    rerun::Image::new(tensordata)
+                } else {
+                    unimplemented!("We haven't worked on additional encodings.")
+                };
 
                 rec.log(id.as_str(), &image)
                     .context("could not log image")?;
@@ -107,21 +115,73 @@ fn main() -> Result<()> {
                     }
                 })?;
             } else if id.as_str().contains("boxes2d") {
-                let buffer: Float32Array = data.to_data().into();
-                let buffer: &[f32] = buffer.values();
+                let bbox_struct: StructArray = data.to_data().into();
+                let format =
+                    if let Some(Parameter::String(format)) = metadata.parameters.get("format") {
+                        format
+                    } else {
+                        "xyxy"
+                    };
+
+                // Cast Bbox
+                let bbox_buffer = bbox_struct
+                    .column_by_name("bbox")
+                    .context("Did not find bbox field within bbox struct")?;
+                let bbox = bbox_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize bbox as list")?
+                    .values();
+                let bbox = bbox
+                    .as_primitive_opt::<Float32Type>()
+                    .context("Could not deserialize bbox values as float32")?
+                    .values();
+
+                // Cast Labels
+                let labels_buffer = bbox_struct
+                    .column_by_name("labels")
+                    .context("Did not find labels field within bbox struct")?;
+                let labels = labels_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize labels as list")?
+                    .values();
+                let labels = labels
+                    .as_string_opt::<i32>()
+                    .context("Could not deserialize labels as string")?;
+                let labels: Vec<Text> = labels.iter().map(|x| Text::from(x.unwrap())).collect();
+
+                // Cast confidence
+                let conf_buffer = bbox_struct
+                    .column_by_name("conf")
+                    .context("Did not find conf field within bbox struct")?;
+                let conf = conf_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize conf as list")?
+                    .values();
+                let _conf = conf
+                    .as_primitive_opt::<Float32Type>()
+                    .context("Could not deserialize conf as float32")?;
 
                 let mut centers = vec![];
                 let mut sizes = vec![];
-                let mut classes = vec![];
-                buffer.chunks(6).for_each(|block| {
-                    if let [x, y, w, h, _conf, cls] = block {
-                        centers.push((*x, *y));
-                        sizes.push((*w, *h));
-                        classes.push(*cls as u16);
-                    }
-                });
 
+                if format == "xywh" {
+                    bbox.chunks(4).for_each(|block| {
+                        if let [x, y, w, h] = block {
+                            centers.push((*x, *y));
+                            sizes.push((*w, *h));
+                        }
+                    });
+                } else if format == "xyxy" {
+                    bbox.chunks(4).for_each(|block| {
+                        if let [min_x, min_y, max_x, max_y] = block {
+                            centers.push(((max_x + min_x) / 2., (max_y + min_y) / 2.));
+                            sizes.push(((max_x - min_x), (max_y - min_y)));
+                        }
+                    });
+                }
                 rec.log(
                     id.as_str(),
-                    &rerun::Boxes2D::from_centers_and_sizes(centers, sizes).with_class_ids(classes),
+                    &rerun::Boxes2D::from_centers_and_sizes(centers, sizes).with_labels(labels),
                 )
                 .wrap_err("Could not log Boxes2D")?;
             }
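
For reference, the center/size conversion the new boxes2d branch performs, written as a numpy sketch (equivalent math only, not the node's actual code):

```python
import numpy as np

def to_centers_and_sizes(bbox: np.ndarray, fmt: str):
    """bbox: (N, 4) array; fmt: "xywh" or "xyxy", per the "format" metadata."""
    if fmt == "xywh":
        # Already center x, center y, width, height.
        return bbox[:, :2], bbox[:, 2:]
    if fmt == "xyxy":
        centers = (bbox[:, :2] + bbox[:, 2:]) / 2.0
        sizes = bbox[:, 2:] - bbox[:, :2]
        return centers, sizes
    raise ValueError(f"unsupported bbox format: {fmt}")
```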


node-hub/opencv-plot/README.md (+31, -16)

@@ -23,21 +23,36 @@ This node is used to plot a text and a list of bbox on a base image (ideal for o
 - `image`: Arrow array containing the base image
 
 ```python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": bytes,
-    "data": np.array  # flattened image data
-}
+## Image data
+image_data: UInt8Array  # Example: pa.array(img.ravel())
+metadata = {
+    "width": 640,
+    "height": 480,
+    "encoding": str,  # bgr8, rgb8
+}
 
-encoded_image = pa.array([image])
+## Example
+node.send_output(
+    "image", image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+)
 
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+## Decoding
+storage = event["value"]
+
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```
 
 - `bbox`: an arrow array containing the bounding boxes, confidence scores, and class names of the detected objects
@@ -47,15 +62,15 @@ decoded_image = {
 bbox: {
     "bbox": np.array,  # flattened array of bounding boxes
     "conf": np.array,  # flat array of confidence scores
-    "names": np.array,  # flat array of class names
+    "labels": np.array,  # flat array of class names
 }
 
-encoded_bbox = pa.array([bbox])
+encoded_bbox = pa.array([bbox])  # sent with metadata {"format": "xyxy"}
 
 decoded_bbox = {
-    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 3),
+    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 4),
     "conf": encoded_bbox[0]["conf"].values.to_numpy(),
-    "names": encoded_bbox[0]["names"].values.to_numpy(zero_copy_only=False),
+    "labels": encoded_bbox[0]["labels"].values.to_numpy(zero_copy_only=False),
 }
 ```
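
Since the plot draws in xyxy, an incoming xywh payload has to be converted first; a numpy sketch of the normalization the node applies (names are illustrative):

```python
import numpy as np

def to_xyxy(bbox: np.ndarray, fmt: str) -> np.ndarray:
    """Normalize (N, 4) boxes to xyxy, following the "format" metadata."""
    if fmt == "xyxy":
        return bbox
    if fmt == "xywh":
        x, y, w, h = bbox.T  # xywh is center x, center y, width, height
        return np.stack([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=1)
    raise RuntimeError(f"Unsupported bbox format: {fmt}")
```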



node-hub/opencv-plot/opencv_plot/main.py (+42, -11)

@@ -16,7 +16,7 @@ class Plot:
     bboxes: dict = {
         "bbox": np.array([]),
         "conf": np.array([]),
-        "names": np.array([]),
+        "labels": np.array([]),
     }
 
     text: str = ""
@@ -26,7 +26,7 @@ class Plot:
 
 
 def plot_frame(plot):
-    for bbox in zip(plot.bboxes["bbox"], plot.bboxes["conf"], plot.bboxes["names"]):
+    for bbox in zip(plot.bboxes["bbox"], plot.bboxes["conf"], plot.bboxes["labels"]):
         [
             [min_x, min_y, max_x, max_y],
             confidence,
@@ -139,26 +139,57 @@ def main():
                 if encoding == "bgr8":
                     channels = 3
                     storage_type = np.uint8
+                    plot.frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                        .copy()  # Copy so that we can add annotations on the image
+                    )
+                elif encoding == "rgb8":
+                    channels = 3
+                    storage_type = np.uint8
+                    frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                    )
+
+                    plot.frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+                else:
+                    raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
-                plot.frame = (
-                    storage.to_numpy()
-                    .astype(storage_type)
-                    .reshape((height, width, channels))
-                    .copy()  # Copy So that we can add annotation on the image
-                )
-
                 plot_frame(plot)
                 if not RUNNER_CI:
                     if cv2.waitKey(1) & 0xFF == ord("q"):
                         break
             elif event_id == "bbox":
                 arrow_bbox = event["value"][0]
+                bbox_format = event["metadata"]["format"]
+
+                if bbox_format == "xyxy":
+                    bbox = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
+                elif bbox_format == "xywh":
+                    original_bbox = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
+                    bbox = np.array(
+                        [
+                            (
+                                x - w / 2,
+                                y - h / 2,
+                                x + w / 2,
+                                y + h / 2,
+                            )
+                            for [x, y, w, h] in original_bbox
+                        ]
+                    )
+                else:
+                    raise RuntimeError(f"Unsupported bbox format: {bbox_format}")
+
                 plot.bboxes = {
-                    "bbox": arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4),
+                    "bbox": bbox,
                     "conf": arrow_bbox["conf"].values.to_numpy(),
-                    "names": arrow_bbox["names"].values.to_numpy(zero_copy_only=False),
+                    "labels": arrow_bbox["labels"].values.to_numpy(
+                        zero_copy_only=False
+                    ),
                 }
             elif event_id == "text":
                 plot.text = event["value"][0].as_py()

node-hub/opencv-video-capture/README.md (+27, -13)

@@ -29,22 +29,36 @@ This node is used to capture video from a camera using OpenCV.
 - `image`: an arrow array containing the captured image
 
 ```Python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": str,
-    "data": np.array  # flattened image data
-}
+## Image data
+image_data: UInt8Array  # Example: pa.array(img.ravel())
+metadata = {
+    "width": 640,
+    "height": 480,
+    "encoding": str,  # bgr8, rgb8
+}
 
-encoded_image = pa.array([image])
+## Example
+node.send_output(
+    "image", image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+)
 
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+## Decoding
+storage = event["value"]
+
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```
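
Putting both halves together, a minimal capture loop in the style of this node (a sketch: the tick input and black-frame fallback mirror the node's behavior, but this is not its exact code):

```python
import cv2
import numpy as np
import pyarrow as pa
from dora import Node

node = Node()
video_capture = cv2.VideoCapture(0)  # CAPTURE_PATH

for event in node:
    if event["type"] == "INPUT" and event["id"] == "tick":
        ret, frame = video_capture.read()
        if not ret:
            # Fall back to a black frame if the camera read fails.
            frame = np.zeros((480, 640, 3), dtype=np.uint8)

        metadata = event["metadata"]
        metadata["width"] = int(frame.shape[1])
        metadata["height"] = int(frame.shape[0])
        metadata["encoding"] = "bgr8"  # OpenCV frames are BGR by default

        node.send_output("image", pa.array(frame.ravel()), metadata)
```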

## License


node-hub/opencv-video-capture/opencv_video_capture/main.py (+13, -2)

@@ -50,6 +50,7 @@ def main():
     args = parser.parse_args()
 
     video_capture_path = os.getenv("CAPTURE_PATH", args.path)
+    encoding = os.getenv("ENCODING", "bgr8")
 
     if isinstance(video_capture_path, str) and video_capture_path.isnumeric():
         video_capture_path = int(video_capture_path)
@@ -102,15 +103,25 @@ def main():
                 )
 
                 # resize the frame
-                if image_width is not None and image_height is not None:
+                if (
+                    image_width is not None
+                    and image_height is not None
+                    and (
+                        frame.shape[1] != image_width or frame.shape[0] != image_height
+                    )
+                ):
                     frame = cv2.resize(frame, (image_width, image_height))
 
+                # Get the right encoding
+                if encoding == "rgb8":
+                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
                 storage = pa.array(frame.ravel())
 
                 metadata = event["metadata"]
                 metadata["width"] = int(frame.shape[1])
                 metadata["height"] = int(frame.shape[0])
-                metadata["encoding"] = "bgr8"
+                metadata["encoding"] = encoding
 
                 node.send_output("image", storage, metadata)



node-hub/ultralytics-yolo/README.md (+41, -26)

@@ -5,16 +5,16 @@ This node is used to detect objects in images using YOLOv8.
 # YAML
 
 ```yaml
-- id: object_detection
-  build: pip install ../../node-hub/ultralytics-yolo
-  path: ultralytics-yolo
-  inputs:
-    image: webcam/image
-  outputs:
-    - bbox
-  env:
-    MODEL: yolov5n.pt
+  - id: object_detection
+    build: pip install ../../node-hub/ultralytics-yolo
+    path: ultralytics-yolo
+    inputs:
+      image: webcam/image
+    outputs:
+      - bbox
+    env:
+      MODEL: yolov5n.pt
 ```
 
 # Inputs
@@ -22,21 +22,36 @@ This node is used to detect objects in images using YOLOv8.
 
 - `image`: Arrow array containing the base image
 
 ```python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": str,
-    "data": np.array  # flattened image data
-}
+## Image data
+image_data: UInt8Array  # Example: pa.array(img.ravel())
+metadata = {
+    "width": 640,
+    "height": 480,
+    "encoding": str,  # bgr8, rgb8
+}
 
-encoded_image = pa.array([image])
+## Example
+node.send_output(
+    "image", image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+)
 
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+## Decoding
+storage = event["value"]
+
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 
 ```
 
@@ -49,15 +64,15 @@ decoded_image = {
 bbox: {
     "bbox": np.array,  # flattened array of bounding boxes
     "conf": np.array,  # flat array of confidence scores
-    "names": np.array,  # flat array of class names
+    "labels": np.array,  # flat array of class names
 }
 
-encoded_bbox = pa.array([bbox])
+encoded_bbox = pa.array([bbox])  # sent with metadata {"format": "xyxy"}
 
 decoded_bbox = {
-    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 3),
+    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 4),
     "conf": encoded_bbox[0]["conf"].values.to_numpy(),
-    "names": encoded_bbox[0]["names"].values.to_numpy(zero_copy_only=False),
+    "labels": encoded_bbox[0]["labels"].values.to_numpy(zero_copy_only=False),
 }
 ```
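
On the consumer side, the `format` metadata set by this node tells downstream code how to interpret the four box values; a sketch of decoding inside an event loop (the node wiring is assumed):

```python
import numpy as np
from dora import Node

node = Node()

for event in node:
    if event["type"] == "INPUT" and event["id"] == "bbox":
        arrow_bbox = event["value"][0]
        bbox_format = event["metadata"]["format"]  # "xyxy" or "xywh"

        boxes = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
        conf = arrow_bbox["conf"].values.to_numpy()
        labels = arrow_bbox["labels"].values.to_numpy(zero_copy_only=False)
```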



node-hub/ultralytics-yolo/ultralytics_yolo/main.py (+20, -4)

@@ -32,6 +32,7 @@ def main():
     args = parser.parse_args()
 
     model_path = os.getenv("MODEL", args.model)
+    bbox_format = os.getenv("FORMAT", "xyxy")
 
     model = YOLO(model_path)
     node = Node(args.name)
@@ -54,6 +55,9 @@ def main():
                 if encoding == "bgr8":
                     channels = 3
                     storage_type = np.uint8
+                elif encoding == "rgb8":
+                    channels = 3
+                    storage_type = np.uint8
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
@@ -64,12 +68,20 @@ def main():
                 )
                 if encoding == "bgr8":
                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
+                elif encoding == "rgb8":
+                    pass
+                else:
+                    raise RuntimeError(f"Unsupported image encoding: {encoding}")
 
                 results = model(frame, verbose=False)  # includes NMS
 
-                bboxes = np.array(results[0].boxes.xyxy.cpu())
+                if bbox_format == "xyxy":
+                    bboxes = np.array(results[0].boxes.xyxy.cpu())
+                elif bbox_format == "xywh":
+                    bboxes = np.array(results[0].boxes.xywh.cpu())
+                else:
+                    raise RuntimeError(f"Unsupported bbox format: {bbox_format}")
+
                 conf = np.array(results[0].boxes.conf.cpu())
                 labels = np.array(results[0].boxes.cls.cpu())
 
@@ -78,13 +90,17 @@ def main():
                 bbox = {
                     "bbox": bboxes.ravel(),
                     "conf": conf,
-                    "names": names,
+                    "labels": names,
                 }
+                bbox = pa.array([bbox])
 
+                metadata = event["metadata"]
+                metadata["format"] = bbox_format
+
                 node.send_output(
                     "bbox",
-                    pa.array([bbox]),
-                    event["metadata"],
+                    bbox,
+                    metadata,
                 )
 
             elif event_type == "ERROR":

