Rewrite README documentation to reflect metadata changes (tags/v0.3.6-rc0)
@@ -1,48 +1,38 @@
 nodes:
-  - id: webcam
-    custom:
-      source: ./webcam.py
-      inputs:
-        tick:
-          source: dora/timer/millis/10
-          queue_size: 1000
-      outputs:
-        - image
-        - text
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+  - id: camera
+    build: pip install ../../node-hub/opencv-video-capture
+    path: opencv-video-capture
+    inputs:
+      tick: dora/timer/millis/20
+    outputs:
+      - image
+    env:
+      CAPTURE_PATH: 0
+      IMAGE_WIDTH: 640
+      IMAGE_HEIGHT: 480
+      ENCODING: rgb8
-  - id: object_detection
-    custom:
-      source: ./object_detection.py
-      inputs:
-        image: webcam/image
-      outputs:
-        - bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+  - id: object-detection
+    build: pip install -e ../../node-hub/ultralytics-yolo
+    path: ultralytics-yolo
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+    outputs:
+      - bbox
+    env:
+      MODEL: yolov8n.pt
+      FORMAT: xywh
   - id: rerun
-    custom:
-      source: dora-rerun
-      inputs:
-        image: webcam/image
-        text: webcam/text
-        boxes2d: object_detection/bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
-        IMAGE_DEPTH: 3
-  - id: matplotlib
-    custom:
-      source: ./plot.py
-      inputs:
-        image: webcam/image
-        bbox: object_detection/bbox
-      envs:
-        IMAGE_WIDTH: 960
-        IMAGE_HEIGHT: 540
+    build: cargo build -p dora-rerun --release
+    path: dora-rerun
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+      boxes2d: object-detection/bbox
+    env:
+      RERUN_FLUSH_TICK_SECS: "0.001"
+      RERUN_MEMORY_LIMIT: 25%
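With this change, image dimensions and encoding travel as per-message metadata instead of `IMAGE_WIDTH`/`IMAGE_HEIGHT` environment variables. A minimal sketch of a consumer node decoding `camera/image` under the new convention (following the decoding pattern documented in the READMEs below; error handling omitted):

```python
import numpy as np
from dora import Node

node = Node()

for event in node:
    if event["type"] == "INPUT" and event["id"] == "image":
        metadata = event["metadata"]
        width = metadata["width"]        # set by the sender, e.g. 640
        height = metadata["height"]      # e.g. 480
        encoding = metadata["encoding"]  # "bgr8" or "rgb8"
        if encoding in ("bgr8", "rgb8"):
            # flattened UInt8Array -> (height, width, 3) frame
            frame = (
                event["value"]
                .to_numpy()
                .astype(np.uint8)
                .reshape((height, width, 3))
            )
```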
@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-
-import cv2
-import numpy as np
-from ultralytics import YOLO
-
-from dora import Node
-import pyarrow as pa
-
-model = YOLO("yolov8n.pt")
-
-node = Node()
-
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-
-for event in node:
-    event_type = event["type"]
-    if event_type == "INPUT":
-        event_id = event["id"]
-        if event_id == "image":
-            print("[object detection] received image input")
-            image = event["value"].to_numpy().reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 3))
-            frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
-            results = model(frame)  # includes NMS
-            # Process results
-            boxes = np.array(results[0].boxes.xywh.cpu())
-            conf = np.array(results[0].boxes.conf.cpu())
-            label = np.array(results[0].boxes.cls.cpu())
-            # concatenate them together
-            arrays = np.concatenate((boxes, conf[:, None], label[:, None]), axis=1)
-            node.send_output("bbox", pa.array(arrays.ravel()), event["metadata"])
-        else:
-            print("[object detection] ignoring unexpected input:", event_id)
-    elif event_type == "STOP":
-        print("[object detection] received stop")
-    elif event_type == "ERROR":
-        print("[object detection] error: ", event["error"])
-    else:
-        print("[object detection] received unexpected event:", event_type)
@@ -1,90 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-
-from dora import Node
-from dora import DoraStatus
-
-import cv2
-import numpy as np
-
-CI = os.environ.get("CI")
-
-font = cv2.FONT_HERSHEY_SIMPLEX
-
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-
-
-class Plotter:
-    """
-    Plot image and bounding box
-    """
-
-    def __init__(self):
-        self.image = []
-        self.bboxs = []
-
-    def on_input(
-        self,
-        dora_input,
-    ) -> DoraStatus:
-        """
-        Put image and bounding box on cv2 window.
-        Args:
-            dora_input["id"] (str): Id of the dora_input declared in the yaml configuration
-            dora_input["value"] (arrow array): message of the dora_input
-        """
-        if dora_input["id"] == "image":
-            image = (
-                dora_input["value"].to_numpy().reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 3))
-            )
-            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-            self.image = image
-
-        elif dora_input["id"] == "bbox" and len(self.image) != 0:
-            bboxs = dora_input["value"].to_numpy()
-            self.bboxs = np.reshape(bboxs, (-1, 6))
-        for bbox in self.bboxs:
-            [
-                x,
-                y,
-                w,
-                h,
-                confidence,
-                label,
-            ] = bbox
-            cv2.rectangle(
-                self.image,
-                (int(x - w / 2), int(y - h / 2)),
-                (int(x + w / 2), int(y + h / 2)),
-                (0, 255, 0),
-                2,
-            )
-
-        if CI != "true":
-            cv2.imshow("frame", self.image)
-            if cv2.waitKey(1) & 0xFF == ord("q"):
-                return DoraStatus.STOP
-
-        return DoraStatus.CONTINUE
-
-
-plotter = Plotter()
-node = Node()
-
-for event in node:
-    event_type = event["type"]
-    if event_type == "INPUT":
-        status = plotter.on_input(event)
-        if status == DoraStatus.CONTINUE:
-            pass
-        elif status == DoraStatus.STOP:
-            print("plotter returned stop status")
-            break
-    elif event_type == "STOP":
-        print("received stop")
-    else:
-        print("received unexpected event:", event_type)
@@ -1,5 +1,4 @@
 use dora_core::{get_pip_path, get_python_path, run};
-use dora_download::download_file;
 use dora_tracing::set_up_tracing;
 use eyre::{bail, ContextCompat, WrapErr};
 use std::path::Path;

@@ -51,20 +50,13 @@ async fn main() -> eyre::Result<()> {
         );
     }

-    run(
-        get_python_path().context("Could not get pip binary")?,
-        &["-m", "pip", "install", "--upgrade", "pip"],
-        None,
-    )
-    .await
-    .context("failed to install pip")?;
-
     run(
         get_pip_path().context("Could not get pip binary")?,
-        &["install", "-r", "requirements.txt"],
-        None,
+        &["install", "maturin"],
+        Some(venv),
     )
     .await
-    .context("pip install failed")?;
+    .context("pip install maturin failed")?;

     run(
         "maturin",

@@ -73,12 +65,6 @@ async fn main() -> eyre::Result<()> {
     )
     .await
    .context("maturin develop failed")?;
-
-    download_file(
-        "https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt",
-        Path::new("yolov8n.pt"),
-    )
-    .await
-    .context("Could not download weights.")?;

     let dataflow = Path::new("dataflow.yml");
     run_dataflow(dataflow).await?;

@@ -88,6 +74,16 @@ async fn main() -> eyre::Result<()> {

 async fn run_dataflow(dataflow: &Path) -> eyre::Result<()> {
     let cargo = std::env::var("CARGO").unwrap();

+    // First build the dataflow (install requirements)
+    let mut cmd = tokio::process::Command::new(&cargo);
+    cmd.arg("run");
+    cmd.arg("--package").arg("dora-cli");
+    cmd.arg("--").arg("build").arg(dataflow);
+    if !cmd.status().await?.success() {
+        bail!("failed to run dataflow");
+    };
+
     let mut cmd = tokio::process::Command::new(&cargo);
     cmd.arg("run");
     cmd.arg("--package").arg("dora-cli");
@@ -1,56 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import time
-
-import numpy as np
-import cv2
-
-from dora import Node
-import pyarrow as pa
-
-node = Node()
-
-IMAGE_INDEX = int(os.getenv("IMAGE_INDEX", 0))
-IMAGE_WIDTH = int(os.getenv("IMAGE_WIDTH", 960))
-IMAGE_HEIGHT = int(os.getenv("IMAGE_HEIGHT", 540))
-video_capture = cv2.VideoCapture(IMAGE_INDEX)
-video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, IMAGE_WIDTH)
-video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, IMAGE_HEIGHT)
-font = cv2.FONT_HERSHEY_SIMPLEX
-
-start = time.time()
-
-# Run for 20 seconds
-while time.time() - start < 1000:
-    # Wait next dora_input
-    event = node.next()
-    if event is None:
-        break
-    event_type = event["type"]
-    if event_type == "INPUT":
-        ret, frame = video_capture.read()
-        if not ret:
-            frame = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=np.uint8)
-            cv2.putText(
-                frame,
-                "No Webcam was found at index %d" % (IMAGE_INDEX),
-                (int(30), int(30)),
-                font,
-                0.75,
-                (255, 255, 255),
-                2,
-                1,
-            )
-        if len(frame) != IMAGE_HEIGHT * IMAGE_WIDTH * 3:
-            print("frame size is not correct")
-            frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        node.send_output(
-            "image",
-            pa.array(frame.ravel()),
-            event["metadata"],
-        )
-        node.send_output("text", pa.array([f"send image at: {time.time()}"]))
@@ -25,15 +25,15 @@ cargo install --git https://github.com/dora-rs/dora dora-rerun
       text: webcam/text
       boxes2d: object_detection/bbox
     envs:
-      IMAGE_WIDTH: 960
-      IMAGE_HEIGHT: 540
-      IMAGE_DEPTH: 3
       RERUN_MEMORY_LIMIT: 25%
 ```

+## Input definition
+
+- image: UInt8Array + metadata { "width": int, "height": int, "encoding": str }
+- boxes2D: StructArray + metadata { "format": str }
+- text: StringArray
+
 ## Configurations

-- IMAGE_WIDTH: Image width in pixels
-- IMAGE_HEIGHT: Image height in heights
-- IMAGE_DEPTH: Image depth
 - RERUN_MEMORY_LIMIT: Rerun memory limit
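For reference, a sketch of a producer that satisfies this input definition, assuming the dora Python API used elsewhere in this change (the frame here is a placeholder):

```python
import numpy as np
import pyarrow as pa
from dora import Node

node = Node()
img = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder frame

# image: UInt8Array + metadata {"width", "height", "encoding"}
node.send_output(
    "image",
    pa.array(img.ravel()),
    {"width": 640, "height": 480, "encoding": "bgr8"},
)

# text: StringArray
node.send_output("text", pa.array(["hello rerun"]))
```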
@@ -3,12 +3,15 @@
 use std::env::VarError;

 use dora_node_api::{
-    arrow::array::{Float32Array, StringArray, UInt8Array},
-    DoraNode, Event,
+    arrow::{
+        array::{AsArray, StringArray, StructArray, UInt8Array},
+        datatypes::Float32Type,
+    },
+    DoraNode, Event, Parameter,
 };
-use eyre::{eyre, Context, Result};
+use eyre::{eyre, Context, ContextCompat, Result};
 use rerun::{
-    external::re_types::ArrowBuffer, SpawnOptions, TensorBuffer, TensorData, TensorDimension,
+    external::re_types::ArrowBuffer, SpawnOptions, TensorBuffer, TensorData, TensorDimension, Text,
 };

 fn main() -> Result<()> {

@@ -39,60 +42,65 @@ fn main() -> Result<()> {
         .context("Could not spawn rerun visualization")?;

     while let Some(event) = events.recv() {
-        if let Event::Input {
-            id,
-            data,
-            metadata: _,
-        } = event
-        {
+        if let Event::Input { id, data, metadata } = event {
             if id.as_str().contains("image") {
+                let height =
+                    if let Some(Parameter::Integer(height)) = metadata.parameters.get("height") {
+                        height
+                    } else {
+                        &480
+                    };
+                let width =
+                    if let Some(Parameter::Integer(width)) = metadata.parameters.get("width") {
+                        width
+                    } else {
+                        &640
+                    };
+                let encoding = if let Some(Parameter::String(encoding)) =
+                    metadata.parameters.get("encoding")
+                {
+                    encoding
+                } else {
+                    "bgr8"
+                };
+                let channels = if encoding == "bgr8" { 3 } else { 3 };
                 let shape = vec![
                     TensorDimension {
                         name: Some("height".into()),
-                        size: std::env::var(format!("{}_HEIGHT", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_HEIGHT env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_HEIGHT",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: *height as u64,
                     },
                     TensorDimension {
                         name: Some("width".into()),
-                        size: std::env::var(format!("{}_WIDTH", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_WIDTH env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_WIDTH",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: *width as u64,
                     },
                     TensorDimension {
                         name: Some("depth".into()),
-                        size: std::env::var(format!("{}_DEPTH", id.as_str().to_uppercase()))
-                            .context(format!(
-                                "Could not read {}_DEPTH env variable for parsing the image",
-                                id.as_str().to_uppercase()
-                            ))?
-                            .parse()
-                            .context(format!(
-                                "Could not parse env {}_DEPTH",
-                                id.as_str().to_uppercase()
-                            ))?,
+                        size: channels as u64,
                     },
                 ];

-                let buffer: UInt8Array = data.to_data().into();
-                let buffer: &[u8] = buffer.values();
-                let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
-                let tensordata = TensorData::new(shape.clone(), buffer);
-                let image = rerun::Image::new(tensordata);
+                let image = if encoding == "bgr8" {
+                    let buffer: &UInt8Array = data.as_any().downcast_ref().unwrap();
+                    let buffer: &[u8] = buffer.values();
+                    // Transpose values from BGR to RGB
+                    let buffer: Vec<u8> =
+                        buffer.chunks(3).flat_map(|x| [x[2], x[1], x[0]]).collect();
+                    let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
+                    let tensordata = TensorData::new(shape.clone(), buffer);
+                    rerun::Image::new(tensordata)
+                } else if encoding == "rgb8" {
+                    let buffer: &UInt8Array = data.as_any().downcast_ref().unwrap();
+                    let buffer: &[u8] = buffer.values();
+                    let buffer = TensorBuffer::U8(ArrowBuffer::from(buffer));
+                    let tensordata = TensorData::new(shape.clone(), buffer);
+                    rerun::Image::new(tensordata)
+                } else {
+                    unimplemented!("We haven't worked on additional encodings.")
+                };

                 rec.log(id.as_str(), &image)
                     .context("could not log image")?;

@@ -107,21 +115,73 @@ fn main() -> Result<()> {
                     }
                 })?;
             } else if id.as_str().contains("boxes2d") {
-                let buffer: Float32Array = data.to_data().into();
-                let buffer: &[f32] = buffer.values();
+                let bbox_struct: StructArray = data.to_data().into();
+                let format =
+                    if let Some(Parameter::String(format)) = metadata.parameters.get("format") {
+                        format
+                    } else {
+                        "xyxy"
+                    };
+
+                // Cast Bbox
+                let bbox_buffer = bbox_struct
+                    .column_by_name("bbox")
+                    .context("Did not find bbox field within bbox struct")?;
+                let bbox = bbox_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize bbox as list")?
+                    .values();
+                let bbox = bbox
+                    .as_primitive_opt::<Float32Type>()
+                    .context("Could not get bbox value as list")?
+                    .values();
+
+                // Cast Labels
+                let labels_buffer = bbox_struct
+                    .column_by_name("labels")
+                    .context("Did not find labels field within bbox struct")?;
+                let labels = labels_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize labels as list")?
+                    .values();
+                let labels = labels
+                    .as_string_opt::<i32>()
+                    .context("Could not deserialize labels as string")?;
+                let labels: Vec<Text> = labels.iter().map(|x| Text::from(x.unwrap())).collect();
+
+                // Cast confidence
+                let conf_buffer = bbox_struct
+                    .column_by_name("conf")
+                    .context("Did not find conf field within bbox struct")?;
+                let conf = conf_buffer
+                    .as_list_opt::<i32>()
+                    .context("Could not deserialize conf as list")?
+                    .values();
+                let _conf = conf
+                    .as_primitive_opt::<Float32Type>()
+                    .context("Could not deserialize conf as string")?;
+
                 let mut centers = vec![];
                 let mut sizes = vec![];
-                let mut classes = vec![];
-                buffer.chunks(6).for_each(|block| {
-                    if let [x, y, w, h, _conf, cls] = block {
-                        centers.push((*x, *y));
-                        sizes.push((*w, *h));
-                        classes.push(*cls as u16);
-                    }
-                });
+
+                if format == "xywh" {
+                    bbox.chunks(4).for_each(|block| {
+                        if let [x, y, w, h] = block {
+                            centers.push((*x, *y));
+                            sizes.push((*w, *h));
+                        }
+                    });
+                } else if format == "xyxy" {
+                    bbox.chunks(4).for_each(|block| {
+                        if let [min_x, min_y, max_x, max_y] = block {
+                            centers.push(((max_x + min_x) / 2., (max_y + min_y) / 2.));
+                            sizes.push(((max_x - min_x), (max_y - min_y)));
+                        }
+                    });
+                }
+
                 rec.log(
                     id.as_str(),
-                    &rerun::Boxes2D::from_centers_and_sizes(centers, sizes).with_class_ids(classes),
+                    &rerun::Boxes2D::from_centers_and_sizes(centers, sizes).with_labels(labels),
                 )
                 .wrap_err("Could not log Boxes2D")?;
             }
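For reference, the `boxes2d` message deserialized above is a one-element StructArray with list-typed `bbox`, `conf`, and `labels` fields plus a `format` metadata entry. A sketch of the producing side, mirroring the ultralytics-yolo node below (values are illustrative):

```python
import numpy as np
import pyarrow as pa

bbox = {
    "bbox": np.array([320.0, 240.0, 50.0, 80.0]),  # one box, xywh, flattened
    "conf": np.array([0.9]),
    "labels": np.array(["person"]),
}
encoded_bbox = pa.array([bbox])  # StructArray with list fields
# sent as: node.send_output("bbox", encoded_bbox, {"format": "xywh"})
```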
@@ -23,21 +23,36 @@ This node is used to plot a text and a list of bbox on a base image (ideal for object detection)
 - `image`: Arrow array containing the base image

 ```python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": bytes,
-    "data": np.array # flattened image data
+## Image data
+image_data: UInt8Array # Example: pa.array(img.ravel())
+
+metadata = {
+    "width": 640,
+    "height": 480,
+    "encoding": str, # bgr8, rgb8
 }
-
-encoded_image = pa.array([image])
-
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+
+## Example
+node.send_output(
+    "image", image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+)
+
+## Decoding
+storage = event["value"]
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```

 - `bbox`: an arrow array containing the bounding boxes, confidence scores, and class names of the detected objects

@@ -47,15 +62,15 @@
 bbox: {
     "bbox": np.array, # flattened array of bounding boxes
     "conf": np.array, # flat array of confidence scores
-    "names": np.array, # flat array of class names
+    "labels": np.array, # flat array of class names
 }

-encoded_bbox = pa.array([bbox])
+encoded_bbox = pa.array([bbox])  # sent with metadata={"format": "xyxy"}

 decoded_bbox = {
-    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 3),
+    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 4),
     "conf": encoded_bbox[0]["conf"].values.to_numpy(),
-    "names": encoded_bbox[0]["names"].values.to_numpy(zero_copy_only=False),
+    "labels": encoded_bbox[0]["labels"].values.to_numpy(zero_copy_only=False),
 }
 ```
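Because `bbox` messages can now arrive as either `xyxy` corners or `xywh` centers (see the `format` metadata), a small normalization helper sketch, assuming center-based `x, y, w, h` as produced by YOLO:

```python
import numpy as np

def to_xyxy(boxes: np.ndarray, bbox_format: str) -> np.ndarray:
    """Normalize an (N, 4) box array to xyxy corner format."""
    if bbox_format == "xyxy":
        return boxes
    if bbox_format == "xywh":
        x, y, w, h = boxes.T
        return np.stack([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=1)
    raise RuntimeError(f"Unsupported bbox format: {bbox_format}")
```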
@@ -16,7 +16,7 @@
     bboxes: dict = {
         "bbox": np.array([]),
         "conf": np.array([]),
-        "names": np.array([]),
+        "labels": np.array([]),
     }
     text: str = ""

@@ -26,7 +26,7 @@
 def plot_frame(plot):
-    for bbox in zip(plot.bboxes["bbox"], plot.bboxes["conf"], plot.bboxes["names"]):
+    for bbox in zip(plot.bboxes["bbox"], plot.bboxes["conf"], plot.bboxes["labels"]):
         [
             [min_x, min_y, max_x, max_y],
             confidence,

@@ -139,26 +139,57 @@ def main():
                 if encoding == "bgr8":
                     channels = 3
                     storage_type = np.uint8
+                    plot.frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                        .copy()  # Copy So that we can add annotation on the image
+                    )
+                elif encoding == "rgb8":
+                    channels = 3
+                    storage_type = np.uint8
+                    frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                    )
+                    plot.frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")

-                plot.frame = (
-                    storage.to_numpy()
-                    .astype(storage_type)
-                    .reshape((height, width, channels))
-                    .copy()  # Copy So that we can add annotation on the image
-                )
-
                 plot_frame(plot)
                 if not RUNNER_CI:
                     if cv2.waitKey(1) & 0xFF == ord("q"):
                         break

             elif event_id == "bbox":
                 arrow_bbox = event["value"][0]
+                bbox_format = event["metadata"]["format"]
+                if bbox_format == "xyxy":
+                    bbox = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
+                elif bbox_format == "xywh":
+                    original_bbox = arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4)
+                    bbox = np.array(
+                        [
+                            (
+                                x - w / 2,
+                                y - h / 2,
+                                x + w / 2,
+                                y + h / 2,
+                            )
+                            for [x, y, w, h] in original_bbox
+                        ]
+                    )
+                else:
+                    raise RuntimeError(f"Unsupported bbox format: {bbox_format}")
+
                 plot.bboxes = {
-                    "bbox": arrow_bbox["bbox"].values.to_numpy().reshape(-1, 4),
+                    "bbox": bbox,
                     "conf": arrow_bbox["conf"].values.to_numpy(),
-                    "names": arrow_bbox["names"].values.to_numpy(zero_copy_only=False),
+                    "labels": arrow_bbox["labels"].values.to_numpy(
+                        zero_copy_only=False
+                    ),
                 }
             elif event_id == "text":
                 plot.text = event["value"][0].as_py()
@@ -29,22 +29,36 @@ This node is used to capture video from a camera using OpenCV.
 - `image`: an arrow array containing the captured image

 ```Python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": str,
-    "data": np.array # flattened image data
+## Image data
+image_data: UInt8Array # Example: pa.array(img.ravel())
+
+metadata = {
+    "width": 640,
+    "height": 480,
+    "encoding": str, # bgr8, rgb8
 }
-
-encoded_image = pa.array([image])
-
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+
+## Example
+node.send_output(
+    "image", image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+)
+
+## Decoding
+storage = event["value"]
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```

 ## License
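The decoding snippet above only covers `bgr8`. With `ENCODING: rgb8` set, as in the updated dataflow, a consumer that wants OpenCV's BGR channel order would convert after reshaping; a sketch, with the helper name being illustrative:

```python
import cv2
import numpy as np

def decode_rgb8(storage, width: int, height: int) -> np.ndarray:
    """Decode a flattened rgb8 message into an OpenCV-style BGR frame."""
    frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
    return cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # RGB -> BGR
```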
@@ -50,6 +50,7 @@ def main():
     args = parser.parse_args()

     video_capture_path = os.getenv("CAPTURE_PATH", args.path)
+    encoding = os.getenv("ENCODING", "bgr8")

     if isinstance(video_capture_path, str) and video_capture_path.isnumeric():
         video_capture_path = int(video_capture_path)

@@ -102,15 +103,25 @@ def main():
                     )

                 # resize the frame
-                if image_width is not None and image_height is not None:
+                if (
+                    image_width is not None
+                    and image_height is not None
+                    and (
+                        frame.shape[1] != image_width or frame.shape[0] != image_height
+                    )
+                ):
                     frame = cv2.resize(frame, (image_width, image_height))

+                # Get the right encoding
+                if encoding == "rgb8":
+                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
                 storage = pa.array(frame.ravel())

                 metadata = event["metadata"]
                 metadata["width"] = int(frame.shape[1])
                 metadata["height"] = int(frame.shape[0])
-                metadata["encoding"] = "bgr8"
+                metadata["encoding"] = encoding

                 node.send_output("image", storage, metadata)
@@ -5,16 +5,16 @@ This node is used to detect objects in images using YOLOv8.

 # YAML
 ```yaml
-  - id: object_detection
-    build: pip install ../../node-hub/ultralytics-yolo
-    path: ultralytics-yolo
-    inputs:
-      image: webcam/image
-    outputs:
-      - bbox
-    env:
-      MODEL: yolov5n.pt
+- id: object_detection
+  build: pip install ../../node-hub/ultralytics-yolo
+  path: ultralytics-yolo
+  inputs:
+    image: webcam/image
+  outputs:
+    - bbox
+  env:
+    MODEL: yolov5n.pt
 ```

 # Inputs
@@ -22,21 +22,36 @@ This node is used to detect objects in images using YOLOv8.
 - `image`: Arrow array containing the base image

 ```python
-image: {
-    "width": np.uint32,
-    "height": np.uint32,
-    "encoding": str,
-    "data": np.array # flattened image data
+## Image data
+image_data: UInt8Array # Example: pa.array(img.ravel())
+
+metadata = {
+    "width": 640,
+    "height": 480,
+    "encoding": str, # bgr8, rgb8
 }
-
-encoded_image = pa.array([image])
-
-decoded_image = {
-    "width": np.uint32(encoded_image[0]["width"]),
-    "height": np.uint32(encoded_image[0]["height"]),
-    "encoding": encoded_image[0]["encoding"].as_py(),
-    "data": encoded_image[0]["data"].values.to_numpy().astype(np.uint8)
-}
+
+## Example
+node.send_output(
+    "image", image_data, {"width": 640, "height": 480, "encoding": "bgr8"}
+)
+
+## Decoding
+storage = event["value"]
+metadata = event["metadata"]
+encoding = metadata["encoding"]
+width = metadata["width"]
+height = metadata["height"]
+
+if encoding == "bgr8":
+    channels = 3
+    storage_type = np.uint8
+
+frame = (
+    storage.to_numpy()
+    .astype(storage_type)
+    .reshape((height, width, channels))
+)
 ```

@@ -49,15 +64,15 @@
 bbox: {
     "bbox": np.array, # flattened array of bounding boxes
     "conf": np.array, # flat array of confidence scores
-    "names": np.array, # flat array of class names
+    "labels": np.array, # flat array of class names
 }

-encoded_bbox = pa.array([bbox])
+encoded_bbox = pa.array([bbox])  # sent with metadata={"format": "xyxy"}

 decoded_bbox = {
-    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 3),
+    "bbox": encoded_bbox[0]["bbox"].values.to_numpy().reshape(-1, 4),
     "conf": encoded_bbox[0]["conf"].values.to_numpy(),
-    "names": encoded_bbox[0]["names"].values.to_numpy(zero_copy_only=False),
+    "labels": encoded_bbox[0]["labels"].values.to_numpy(zero_copy_only=False),
 }
 ```
@@ -32,6 +32,7 @@ def main():
     args = parser.parse_args()

     model_path = os.getenv("MODEL", args.model)
+    bbox_format = os.getenv("FORMAT", "xyxy")

     model = YOLO(model_path)
     node = Node(args.name)

@@ -54,6 +55,9 @@ def main():
                 if encoding == "bgr8":
                     channels = 3
                     storage_type = np.uint8
+                elif encoding == "rgb8":
+                    channels = 3
+                    storage_type = np.uint8
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")

@@ -64,12 +68,20 @@ def main():
                 )
                 if encoding == "bgr8":
                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
+                elif encoding == "rgb8":
+                    pass
                 else:
                     raise RuntimeError(f"Unsupported image encoding: {encoding}")

                 results = model(frame, verbose=False)  # includes NMS
-                bboxes = np.array(results[0].boxes.xyxy.cpu())
+                if bbox_format == "xyxy":
+                    bboxes = np.array(results[0].boxes.xyxy.cpu())
+                elif bbox_format == "xywh":
+                    bboxes = np.array(results[0].boxes.xywh.cpu())
+                else:
+                    raise RuntimeError(f"Unsupported bbox format: {bbox_format}")
+
                 conf = np.array(results[0].boxes.conf.cpu())
                 labels = np.array(results[0].boxes.cls.cpu())

@@ -78,13 +90,17 @@ def main():
                 bbox = {
                     "bbox": bboxes.ravel(),
                     "conf": conf,
-                    "names": names,
+                    "labels": names,
                 }
+                bbox = pa.array([bbox])
+
+                metadata = event["metadata"]
+                metadata["format"] = bbox_format

                 node.send_output(
                     "bbox",
-                    pa.array([bbox]),
-                    event["metadata"],
+                    bbox,
+                    metadata,
                 )

             elif event_type == "ERROR":