From 0e6942b1bd9aba2ba91cebc2dc0af986adafc47a Mon Sep 17 00:00:00 2001 From: rozgo Date: Thu, 31 Jul 2025 23:29:08 -0600 Subject: [PATCH] fixes examples for refactored rerun --- examples/av1-encoding/dataflow.yml | 2 +- examples/camera/dataflow_jupyter.yml | 2 +- examples/camera/dataflow_rerun.yml | 2 +- examples/lebai/nodes/key_interpolation.py | 38 ++-- examples/python-multi-env/dataflow.yml | 2 +- examples/reachy2-remote/parse_bbox.py | 3 +- examples/reachy2-remote/parse_whisper.py | 14 +- examples/rerun-viewer/dataflow.yml | 2 +- .../dora_argotranslate/main.py | 2 +- node-hub/dora-dav1d/src/lib.rs | 24 +++ .../dora_distil_whisper/main.py | 2 +- node-hub/dora-keyboard/dora_keyboard/main.py | 2 +- .../dora-qwen2-5-vl/dora_qwen2_5_vl/main.py | 1 + node-hub/dora-qwenvl/dora_qwenvl/main.py | 4 +- node-hub/dora-rav1e/src/lib.rs | 15 ++ node-hub/dora-reachy2/dora_reachy2/camera.py | 4 + node-hub/dora-rerun/README.md | 181 ++++++++++++++++-- node-hub/dora-sam2/dora_sam2/main.py | 4 +- node-hub/dora-yolo/dora_yolo/main.py | 1 + .../llama_factory_recorder/main.py | 1 + .../opencv_video_capture/main.py | 1 + 21 files changed, 251 insertions(+), 56 deletions(-) diff --git a/examples/av1-encoding/dataflow.yml b/examples/av1-encoding/dataflow.yml index 2dbcef01..7517bde6 100644 --- a/examples/av1-encoding/dataflow.yml +++ b/examples/av1-encoding/dataflow.yml @@ -1,6 +1,6 @@ nodes: - id: camera - build: pip install ../../node-hub/opencv-video-capture + build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture _unstable_deploy: machine: encoder diff --git a/examples/camera/dataflow_jupyter.yml b/examples/camera/dataflow_jupyter.yml index c76e1a8f..5402e75e 100644 --- a/examples/camera/dataflow_jupyter.yml +++ b/examples/camera/dataflow_jupyter.yml @@ -1,6 +1,6 @@ nodes: - id: camera - build: pip install ../../node-hub/opencv-video-capture + build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture inputs: tick: 
dora/timer/millis/20 diff --git a/examples/camera/dataflow_rerun.yml b/examples/camera/dataflow_rerun.yml index 2633ec94..4ebaa742 100644 --- a/examples/camera/dataflow_rerun.yml +++ b/examples/camera/dataflow_rerun.yml @@ -1,6 +1,6 @@ nodes: - id: camera - build: pip install ../../node-hub/opencv-video-capture + build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture inputs: tick: dora/timer/millis/20 diff --git a/examples/lebai/nodes/key_interpolation.py b/examples/lebai/nodes/key_interpolation.py index 34653baf..382f684e 100644 --- a/examples/lebai/nodes/key_interpolation.py +++ b/examples/lebai/nodes/key_interpolation.py @@ -16,40 +16,40 @@ for event in node: char = event["value"][0].as_py() if char == "w": - node.send_output("text", pa.array(["forward"])) + node.send_output("text", pa.array(["forward"]), {"primitive": "text"}) elif char == "s": - node.send_output("text", pa.array(["back"])) + node.send_output("text", pa.array(["back"]), {"primitive": "text"}) elif char == "c": - node.send_output("text", pa.array([" go home"])) + node.send_output("text", pa.array([" go home"]), {"primitive": "text"}) elif char == "d": - node.send_output("text", pa.array(["right"])) + node.send_output("text", pa.array(["right"]), {"primitive": "text"}) elif char == "a": - node.send_output("text", pa.array(["left"])) + node.send_output("text", pa.array(["left"]), {"primitive": "text"}) elif char == "e": - node.send_output("text", pa.array(["up"])) + node.send_output("text", pa.array(["up"]), {"primitive": "text"}) elif char == "q": - node.send_output("text", pa.array(["down"])) + node.send_output("text", pa.array(["down"]), {"primitive": "text"}) elif char == "t": - node.send_output("text", pa.array(["close"])) + node.send_output("text", pa.array(["close"]), {"primitive": "text"}) elif char == "r": - node.send_output("text", pa.array(["open"])) + node.send_output("text", pa.array(["open"]), {"primitive": "text"}) elif char == "6": - 
node.send_output("text", pa.array(["yaw right"])) + node.send_output("text", pa.array(["yaw right"]), {"primitive": "text"}) elif char == "4": - node.send_output("text", pa.array(["yaw left"])) + node.send_output("text", pa.array(["yaw left"]), {"primitive": "text"}) elif char == "3": - node.send_output("text", pa.array(["yaw shoulder right"])) + node.send_output("text", pa.array(["yaw shoulder right"]), {"primitive": "text"}) elif char == "1": - node.send_output("text", pa.array(["yaw shoulder left"])) + node.send_output("text", pa.array(["yaw shoulder left"]), {"primitive": "text"}) elif char == "8": - node.send_output("text", pa.array(["pitch up"])) + node.send_output("text", pa.array(["pitch up"]), {"primitive": "text"}) elif char == "2": - node.send_output("text", pa.array(["pitch down"])) + node.send_output("text", pa.array(["pitch down"]), {"primitive": "text"}) elif char == "7": - node.send_output("text", pa.array(["roll left"])) + node.send_output("text", pa.array(["roll left"]), {"primitive": "text"}) elif char == "9": - node.send_output("text", pa.array(["roll right"])) + node.send_output("text", pa.array(["roll right"]), {"primitive": "text"}) elif char == "x": - node.send_output("text", pa.array(["stop"])) + node.send_output("text", pa.array(["stop"]), {"primitive": "text"}) elif char == "j": - node.send_output("text", pa.array([""])) + node.send_output("text", pa.array([""]), {"primitive": "text"}) diff --git a/examples/python-multi-env/dataflow.yml b/examples/python-multi-env/dataflow.yml index 3bea06ab..eac4652e 100644 --- a/examples/python-multi-env/dataflow.yml +++ b/examples/python-multi-env/dataflow.yml @@ -23,7 +23,7 @@ nodes: VIRTUAL_ENV: env_2 - id: plot - build: pip install dora-rerun + build: pip install -e ../../node-hub/dora-rerun path: dora-rerun inputs: image: camera/image diff --git a/examples/reachy2-remote/parse_bbox.py b/examples/reachy2-remote/parse_bbox.py index c1a992a4..0a1380fc 100644 --- a/examples/reachy2-remote/parse_bbox.py 
+++ b/examples/reachy2-remote/parse_bbox.py @@ -50,7 +50,7 @@ def extract_bboxes(json_text): for event in node: if event["type"] == "INPUT": if len(event["value"]) == 0: - node.send_output("bbox_track", pa.array([])) + node.send_output("bbox_track", pa.array([]), {"primitive": "boxes2d"}) continue text = event["value"][0].as_py() @@ -62,6 +62,7 @@ for event in node: bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO) metadata["image_id"] = image_id metadata["encoding"] = "xyxy" + metadata["primitive"] = "boxes2d" if image_id == "image_left": node.send_output( "bbox_track", diff --git a/examples/reachy2-remote/parse_whisper.py b/examples/reachy2-remote/parse_whisper.py index 98ee07de..cdfc0d57 100644 --- a/examples/reachy2-remote/parse_whisper.py +++ b/examples/reachy2-remote/parse_whisper.py @@ -57,16 +57,16 @@ for event in node: node.send_output("points", pa.array([], type=pa.float64())) elif "follow" in text: text = f"Given the prompt: {text}. Output the bounding boxes for the given followed object" - node.send_output("text", pa.array([text]), {"image_id": "image_left"}) + node.send_output("text", pa.array([text]), {"image_id": "image_left", "primitive": "text"}) elif "grab " in text: text = f"Given the prompt: {text}. Output the bounding boxes for the given grabbed object" node.send_output( - "text", pa.array([text]), {"image_id": "image_depth", "action": "grab"} + "text", pa.array([text]), {"image_id": "image_depth", "action": "grab", "primitive": "text"} ) elif "get " in text: text = f"Given the prompt: {text}. 
Output the bounding boxes for the object" node.send_output( - "text", pa.array([text]), {"image_id": "image_left", "action": "grab"} + "text", pa.array([text]), {"image_id": "image_left", "action": "grab", "primitive": "text"} ) last_prompt = text elif "put " in text: @@ -74,7 +74,7 @@ for event in node: node.send_output( "text", pa.array([text]), - {"image_id": "image_left", "action": "release"}, + {"image_id": "image_left", "action": "release", "primitive": "text"}, ) last_prompt = text elif "drop " in text: @@ -82,7 +82,7 @@ for event in node: node.send_output( "text", pa.array([text]), - {"image_id": "image_depth", "action": "release"}, + {"image_id": "image_depth", "action": "release", "primitive": "text"}, ) elif "release left" in text: node.send_output("action_release_left", pa.array([1.0])) @@ -123,13 +123,13 @@ for event in node: node.send_output( "text", pa.array([text]), - {"image_id": "image_depth", "action": "grab"}, + {"image_id": "image_depth", "action": "grab", "primitive": "text"}, ) elif "put " in text: text = f"Given the prompt: {text}. 
Output the bounding boxes for the place to put the object" node.send_output( "text", pa.array([text]), - {"image_id": "image_depth", "action": "release"}, + {"image_id": "image_depth", "action": "release", "primitive": "text"}, ) \ No newline at end of file diff --git a/examples/rerun-viewer/dataflow.yml b/examples/rerun-viewer/dataflow.yml index a33d308f..c3448591 100644 --- a/examples/rerun-viewer/dataflow.yml +++ b/examples/rerun-viewer/dataflow.yml @@ -1,6 +1,6 @@ nodes: - id: camera - build: pip install ../../node-hub/opencv-video-capture + build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture inputs: tick: dora/timer/millis/20 diff --git a/node-hub/dora-argotranslate/dora_argotranslate/main.py b/node-hub/dora-argotranslate/dora_argotranslate/main.py index 2bab22ee..5ce8ef5a 100644 --- a/node-hub/dora-argotranslate/dora_argotranslate/main.py +++ b/node-hub/dora-argotranslate/dora_argotranslate/main.py @@ -42,5 +42,5 @@ def main(): node.send_output( "text", pa.array([translated_text]), - {"language": to_code}, + {"language": to_code, "primitive": "text"}, ) diff --git a/node-hub/dora-dav1d/src/lib.rs b/node-hub/dora-dav1d/src/lib.rs index 45e8477d..2d6afc7c 100644 --- a/node-hub/dora-dav1d/src/lib.rs +++ b/node-hub/dora-dav1d/src/lib.rs @@ -112,6 +112,12 @@ pub fn lib_main() -> Result<()> { p.height() as i64 ), ); + metadata.parameters.insert( + "primitive".to_string(), + dora_node_api::Parameter::String( + "image".to_string(), + ), + ); node.send_output(id, metadata.parameters, arrow) .unwrap(); @@ -131,6 +137,12 @@ pub fn lib_main() -> Result<()> { "bgr8".to_string(), ), ); + metadata.parameters.insert( + "primitive".to_string(), + dora_node_api::Parameter::String( + "image".to_string(), + ), + ); node.send_output(id, metadata.parameters, arrow) .unwrap(); } @@ -154,6 +166,12 @@ pub fn lib_main() -> Result<()> { "mono8".to_string(), ), ); + metadata.parameters.insert( + "primitive".to_string(), + 
dora_node_api::Parameter::String( + "image".to_string(), + ), + ); node.send_output(id, metadata.parameters, arrow) .unwrap(); } @@ -167,6 +185,12 @@ pub fn lib_main() -> Result<()> { "mono16".to_string(), ), ); + metadata.parameters.insert( + "primitive".to_string(), + dora_node_api::Parameter::String( + "image".to_string(), + ), + ); node.send_output(id, metadata.parameters, arrow) .unwrap(); } diff --git a/node-hub/dora-distil-whisper/dora_distil_whisper/main.py b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py index 007b3b43..70c54fcf 100644 --- a/node-hub/dora-distil-whisper/dora_distil_whisper/main.py +++ b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py @@ -236,5 +236,5 @@ def main(): if text.strip() == "" or text.strip() == ".": continue node.send_output( - "text", pa.array([text]), {"language": TARGET_LANGUAGE}, + "text", pa.array([text]), {"language": TARGET_LANGUAGE, "primitive": "text"}, ) diff --git a/node-hub/dora-keyboard/dora_keyboard/main.py b/node-hub/dora-keyboard/dora_keyboard/main.py index 644c10c5..6a950629 100644 --- a/node-hub/dora-keyboard/dora_keyboard/main.py +++ b/node-hub/dora-keyboard/dora_keyboard/main.py @@ -23,7 +23,7 @@ def main(): if event is not None and isinstance(event, Events.Press): if hasattr(event.key, "char"): if event.key.char is not None: - node.send_output("char", pa.array([event.key.char])) + node.send_output("char", pa.array([event.key.char]), {"primitive": "text"}) if __name__ == "__main__": diff --git a/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py b/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py index 7592bdad..95eb807c 100644 --- a/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py +++ b/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py @@ -316,6 +316,7 @@ def main(): ) metadata = event["metadata"] metadata["image_id"] = image_id if image_id is not None else "all" + metadata["primitive"] = "text" node.send_output( "text", pa.array([response]), diff --git 
a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index 537e6133..dc7472a2 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -162,7 +162,7 @@ def main(): node.send_output( "tick", pa.array([response]), - {}, + {"primitive": "text"}, ) elif event_id == "text": @@ -176,7 +176,7 @@ def main(): node.send_output( "text", pa.array([response]), - {}, + {"primitive": "text"}, ) elif event_type == "ERROR": diff --git a/node-hub/dora-rav1e/src/lib.rs b/node-hub/dora-rav1e/src/lib.rs index 9ce9f1fd..d9261abe 100644 --- a/node-hub/dora-rav1e/src/lib.rs +++ b/node-hub/dora-rav1e/src/lib.rs @@ -231,6 +231,10 @@ fn send_yuv( ); let arrow = data.into_arrow(); + metadata.parameters.insert( + "primitive".to_string(), + Parameter::String("image".to_string()), + ); node.send_output(id, metadata.parameters.clone(), arrow) .context("could not send output") .unwrap(); @@ -248,6 +252,9 @@ fn send_yuv( let data = pkt.data; let arrow = data.into_arrow(); + metadata + .parameters + .insert("primitive".to_string(), Parameter::String("image".to_string())); node.send_output(id, metadata.parameters.clone(), arrow) .context("could not send output") .unwrap(); @@ -428,6 +435,10 @@ pub fn lib_main() -> Result<()> { ); let arrow = data.into_arrow(); + metadata.parameters.insert( + "primitive".to_string(), + Parameter::String("image".to_string()), + ); node.send_output(id, metadata.parameters.clone(), arrow) .context("could not send output") @@ -439,6 +450,10 @@ pub fn lib_main() -> Result<()> { Parameter::String("av1".to_string()), ); let arrow = data.into_arrow(); + metadata.parameters.insert( + "primitive".to_string(), + Parameter::String("image".to_string()), + ); node.send_output(id, metadata.parameters, arrow) .context("could not send output") .unwrap(); diff --git a/node-hub/dora-reachy2/dora_reachy2/camera.py b/node-hub/dora-reachy2/dora_reachy2/camera.py index e570c962..9921bf5f 100644 --- 
a/node-hub/dora-reachy2/dora_reachy2/camera.py +++ b/node-hub/dora-reachy2/dora_reachy2/camera.py @@ -47,6 +47,7 @@ def main(): "encoding": "bgr8", "width": image_left.shape[1], "height": image_left.shape[0], + "primitive": "image", }, ) @@ -64,6 +65,7 @@ def main(): "encoding": "bgr8", "width": image_right.shape[1], "height": image_right.shape[0], + "primitive": "image", }, ) @@ -76,6 +78,7 @@ def main(): "encoding": "bgr8", "width": depth_image.shape[1], "height": depth_image.shape[0], + "primitive": "image", }, ) @@ -92,6 +95,7 @@ def main(): "height": height, "focal": [int(k[0, 0]), int(k[1, 1])], "resolution": [int(k[0, 2]), int(k[1, 2])], + "primitive": "depth", }, ) diff --git a/node-hub/dora-rerun/README.md b/node-hub/dora-rerun/README.md index 974b7e5a..9f8df5f8 100644 --- a/node-hub/dora-rerun/README.md +++ b/node-hub/dora-rerun/README.md @@ -4,6 +4,99 @@ dora visualization using `rerun` This nodes is still experimental and format for passing Images, Bounding boxes, and text are probably going to change in the future. +## Changes in v0.24.0 + +This version introduces significant breaking changes to align with Rerun SDK v0.24.0 and improve the visualization primitive system: + +### Major Breaking Changes + +1. **Primitive-based Visualization System** + - **BREAKING**: All inputs now require a `primitive` metadata field to specify the visualization type + - Previously, visualization type was inferred from the input ID (e.g., "image", "depth", "boxes2d") + - Now you must explicitly specify: `metadata: { "primitive": "image" }` (or "depth", "boxes2d", etc.) + - This change allows more flexible naming of inputs and clearer intent + +2. **Rerun SDK Upgrade** + - Updated from Rerun v0.23.3 to v0.24.0 + - Updated Python dependency from `rerun_sdk>=0.23.1` to `rerun_sdk>=0.24.0` + +3. 
**New 3D Boxes Support** + - Added comprehensive 3D bounding box visualization with multiple format support + - Supports three formats: "center_half_size" (default), "center_size", and "min_max" + - Configurable rendering: wireframe (default) or solid fill + - Support for per-box colors and labels + +4. **Enhanced Depth Visualization** + - Depth data now supports pinhole camera setup for proper 3D reconstruction + - Requires Float32Array format (previously supported Float64 and UInt16) + - New metadata fields for camera configuration: + - `camera_position`: [x, y, z] position + - `camera_orientation`: [x, y, z, w] quaternion + - `focal`: [fx, fy] focal lengths + - `principal_point`: [cx, cy] principal point (optional) + - Without camera metadata, depth is logged but 3D reconstruction is skipped + +5. **Removed Features** + - Removed full series/time-series visualization; the `series` primitive remains but now logs only the first value as a scalar + - Removed legacy camera pitch configuration via CAMERA_PITCH environment variable + - Removed automatic depth-to-3D point cloud conversion without proper camera parameters + +### Migration Guide + +#### Before (old system): +```yaml +nodes: + - id: rerun + inputs: + image: camera/image # Type inferred from "image" in ID + depth: sensor/depth # Type inferred from "depth" in ID + boxes2d: detector/boxes2d # Type inferred from "boxes2d" in ID +``` + +#### After (new system): +```yaml +nodes: + - id: rerun + inputs: + camera_feed: camera/image + depth_sensor: sensor/depth + detections: detector/boxes2d + # Visualization types must be specified in the sender's metadata: + # camera/image must send: metadata { "primitive": "image" } + # sensor/depth must send: metadata { "primitive": "depth" } + # detector/boxes2d must send: metadata { "primitive": "boxes2d" } +``` + +### Migration Status + +**Successfully tested examples:** +- ✅ examples/rerun-viewer/dataflow.yml - Basic camera visualization +- ✅ examples/camera/dataflow_rerun.yml - Camera with rerun +- ✅ examples/python-dataflow/dataflow.yml -
Camera + YOLO object detection +- ✅ examples/python-multi-env/dataflow.yml - Multi-environment setup + +**Updated but NOT tested examples:** +- 🔧 examples/keyboard/dataflow.yml - Updated dora-keyboard to send primitive metadata (requires Linux/X11 for testing) +- 🔧 examples/translation/* - Updated dora-distil-whisper and dora-argotranslate to send primitive metadata +- 🔧 examples/reachy2-remote/dataflow_reachy.yml - Updated multiple nodes (dora-reachy2, dora-qwen2-5-vl, dora-sam2, parse_bbox.py, parse_whisper.py) +- 🔧 examples/lebai/graphs/dataflow_full.yml - Updated dora-qwenvl, llama-factory-recorder, key_interpolation.py +- 🔧 examples/av1-encoding/* - Updated dora-dav1d and dora-rav1e to send primitive metadata + +Key changes made: +1. Added `-e` flag to local package installs in dataflows for development +2. Updated node packages to include `"primitive"` metadata: + - opencv-video-capture: adds `"primitive": "image"` + - dora-yolo: adds `"primitive": "boxes2d"` + - dora-keyboard: adds `"primitive": "text"` + - dora-distil-whisper: adds `"primitive": "text"` + - dora-argotranslate: adds `"primitive": "text"` + - dora-sam2: adds `"primitive": "masks"` + - dora-qwen2-5-vl: adds `"primitive": "text"` + - dora-qwenvl: adds `"primitive": "text"` + - dora-reachy2/camera.py: adds appropriate primitives + - dora-dav1d: adds `"primitive": "image"` + - dora-rav1e: adds `"primitive": "image"` + ## Getting Started ```bash @@ -30,24 +123,76 @@ pip install dora-rerun RERUN_MEMORY_LIMIT: 25% ``` -## Input definition - -- image: UInt8Array + metadata { "width": int, "height": int, "encoding": str } -- boxes2D: StructArray + metadata { "format": str } -- boxes3D: Float32Array/StructArray + metadata { "format": str, "solid": bool, "color": list[int] } - - Formats: "center_half_size" (default) [cx, cy, cz, hx, hy, hz], - "center_size" [cx, cy, cz, sx, sy, sz], - "min_max" [min_x, min_y, min_z, max_x, max_y, max_z] - - Default rendering: wireframe (set "solid": true for filled 
boxes) - - Color: RGB array [r, g, b] with values 0-255 -- text: StringArray -- jointstate: Float32Array -- points3d: Float32Array (xyz triplets) + metadata { "color": list[int] (RGB 0-255), "radii": list[float] } -- points2d: Float32Array (xy pairs) -- lines3d: Float32Array (xyz triplets) + metadata { "color": list[int] (RGB 0-255), "radius": float } -- depth: Float32Array + metadata { "width": int, "height": int, "camera_position": list[float], "camera_orientation": list[float], "focal": list[float], "principal_point": list[float] } - - With camera metadata: creates pinhole camera view with depth image - - Without camera metadata: skips 3D reconstruction +## Supported Visualization Primitives + +All inputs require a `"primitive"` field in the metadata to specify the visualization type: + +### 1. image +- **Data**: UInt8Array +- **Required metadata**: `{ "primitive": "image", "width": int, "height": int, "encoding": str }` +- **Supported encodings**: "bgr8", "rgb8", "jpeg", "png", "avif" + +### 2. depth +- **Data**: Float32Array +- **Required metadata**: `{ "primitive": "depth", "width": int, "height": int }` +- **Optional metadata for 3D reconstruction**: + - `"camera_position"`: [x, y, z] position + - `"camera_orientation"`: [x, y, z, w] quaternion + - `"focal"`: [fx, fy] focal lengths + - `"principal_point"`: [cx, cy] principal point + +### 3. text +- **Data**: StringArray +- **Required metadata**: `{ "primitive": "text" }` + +### 4. boxes2d +- **Data**: StructArray or Float32Array +- **Required metadata**: `{ "primitive": "boxes2d", "format": str }` +- **Formats**: "xyxy" (default), "xywh" + +### 5. boxes3d +- **Data**: Float32Array or StructArray +- **Required metadata**: `{ "primitive": "boxes3d" }` +- **Optional metadata**: + - `"format"`: "center_half_size" (default), "center_size", "min_max" + - `"solid"`: bool (default false for wireframe) + - `"color"`: [r, g, b] RGB values 0-255 + +### 6. 
masks +- **Data**: UInt8Array +- **Required metadata**: `{ "primitive": "masks", "width": int, "height": int }` + +### 7. jointstate +- **Data**: Float32Array +- **Required metadata**: `{ "primitive": "jointstate" }` +- **Note**: Requires URDF configuration (see below) + +### 8. pose +- **Data**: Float32Array (7 values: [x, y, z, qx, qy, qz, qw]) +- **Required metadata**: `{ "primitive": "pose" }` + +### 9. series +- **Data**: Float32Array +- **Required metadata**: `{ "primitive": "series" }` +- **Note**: Currently logs only the first value as a scalar + +### 10. points3d +- **Data**: Float32Array (xyz triplets) +- **Required metadata**: `{ "primitive": "points3d" }` +- **Optional metadata**: + - `"color"`: [r, g, b] RGB values 0-255 + - `"radii"`: list of float radius values + +### 11. points2d +- **Data**: Float32Array (xy pairs) +- **Required metadata**: `{ "primitive": "points2d" }` + +### 12. lines3d +- **Data**: Float32Array (xyz triplets defining line segments) +- **Required metadata**: `{ "primitive": "lines3d" }` +- **Optional metadata**: + - `"color"`: [r, g, b] RGB values 0-255 + - `"radius"`: float line thickness ## (Experimental) For plotting 3D URDF diff --git a/node-hub/dora-sam2/dora_sam2/main.py b/node-hub/dora-sam2/dora_sam2/main.py index 37b216a9..be74e6f7 100644 --- a/node-hub/dora-sam2/dora_sam2/main.py +++ b/node-hub/dora-sam2/dora_sam2/main.py @@ -134,7 +134,7 @@ def main(): if "boxes2d" in event_id: if len(event["value"]) == 0: - node.send_output("masks", pa.array([])) + node.send_output("masks", pa.array([]), {"primitive": "masks"}) continue if isinstance(event["value"], pa.StructArray): boxes2d = event["value"][0].get("bbox").values.to_numpy() @@ -183,8 +183,10 @@ def main(): ## Mask to 3 channel image match return_type: case pa.Array: + metadata["primitive"] = "masks" node.send_output("masks", pa.array(masks.ravel()), metadata) case pa.StructArray: + metadata["primitive"] = "masks" node.send_output( "masks", pa.array( diff --git 
a/node-hub/dora-yolo/dora_yolo/main.py b/node-hub/dora-yolo/dora_yolo/main.py index b8e345df..c3cbc62c 100644 --- a/node-hub/dora-yolo/dora_yolo/main.py +++ b/node-hub/dora-yolo/dora_yolo/main.py @@ -95,6 +95,7 @@ def main(): metadata = event["metadata"] metadata["format"] = bbox_format + metadata["primitive"] = "boxes2d" node.send_output( "bbox", diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py index 57f13ea5..2b2473e8 100644 --- a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py +++ b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py @@ -217,6 +217,7 @@ def main(): jsonl_file=default_record_json_path, messages=messages, ) + metadata["primitive"] = "text" node.send_output( "text", pa.array([ground_truth]), diff --git a/node-hub/opencv-video-capture/opencv_video_capture/main.py b/node-hub/opencv-video-capture/opencv_video_capture/main.py index abfc08d1..548e95a6 100644 --- a/node-hub/opencv-video-capture/opencv_video_capture/main.py +++ b/node-hub/opencv-video-capture/opencv_video_capture/main.py @@ -125,6 +125,7 @@ def main(): metadata["encoding"] = encoding metadata["width"] = int(frame.shape[1]) metadata["height"] = int(frame.shape[0]) + metadata["primitive"] = "image" # Get the right encoding if encoding == "rgb8":