@@ -1,6 +1,6 @@
 nodes:
   - id: camera
-    build: pip install ../../node-hub/opencv-video-capture
+    build: pip install -e ../../node-hub/opencv-video-capture
     path: opencv-video-capture
     _unstable_deploy:
       machine: encoder
@@ -1,6 +1,6 @@
 nodes:
   - id: camera
-    build: pip install ../../node-hub/opencv-video-capture
+    build: pip install -e ../../node-hub/opencv-video-capture
     path: opencv-video-capture
     inputs:
      tick: dora/timer/millis/20
@@ -1,6 +1,6 @@
 nodes:
   - id: camera
-    build: pip install ../../node-hub/opencv-video-capture
+    build: pip install -e ../../node-hub/opencv-video-capture
     path: opencv-video-capture
     inputs:
      tick: dora/timer/millis/20
@@ -16,40 +16,40 @@ for event in node:
     char = event["value"][0].as_py()
     if char == "w":
-        node.send_output("text", pa.array(["forward"]))
+        node.send_output("text", pa.array(["forward"]), {"primitive": "text"})
     elif char == "s":
-        node.send_output("text", pa.array(["back"]))
+        node.send_output("text", pa.array(["back"]), {"primitive": "text"})
     elif char == "c":
-        node.send_output("text", pa.array([" go home"]))
+        node.send_output("text", pa.array([" go home"]), {"primitive": "text"})
     elif char == "d":
-        node.send_output("text", pa.array(["right"]))
+        node.send_output("text", pa.array(["right"]), {"primitive": "text"})
     elif char == "a":
-        node.send_output("text", pa.array(["left"]))
+        node.send_output("text", pa.array(["left"]), {"primitive": "text"})
     elif char == "e":
-        node.send_output("text", pa.array(["up"]))
+        node.send_output("text", pa.array(["up"]), {"primitive": "text"})
     elif char == "q":
-        node.send_output("text", pa.array(["down"]))
+        node.send_output("text", pa.array(["down"]), {"primitive": "text"})
     elif char == "t":
-        node.send_output("text", pa.array(["close"]))
+        node.send_output("text", pa.array(["close"]), {"primitive": "text"})
     elif char == "r":
-        node.send_output("text", pa.array(["open"]))
+        node.send_output("text", pa.array(["open"]), {"primitive": "text"})
     elif char == "6":
-        node.send_output("text", pa.array(["yaw right"]))
+        node.send_output("text", pa.array(["yaw right"]), {"primitive": "text"})
     elif char == "4":
-        node.send_output("text", pa.array(["yaw left"]))
+        node.send_output("text", pa.array(["yaw left"]), {"primitive": "text"})
     elif char == "3":
-        node.send_output("text", pa.array(["yaw shoulder right"]))
+        node.send_output("text", pa.array(["yaw shoulder right"]), {"primitive": "text"})
     elif char == "1":
-        node.send_output("text", pa.array(["yaw shoulder left"]))
+        node.send_output("text", pa.array(["yaw shoulder left"]), {"primitive": "text"})
     elif char == "8":
-        node.send_output("text", pa.array(["pitch up"]))
+        node.send_output("text", pa.array(["pitch up"]), {"primitive": "text"})
     elif char == "2":
-        node.send_output("text", pa.array(["pitch down"]))
+        node.send_output("text", pa.array(["pitch down"]), {"primitive": "text"})
     elif char == "7":
-        node.send_output("text", pa.array(["roll left"]))
+        node.send_output("text", pa.array(["roll left"]), {"primitive": "text"})
     elif char == "9":
-        node.send_output("text", pa.array(["roll right"]))
+        node.send_output("text", pa.array(["roll right"]), {"primitive": "text"})
     elif char == "x":
-        node.send_output("text", pa.array(["stop"]))
+        node.send_output("text", pa.array(["stop"]), {"primitive": "text"})
     elif char == "j":
-        node.send_output("text", pa.array([""]))
+        node.send_output("text", pa.array([""]), {"primitive": "text"})
@@ -23,7 +23,7 @@ nodes:
       VIRTUAL_ENV: env_2
   - id: plot
-    build: pip install dora-rerun
+    build: pip install -e ../../node-hub/dora-rerun
     path: dora-rerun
     inputs:
      image: camera/image
@@ -50,7 +50,7 @@ def extract_bboxes(json_text):
 for event in node:
     if event["type"] == "INPUT":
         if len(event["value"]) == 0:
-            node.send_output("bbox_track", pa.array([]))
+            node.send_output("bbox_track", pa.array([]), {"primitive": "boxes2d"})
             continue
         text = event["value"][0].as_py()
@@ -62,6 +62,7 @@ for event in node:
         bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO)
         metadata["image_id"] = image_id
         metadata["encoding"] = "xyxy"
+        metadata["primitive"] = "boxes2d"
         if image_id == "image_left":
             node.send_output(
                 "bbox_track",
@@ -57,16 +57,16 @@ for event in node:
        node.send_output("points", pa.array([], type=pa.float64()))
    elif "follow" in text:
        text = f"Given the prompt: {text}. Output the bounding boxes for the given followed object"
-       node.send_output("text", pa.array([text]), {"image_id": "image_left"})
+       node.send_output("text", pa.array([text]), {"image_id": "image_left", "primitive": "text"})
    elif "grab " in text:
        text = f"Given the prompt: {text}. Output the bounding boxes for the given grabbed object"
        node.send_output(
-           "text", pa.array([text]), {"image_id": "image_depth", "action": "grab"}
+           "text", pa.array([text]), {"image_id": "image_depth", "action": "grab", "primitive": "text"}
        )
    elif "get " in text:
        text = f"Given the prompt: {text}. Output the bounding boxes for the object"
        node.send_output(
-           "text", pa.array([text]), {"image_id": "image_left", "action": "grab"}
+           "text", pa.array([text]), {"image_id": "image_left", "action": "grab", "primitive": "text"}
        )
        last_prompt = text
    elif "put " in text:
@@ -74,7 +74,7 @@ for event in node:
        node.send_output(
            "text",
            pa.array([text]),
-           {"image_id": "image_left", "action": "release"},
+           {"image_id": "image_left", "action": "release", "primitive": "text"},
        )
        last_prompt = text
    elif "drop " in text:
@@ -82,7 +82,7 @@ for event in node:
        node.send_output(
            "text",
            pa.array([text]),
-           {"image_id": "image_depth", "action": "release"},
+           {"image_id": "image_depth", "action": "release", "primitive": "text"},
        )
    elif "release left" in text:
        node.send_output("action_release_left", pa.array([1.0]))
@@ -123,13 +123,13 @@ for event in node:
        node.send_output(
            "text",
            pa.array([text]),
-           {"image_id": "image_depth", "action": "grab"},
+           {"image_id": "image_depth", "action": "grab", "primitive": "text"},
        )
    elif "put " in text:
        text = f"Given the prompt: {text}. Output the bounding boxes for the place to put the object"
        node.send_output(
            "text",
            pa.array([text]),
-           {"image_id": "image_depth", "action": "release"},
+           {"image_id": "image_depth", "action": "release", "primitive": "text"},
        )
@@ -1,6 +1,6 @@
 nodes:
   - id: camera
-    build: pip install ../../node-hub/opencv-video-capture
+    build: pip install -e ../../node-hub/opencv-video-capture
     path: opencv-video-capture
     inputs:
      tick: dora/timer/millis/20
@@ -42,5 +42,5 @@ def main():
        node.send_output(
            "text",
            pa.array([translated_text]),
-           {"language": to_code},
+           {"language": to_code, "primitive": "text"},
        )
@@ -112,6 +112,12 @@ pub fn lib_main() -> Result<()> {
                p.height() as i64
            ),
        );
+       metadata.parameters.insert(
+           "primitive".to_string(),
+           dora_node_api::Parameter::String(
+               "image".to_string(),
+           ),
+       );
        node.send_output(id, metadata.parameters, arrow)
            .unwrap();
@@ -131,6 +137,12 @@ pub fn lib_main() -> Result<()> {
                "bgr8".to_string(),
            ),
        );
+       metadata.parameters.insert(
+           "primitive".to_string(),
+           dora_node_api::Parameter::String(
+               "image".to_string(),
+           ),
+       );
        node.send_output(id, metadata.parameters, arrow)
            .unwrap();
    }
@@ -154,6 +166,12 @@ pub fn lib_main() -> Result<()> {
                "mono8".to_string(),
            ),
        );
+       metadata.parameters.insert(
+           "primitive".to_string(),
+           dora_node_api::Parameter::String(
+               "image".to_string(),
+           ),
+       );
        node.send_output(id, metadata.parameters, arrow)
            .unwrap();
    }
@@ -167,6 +185,12 @@ pub fn lib_main() -> Result<()> {
                "mono16".to_string(),
            ),
        );
+       metadata.parameters.insert(
+           "primitive".to_string(),
+           dora_node_api::Parameter::String(
+               "image".to_string(),
+           ),
+       );
        node.send_output(id, metadata.parameters, arrow)
            .unwrap();
    }
@@ -236,5 +236,5 @@ def main():
            if text.strip() == "" or text.strip() == ".":
                continue
            node.send_output(
-               "text", pa.array([text]), {"language": TARGET_LANGUAGE},
+               "text", pa.array([text]), {"language": TARGET_LANGUAGE, "primitive": "text"},
            )
@@ -23,7 +23,7 @@ def main():
        if event is not None and isinstance(event, Events.Press):
            if hasattr(event.key, "char"):
                if event.key.char is not None:
-                   node.send_output("char", pa.array([event.key.char]))
+                   node.send_output("char", pa.array([event.key.char]), {"primitive": "text"})
 if __name__ == "__main__":
@@ -316,6 +316,7 @@ def main():
            )
            metadata = event["metadata"]
            metadata["image_id"] = image_id if image_id is not None else "all"
+           metadata["primitive"] = "text"
            node.send_output(
                "text",
                pa.array([response]),
@@ -162,7 +162,7 @@ def main():
            node.send_output(
                "tick",
                pa.array([response]),
-               {},
+               {"primitive": "text"},
            )
        elif event_id == "text":
@@ -176,7 +176,7 @@ def main():
            node.send_output(
                "text",
                pa.array([response]),
-               {},
+               {"primitive": "text"},
            )
    elif event_type == "ERROR":
@@ -231,6 +231,10 @@ fn send_yuv(
    );
    let arrow = data.into_arrow();
+   metadata.parameters.insert(
+       "primitive".to_string(),
+       Parameter::String("image".to_string()),
+   );
    node.send_output(id, metadata.parameters.clone(), arrow)
        .context("could not send output")
        .unwrap();
@@ -248,6 +252,9 @@ fn send_yuv(
    let data = pkt.data;
    let arrow = data.into_arrow();
+   metadata
+       .parameters
+       .insert("primitive".to_string(), Parameter::String("image".to_string()));
    node.send_output(id, metadata.parameters.clone(), arrow)
        .context("could not send output")
        .unwrap();
@@ -428,6 +435,10 @@ pub fn lib_main() -> Result<()> {
        );
        let arrow = data.into_arrow();
+       metadata.parameters.insert(
+           "primitive".to_string(),
+           Parameter::String("image".to_string()),
+       );
        node.send_output(id, metadata.parameters.clone(), arrow)
            .context("could not send output")
@@ -439,6 +450,10 @@ pub fn lib_main() -> Result<()> {
            Parameter::String("av1".to_string()),
        );
        let arrow = data.into_arrow();
+       metadata.parameters.insert(
+           "primitive".to_string(),
+           Parameter::String("image".to_string()),
+       );
        node.send_output(id, metadata.parameters, arrow)
            .context("could not send output")
            .unwrap();
@@ -47,6 +47,7 @@ def main():
                "encoding": "bgr8",
                "width": image_left.shape[1],
                "height": image_left.shape[0],
+               "primitive": "image",
            },
        )
@@ -64,6 +65,7 @@ def main():
                "encoding": "bgr8",
                "width": image_right.shape[1],
                "height": image_right.shape[0],
+               "primitive": "image",
            },
        )
@@ -76,6 +78,7 @@ def main():
                "encoding": "bgr8",
                "width": depth_image.shape[1],
                "height": depth_image.shape[0],
+               "primitive": "image",
            },
        )
@@ -92,6 +95,7 @@ def main():
                "height": height,
                "focal": [int(k[0, 0]), int(k[1, 1])],
                "resolution": [int(k[0, 2]), int(k[1, 2])],
+               "primitive": "depth",
            },
        )
@@ -4,6 +4,99 @@ dora visualization using `rerun`

This node is still experimental, and the format for passing images, bounding boxes, and text will probably change in the future.

## Changes in v0.24.0

This version introduces significant breaking changes to align with Rerun SDK v0.24.0 and improve the visualization primitive system:

### Major Breaking Changes
1. **Primitive-based Visualization System**
   - **BREAKING**: All inputs now require a `primitive` metadata field to specify the visualization type
   - Previously, the visualization type was inferred from the input ID (e.g., "image", "depth", "boxes2d")
   - Now you must explicitly specify: `metadata: { "primitive": "image" }` (or "depth", "boxes2d", etc.)
   - This change allows more flexible naming of inputs and clearer intent (see the sender sketch after this list)

2. **Rerun SDK Upgrade**
   - Updated from Rerun v0.23.3 to v0.24.0
   - Updated the Python dependency from `rerun_sdk>=0.23.1` to `rerun_sdk>=0.24.0`

3. **New 3D Boxes Support**
   - Added comprehensive 3D bounding box visualization with multiple format support
   - Supports three formats: "center_half_size" (default), "center_size", and "min_max"
   - Configurable rendering: wireframe (default) or solid fill
   - Support for per-box colors and labels

4. **Enhanced Depth Visualization**
   - Depth data now supports a pinhole camera setup for proper 3D reconstruction
   - Requires the Float32Array format (previously Float64 and UInt16 were also supported)
   - New metadata fields for camera configuration:
     - `camera_position`: [x, y, z] position
     - `camera_orientation`: [x, y, z, w] quaternion
     - `focal`: [fx, fy] focal lengths
     - `principal_point`: [cx, cy] principal point (optional)
   - Without camera metadata, depth is logged but 3D reconstruction is skipped

5. **Removed Features**
   - Removed series/time-series visualization support
   - Removed the legacy camera pitch configuration via the CAMERA_PITCH environment variable
   - Removed automatic depth-to-3D point cloud conversion without proper camera parameters
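
As a concrete illustration of the first change, the sender now has to attach the `primitive` field itself. A minimal sketch, assuming the `dora` Python node API and `pyarrow` conventions used in the hunks above (the output name `text` is just an example):

```python
import pyarrow as pa
from dora import Node

node = Node()

# Pre-0.24.0: dora-rerun inferred the visualization type from the input ID,
# so senders could omit metadata entirely:
# node.send_output("text", pa.array(["hello"]))

# 0.24.0 and later: the visualization type is declared explicitly.
node.send_output("text", pa.array(["hello"]), {"primitive": "text"})
```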
### Migration Guide

#### Before (old system):

```yaml
nodes:
  - id: rerun
    inputs:
      image: camera/image        # Type inferred from "image" in ID
      depth: sensor/depth        # Type inferred from "depth" in ID
      boxes2d: detector/boxes2d  # Type inferred from "boxes2d" in ID
```

#### After (new system):

```yaml
nodes:
  - id: rerun
    inputs:
      camera_feed: camera/image
      depth_sensor: sensor/depth
      detections: detector/boxes2d
# Visualization types must be specified in the sender's metadata:
#   camera/image must send:     metadata { "primitive": "image" }
#   sensor/depth must send:     metadata { "primitive": "depth" }
#   detector/boxes2d must send: metadata { "primitive": "boxes2d" }
```
### Migration Status

**Successfully tested examples:**

- ✅ examples/rerun-viewer/dataflow.yml - Basic camera visualization
- ✅ examples/camera/dataflow_rerun.yml - Camera with rerun
- ✅ examples/python-dataflow/dataflow.yml - Camera + YOLO object detection
- ✅ examples/python-multi-env/dataflow.yml - Multi-environment setup

**Updated but NOT tested examples:**

- 🔧 examples/keyboard/dataflow.yml - Updated dora-keyboard to send primitive metadata (requires Linux/X11 for testing)
- 🔧 examples/translation/* - Updated dora-distil-whisper and dora-argotranslate to send primitive metadata
- 🔧 examples/reachy2-remote/dataflow_reachy.yml - Updated multiple nodes (dora-reachy2, dora-qwen2-5-vl, dora-sam2, parse_bbox.py, parse_whisper.py)
- 🔧 examples/lebai/graphs/dataflow_full.yml - Updated dora-qwenvl, llama-factory-recorder, key_interpolation.py
- 🔧 examples/av1-encoding/* - Updated dora-dav1d and dora-rav1e to send primitive metadata
Key changes made:

1. Added the `-e` flag to local package installs in dataflows for development
2. Updated node packages to include `"primitive"` metadata:
   - opencv-video-capture: adds `"primitive": "image"`
   - dora-yolo: adds `"primitive": "boxes2d"`
   - dora-keyboard: adds `"primitive": "text"`
   - dora-distil-whisper: adds `"primitive": "text"`
   - dora-argotranslate: adds `"primitive": "text"`
   - dora-sam2: adds `"primitive": "masks"`
   - dora-qwen2-5-vl: adds `"primitive": "text"`
   - dora-qwenvl: adds `"primitive": "text"`
   - dora-reachy2/camera.py: adds appropriate primitives
   - dora-dav1d: adds `"primitive": "image"`
   - dora-rav1e: adds `"primitive": "image"`
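
When migrating a graph that is not in the tested list above, one way to confirm that an upstream node really sets the new field is to wire its output into a small probe node and print the incoming metadata. A hypothetical sketch using the standard dora event loop shown in the hunks above:

```python
from dora import Node

node = Node()

for event in node:
    if event["type"] == "INPUT":
        # Every input that should reach dora-rerun must carry a "primitive" key.
        primitive = event["metadata"].get("primitive")
        if primitive is None:
            print(f"input {event['id']} is missing the 'primitive' metadata field")
        else:
            print(f"input {event['id']} -> primitive: {primitive}")
```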
## Getting Started

```bash
pip install dora-rerun
```

@@ -30,24 +30,76 @@ pip install dora-rerun

```yaml
      RERUN_MEMORY_LIMIT: 25%
```
## Input definition (pre-v0.24.0)

The previous input formats, replaced in v0.24.0 by the primitive-based system documented in the next section:

- image: UInt8Array + metadata { "width": int, "height": int, "encoding": str }
- boxes2D: StructArray + metadata { "format": str }
- boxes3D: Float32Array/StructArray + metadata { "format": str, "solid": bool, "color": list[int] }
  - Formats: "center_half_size" (default) [cx, cy, cz, hx, hy, hz],
    "center_size" [cx, cy, cz, sx, sy, sz],
    "min_max" [min_x, min_y, min_z, max_x, max_y, max_z]
  - Default rendering: wireframe (set "solid": true for filled boxes)
  - Color: RGB array [r, g, b] with values 0-255
- text: StringArray
- jointstate: Float32Array
- points3d: Float32Array (xyz triplets) + metadata { "color": list[int] (RGB 0-255), "radii": list[float] }
- points2d: Float32Array (xy pairs)
- lines3d: Float32Array (xyz triplets) + metadata { "color": list[int] (RGB 0-255), "radius": float }
- depth: Float32Array + metadata { "width": int, "height": int, "camera_position": list[float], "camera_orientation": list[float], "focal": list[float], "principal_point": list[float] }
  - With camera metadata: creates a pinhole camera view with the depth image
  - Without camera metadata: skips 3D reconstruction
## Supported Visualization Primitives

All inputs require a `"primitive"` field in the metadata to specify the visualization type:

### 1. image

- **Data**: UInt8Array
- **Required metadata**: `{ "primitive": "image", "width": int, "height": int, "encoding": str }`
- **Supported encodings**: "bgr8", "rgb8", "jpeg", "png", "avif"
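
For example, an OpenCV-style BGR frame could be published like this, following the pattern used by opencv-video-capture in the hunks above (the device index and output name are placeholders):

```python
import cv2
import pyarrow as pa
from dora import Node

node = Node()
cap = cv2.VideoCapture(0)      # placeholder device index
ret, frame = cap.read()        # frame is HxWx3, dtype uint8, BGR order

if ret:
    node.send_output(
        "image",
        pa.array(frame.ravel()),  # flatten to a UInt8Array
        {
            "primitive": "image",
            "encoding": "bgr8",
            "width": frame.shape[1],
            "height": frame.shape[0],
        },
    )
```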
### 2. depth

- **Data**: Float32Array
- **Required metadata**: `{ "primitive": "depth", "width": int, "height": int }`
- **Optional metadata for 3D reconstruction**:
  - `"camera_position"`: [x, y, z] position
  - `"camera_orientation"`: [x, y, z, w] quaternion
  - `"focal"`: [fx, fy] focal lengths
  - `"principal_point"`: [cx, cy] principal point
### 3. text

- **Data**: StringArray
- **Required metadata**: `{ "primitive": "text" }`
### 4. boxes2d

- **Data**: StructArray or Float32Array
- **Required metadata**: `{ "primitive": "boxes2d", "format": str }`
- **Formats**: "xyxy" (default), "xywh"
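
A sketch of the flat Float32Array form in the default "xyxy" format (coordinates are placeholders):

```python
import pyarrow as pa
from dora import Node

node = Node()

# Two boxes, each as [x_min, y_min, x_max, y_max].
boxes = [10.0, 20.0, 110.0, 220.0,
         50.0, 60.0, 150.0, 260.0]

node.send_output(
    "boxes2d",
    pa.array(boxes, type=pa.float32()),
    {"primitive": "boxes2d", "format": "xyxy"},
)
```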
### 5. boxes3d

- **Data**: Float32Array or StructArray
- **Required metadata**: `{ "primitive": "boxes3d" }`
- **Optional metadata**:
  - `"format"`: "center_half_size" (default), "center_size", "min_max"
  - `"solid"`: bool (default false for wireframe)
  - `"color"`: [r, g, b] RGB values 0-255
### 6. masks

- **Data**: UInt8Array
- **Required metadata**: `{ "primitive": "masks", "width": int, "height": int }`
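
A sketch of a single binary mask, flattened the same way the dora-sam2 change further below does (shape and values are placeholders):

```python
import numpy as np
import pyarrow as pa
from dora import Node

node = Node()
mask = np.zeros((480, 640), dtype=np.uint8)  # 0 = background, 1 = object (placeholder)
mask[100:200, 150:300] = 1

node.send_output(
    "masks",
    pa.array(mask.ravel()),
    {"primitive": "masks", "width": 640, "height": 480},
)
```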
### 7. jointstate

- **Data**: Float32Array
- **Required metadata**: `{ "primitive": "jointstate" }`
- **Note**: Requires URDF configuration (see below)

### 8. pose

- **Data**: Float32Array (7 values: [x, y, z, qx, qy, qz, qw])
- **Required metadata**: `{ "primitive": "pose" }`
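
A sketch for a single pose, sent as the seven floats listed above (identity orientation, placeholder translation):

```python
import pyarrow as pa
from dora import Node

node = Node()

# [x, y, z, qx, qy, qz, qw]
pose = [0.3, 0.0, 0.2, 0.0, 0.0, 0.0, 1.0]

node.send_output("pose", pa.array(pose, type=pa.float32()), {"primitive": "pose"})
```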
### 9. series

- **Data**: Float32Array
- **Required metadata**: `{ "primitive": "series" }`
- **Note**: Currently logs only the first value as a scalar

### 10. points3d

- **Data**: Float32Array (xyz triplets)
- **Required metadata**: `{ "primitive": "points3d" }`
- **Optional metadata**:
  - `"color"`: [r, g, b] RGB values 0-255
  - `"radii"`: list of float radius values
### 11. points2d

- **Data**: Float32Array (xy pairs)
- **Required metadata**: `{ "primitive": "points2d" }`

### 12. lines3d

- **Data**: Float32Array (xyz triplets defining line segments)
- **Required metadata**: `{ "primitive": "lines3d" }`
- **Optional metadata**:
  - `"color"`: [r, g, b] RGB values 0-255
  - `"radius"`: float line thickness

## (Experimental) For plotting 3D URDF
@@ -134,7 +134,7 @@ def main():
            if "boxes2d" in event_id:
                if len(event["value"]) == 0:
-                   node.send_output("masks", pa.array([]))
+                   node.send_output("masks", pa.array([]), {"primitive": "masks"})
                    continue
                if isinstance(event["value"], pa.StructArray):
                    boxes2d = event["value"][0].get("bbox").values.to_numpy()
@@ -183,8 +183,10 @@ def main():
                ## Mask to 3 channel image
                match return_type:
                    case pa.Array:
+                       metadata["primitive"] = "masks"
                        node.send_output("masks", pa.array(masks.ravel()), metadata)
                    case pa.StructArray:
+                       metadata["primitive"] = "masks"
                        node.send_output(
                            "masks",
                            pa.array(
@@ -95,6 +95,7 @@ def main():
            metadata = event["metadata"]
            metadata["format"] = bbox_format
+           metadata["primitive"] = "boxes2d"
            node.send_output(
                "bbox",
@@ -217,6 +217,7 @@ def main():
                jsonl_file=default_record_json_path,
                messages=messages,
            )
+           metadata["primitive"] = "text"
            node.send_output(
                "text",
                pa.array([ground_truth]),
@@ -125,6 +125,7 @@ def main():
            metadata["encoding"] = encoding
            metadata["width"] = int(frame.shape[1])
            metadata["height"] = int(frame.shape[0])
+           metadata["primitive"] = "image"
            # Get the right encoding
            if encoding == "rgb8":