From 0e6942b1bd9aba2ba91cebc2dc0af986adafc47a Mon Sep 17 00:00:00 2001 From: rozgo Date: Thu, 31 Jul 2025 23:29:08 -0600 Subject: [PATCH] fixes examples for refactored rerun --- examples/av1-encoding/dataflow.yml | 2 +- examples/camera/dataflow_jupyter.yml | 2 +- examples/camera/dataflow_rerun.yml | 2 +- examples/lebai/nodes/key_interpolation.py | 38 ++-- examples/python-multi-env/dataflow.yml | 2 +- examples/reachy2-remote/parse_bbox.py | 3 +- examples/reachy2-remote/parse_whisper.py | 14 +- examples/rerun-viewer/dataflow.yml | 2 +- .../dora_argotranslate/main.py | 2 +- node-hub/dora-dav1d/src/lib.rs | 24 +++ .../dora_distil_whisper/main.py | 2 +- node-hub/dora-keyboard/dora_keyboard/main.py | 2 +- .../dora-qwen2-5-vl/dora_qwen2_5_vl/main.py | 1 + node-hub/dora-qwenvl/dora_qwenvl/main.py | 4 +- node-hub/dora-rav1e/src/lib.rs | 15 ++ node-hub/dora-reachy2/dora_reachy2/camera.py | 4 + node-hub/dora-rerun/README.md | 181 ++++++++++++++++-- node-hub/dora-sam2/dora_sam2/main.py | 4 +- node-hub/dora-yolo/dora_yolo/main.py | 1 + .../llama_factory_recorder/main.py | 1 + .../opencv_video_capture/main.py | 1 + 21 files changed, 251 insertions(+), 56 deletions(-) diff --git a/examples/av1-encoding/dataflow.yml b/examples/av1-encoding/dataflow.yml index 2dbcef01..7517bde6 100644 --- a/examples/av1-encoding/dataflow.yml +++ b/examples/av1-encoding/dataflow.yml @@ -1,6 +1,6 @@ nodes: - id: camera - build: pip install ../../node-hub/opencv-video-capture + build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture _unstable_deploy: machine: encoder diff --git a/examples/camera/dataflow_jupyter.yml b/examples/camera/dataflow_jupyter.yml index c76e1a8f..5402e75e 100644 --- a/examples/camera/dataflow_jupyter.yml +++ b/examples/camera/dataflow_jupyter.yml @@ -1,6 +1,6 @@ nodes: - id: camera - build: pip install ../../node-hub/opencv-video-capture + build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture inputs: tick: 
dora/timer/millis/20 diff --git a/examples/camera/dataflow_rerun.yml b/examples/camera/dataflow_rerun.yml index 2633ec94..4ebaa742 100644 --- a/examples/camera/dataflow_rerun.yml +++ b/examples/camera/dataflow_rerun.yml @@ -1,6 +1,6 @@ nodes: - id: camera - build: pip install ../../node-hub/opencv-video-capture + build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture inputs: tick: dora/timer/millis/20 diff --git a/examples/lebai/nodes/key_interpolation.py b/examples/lebai/nodes/key_interpolation.py index 34653baf..382f684e 100644 --- a/examples/lebai/nodes/key_interpolation.py +++ b/examples/lebai/nodes/key_interpolation.py @@ -16,40 +16,40 @@ for event in node: char = event["value"][0].as_py() if char == "w": - node.send_output("text", pa.array(["forward"])) + node.send_output("text", pa.array(["forward"]), {"primitive": "text"}) elif char == "s": - node.send_output("text", pa.array(["back"])) + node.send_output("text", pa.array(["back"]), {"primitive": "text"}) elif char == "c": - node.send_output("text", pa.array([" go home"])) + node.send_output("text", pa.array([" go home"]), {"primitive": "text"}) elif char == "d": - node.send_output("text", pa.array(["right"])) + node.send_output("text", pa.array(["right"]), {"primitive": "text"}) elif char == "a": - node.send_output("text", pa.array(["left"])) + node.send_output("text", pa.array(["left"]), {"primitive": "text"}) elif char == "e": - node.send_output("text", pa.array(["up"])) + node.send_output("text", pa.array(["up"]), {"primitive": "text"}) elif char == "q": - node.send_output("text", pa.array(["down"])) + node.send_output("text", pa.array(["down"]), {"primitive": "text"}) elif char == "t": - node.send_output("text", pa.array(["close"])) + node.send_output("text", pa.array(["close"]), {"primitive": "text"}) elif char == "r": - node.send_output("text", pa.array(["open"])) + node.send_output("text", pa.array(["open"]), {"primitive": "text"}) elif char == "6": - 
node.send_output("text", pa.array(["yaw right"])) + node.send_output("text", pa.array(["yaw right"]), {"primitive": "text"}) elif char == "4": - node.send_output("text", pa.array(["yaw left"])) + node.send_output("text", pa.array(["yaw left"]), {"primitive": "text"}) elif char == "3": - node.send_output("text", pa.array(["yaw shoulder right"])) + node.send_output("text", pa.array(["yaw shoulder right"]), {"primitive": "text"}) elif char == "1": - node.send_output("text", pa.array(["yaw shoulder left"])) + node.send_output("text", pa.array(["yaw shoulder left"]), {"primitive": "text"}) elif char == "8": - node.send_output("text", pa.array(["pitch up"])) + node.send_output("text", pa.array(["pitch up"]), {"primitive": "text"}) elif char == "2": - node.send_output("text", pa.array(["pitch down"])) + node.send_output("text", pa.array(["pitch down"]), {"primitive": "text"}) elif char == "7": - node.send_output("text", pa.array(["roll left"])) + node.send_output("text", pa.array(["roll left"]), {"primitive": "text"}) elif char == "9": - node.send_output("text", pa.array(["roll right"])) + node.send_output("text", pa.array(["roll right"]), {"primitive": "text"}) elif char == "x": - node.send_output("text", pa.array(["stop"])) + node.send_output("text", pa.array(["stop"]), {"primitive": "text"}) elif char == "j": - node.send_output("text", pa.array([""])) + node.send_output("text", pa.array([""]), {"primitive": "text"}) diff --git a/examples/python-multi-env/dataflow.yml b/examples/python-multi-env/dataflow.yml index 3bea06ab..eac4652e 100644 --- a/examples/python-multi-env/dataflow.yml +++ b/examples/python-multi-env/dataflow.yml @@ -23,7 +23,7 @@ nodes: VIRTUAL_ENV: env_2 - id: plot - build: pip install dora-rerun + build: pip install -e ../../node-hub/dora-rerun path: dora-rerun inputs: image: camera/image diff --git a/examples/reachy2-remote/parse_bbox.py b/examples/reachy2-remote/parse_bbox.py index c1a992a4..0a1380fc 100644 --- a/examples/reachy2-remote/parse_bbox.py 
+++ b/examples/reachy2-remote/parse_bbox.py @@ -50,7 +50,7 @@ def extract_bboxes(json_text): for event in node: if event["type"] == "INPUT": if len(event["value"]) == 0: - node.send_output("bbox_track", pa.array([])) + node.send_output("bbox_track", pa.array([]), {"primitive": "boxes2d"}) continue text = event["value"][0].as_py() @@ -62,6 +62,7 @@ for event in node: bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO) metadata["image_id"] = image_id metadata["encoding"] = "xyxy" + metadata["primitive"] = "boxes2d" if image_id == "image_left": node.send_output( "bbox_track", diff --git a/examples/reachy2-remote/parse_whisper.py b/examples/reachy2-remote/parse_whisper.py index 98ee07de..cdfc0d57 100644 --- a/examples/reachy2-remote/parse_whisper.py +++ b/examples/reachy2-remote/parse_whisper.py @@ -57,16 +57,16 @@ for event in node: node.send_output("points", pa.array([], type=pa.float64())) elif "follow" in text: text = f"Given the prompt: {text}. Output the bounding boxes for the given followed object" - node.send_output("text", pa.array([text]), {"image_id": "image_left"}) + node.send_output("text", pa.array([text]), {"image_id": "image_left", "primitive": "text"}) elif "grab " in text: text = f"Given the prompt: {text}. Output the bounding boxes for the given grabbed object" node.send_output( - "text", pa.array([text]), {"image_id": "image_depth", "action": "grab"} + "text", pa.array([text]), {"image_id": "image_depth", "action": "grab", "primitive": "text"} ) elif "get " in text: text = f"Given the prompt: {text}. 
Output the bounding boxes for the object" node.send_output( - "text", pa.array([text]), {"image_id": "image_left", "action": "grab"} + "text", pa.array([text]), {"image_id": "image_left", "action": "grab", "primitive": "text"} ) last_prompt = text elif "put " in text: @@ -74,7 +74,7 @@ for event in node: node.send_output( "text", pa.array([text]), - {"image_id": "image_left", "action": "release"}, + {"image_id": "image_left", "action": "release", "primitive": "text"}, ) last_prompt = text elif "drop " in text: @@ -82,7 +82,7 @@ for event in node: node.send_output( "text", pa.array([text]), - {"image_id": "image_depth", "action": "release"}, + {"image_id": "image_depth", "action": "release", "primitive": "text"}, ) elif "release left" in text: node.send_output("action_release_left", pa.array([1.0])) @@ -123,13 +123,13 @@ for event in node: node.send_output( "text", pa.array([text]), - {"image_id": "image_depth", "action": "grab"}, + {"image_id": "image_depth", "action": "grab", "primitive": "text"}, ) elif "put " in text: text = f"Given the prompt: {text}. 
Output the bounding boxes for the place to put the object" node.send_output( "text", pa.array([text]), - {"image_id": "image_depth", "action": "release"}, + {"image_id": "image_depth", "action": "release", "primitive": "text"}, ) \ No newline at end of file diff --git a/examples/rerun-viewer/dataflow.yml b/examples/rerun-viewer/dataflow.yml index a33d308f..c3448591 100644 --- a/examples/rerun-viewer/dataflow.yml +++ b/examples/rerun-viewer/dataflow.yml @@ -1,6 +1,6 @@ nodes: - id: camera - build: pip install ../../node-hub/opencv-video-capture + build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture inputs: tick: dora/timer/millis/20 diff --git a/node-hub/dora-argotranslate/dora_argotranslate/main.py b/node-hub/dora-argotranslate/dora_argotranslate/main.py index 2bab22ee..5ce8ef5a 100644 --- a/node-hub/dora-argotranslate/dora_argotranslate/main.py +++ b/node-hub/dora-argotranslate/dora_argotranslate/main.py @@ -42,5 +42,5 @@ def main(): node.send_output( "text", pa.array([translated_text]), - {"language": to_code}, + {"language": to_code, "primitive": "text"}, ) diff --git a/node-hub/dora-dav1d/src/lib.rs b/node-hub/dora-dav1d/src/lib.rs index 45e8477d..2d6afc7c 100644 --- a/node-hub/dora-dav1d/src/lib.rs +++ b/node-hub/dora-dav1d/src/lib.rs @@ -112,6 +112,12 @@ pub fn lib_main() -> Result<()> { p.height() as i64 ), ); + metadata.parameters.insert( + "primitive".to_string(), + dora_node_api::Parameter::String( + "image".to_string(), + ), + ); node.send_output(id, metadata.parameters, arrow) .unwrap(); @@ -131,6 +137,12 @@ pub fn lib_main() -> Result<()> { "bgr8".to_string(), ), ); + metadata.parameters.insert( + "primitive".to_string(), + dora_node_api::Parameter::String( + "image".to_string(), + ), + ); node.send_output(id, metadata.parameters, arrow) .unwrap(); } @@ -154,6 +166,12 @@ pub fn lib_main() -> Result<()> { "mono8".to_string(), ), ); + metadata.parameters.insert( + "primitive".to_string(), + 
dora_node_api::Parameter::String( + "image".to_string(), + ), + ); node.send_output(id, metadata.parameters, arrow) .unwrap(); } @@ -167,6 +185,12 @@ pub fn lib_main() -> Result<()> { "mono16".to_string(), ), ); + metadata.parameters.insert( + "primitive".to_string(), + dora_node_api::Parameter::String( + "image".to_string(), + ), + ); node.send_output(id, metadata.parameters, arrow) .unwrap(); } diff --git a/node-hub/dora-distil-whisper/dora_distil_whisper/main.py b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py index 007b3b43..70c54fcf 100644 --- a/node-hub/dora-distil-whisper/dora_distil_whisper/main.py +++ b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py @@ -236,5 +236,5 @@ def main(): if text.strip() == "" or text.strip() == ".": continue node.send_output( - "text", pa.array([text]), {"language": TARGET_LANGUAGE}, + "text", pa.array([text]), {"language": TARGET_LANGUAGE, "primitive": "text"}, ) diff --git a/node-hub/dora-keyboard/dora_keyboard/main.py b/node-hub/dora-keyboard/dora_keyboard/main.py index 644c10c5..6a950629 100644 --- a/node-hub/dora-keyboard/dora_keyboard/main.py +++ b/node-hub/dora-keyboard/dora_keyboard/main.py @@ -23,7 +23,7 @@ def main(): if event is not None and isinstance(event, Events.Press): if hasattr(event.key, "char"): if event.key.char is not None: - node.send_output("char", pa.array([event.key.char])) + node.send_output("char", pa.array([event.key.char]), {"primitive": "text"}) if __name__ == "__main__": diff --git a/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py b/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py index 7592bdad..95eb807c 100644 --- a/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py +++ b/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py @@ -316,6 +316,7 @@ def main(): ) metadata = event["metadata"] metadata["image_id"] = image_id if image_id is not None else "all" + metadata["primitive"] = "text" node.send_output( "text", pa.array([response]), diff --git 
a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index 537e6133..dc7472a2 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -162,7 +162,7 @@ def main(): node.send_output( "tick", pa.array([response]), - {}, + {"primitive": "text"}, ) elif event_id == "text": @@ -176,7 +176,7 @@ def main(): node.send_output( "text", pa.array([response]), - {}, + {"primitive": "text"}, ) elif event_type == "ERROR": diff --git a/node-hub/dora-rav1e/src/lib.rs b/node-hub/dora-rav1e/src/lib.rs index 9ce9f1fd..d9261abe 100644 --- a/node-hub/dora-rav1e/src/lib.rs +++ b/node-hub/dora-rav1e/src/lib.rs @@ -231,6 +231,10 @@ fn send_yuv( ); let arrow = data.into_arrow(); + metadata.parameters.insert( + "primitive".to_string(), + Parameter::String("image".to_string()), + ); node.send_output(id, metadata.parameters.clone(), arrow) .context("could not send output") .unwrap(); @@ -248,6 +252,9 @@ fn send_yuv( let data = pkt.data; let arrow = data.into_arrow(); + metadata + .parameters + .insert("primitive".to_string(), Parameter::String("image".to_string())); node.send_output(id, metadata.parameters.clone(), arrow) .context("could not send output") .unwrap(); @@ -428,6 +435,10 @@ pub fn lib_main() -> Result<()> { ); let arrow = data.into_arrow(); + metadata.parameters.insert( + "primitive".to_string(), + Parameter::String("image".to_string()), + ); node.send_output(id, metadata.parameters.clone(), arrow) .context("could not send output") @@ -439,6 +450,10 @@ pub fn lib_main() -> Result<()> { Parameter::String("av1".to_string()), ); let arrow = data.into_arrow(); + metadata.parameters.insert( + "primitive".to_string(), + Parameter::String("image".to_string()), + ); node.send_output(id, metadata.parameters, arrow) .context("could not send output") .unwrap(); diff --git a/node-hub/dora-reachy2/dora_reachy2/camera.py b/node-hub/dora-reachy2/dora_reachy2/camera.py index e570c962..9921bf5f 100644 --- 
a/node-hub/dora-reachy2/dora_reachy2/camera.py +++ b/node-hub/dora-reachy2/dora_reachy2/camera.py @@ -47,6 +47,7 @@ def main(): "encoding": "bgr8", "width": image_left.shape[1], "height": image_left.shape[0], + "primitive": "image", }, ) @@ -64,6 +65,7 @@ def main(): "encoding": "bgr8", "width": image_right.shape[1], "height": image_right.shape[0], + "primitive": "image", }, ) @@ -76,6 +78,7 @@ def main(): "encoding": "bgr8", "width": depth_image.shape[1], "height": depth_image.shape[0], + "primitive": "image", }, ) @@ -92,6 +95,7 @@ def main(): "height": height, "focal": [int(k[0, 0]), int(k[1, 1])], "resolution": [int(k[0, 2]), int(k[1, 2])], + "primitive": "depth", }, ) diff --git a/node-hub/dora-rerun/README.md b/node-hub/dora-rerun/README.md index 974b7e5a..9f8df5f8 100644 --- a/node-hub/dora-rerun/README.md +++ b/node-hub/dora-rerun/README.md @@ -4,6 +4,99 @@ dora visualization using `rerun` This nodes is still experimental and format for passing Images, Bounding boxes, and text are probably going to change in the future. +## Changes in v0.24.0 + +This version introduces significant breaking changes to align with Rerun SDK v0.24.0 and improve the visualization primitive system: + +### Major Breaking Changes + +1. **Primitive-based Visualization System** + - **BREAKING**: All inputs now require a `primitive` metadata field to specify the visualization type + - Previously, visualization type was inferred from the input ID (e.g., "image", "depth", "boxes2d") + - Now you must explicitly specify: `metadata: { "primitive": "image" }` (or "depth", "boxes2d", etc.) + - This change allows more flexible naming of inputs and clearer intent + +2. **Rerun SDK Upgrade** + - Updated from Rerun v0.23.3 to v0.24.0 + - Updated Python dependency from `rerun_sdk>=0.23.1` to `rerun_sdk>=0.24.0` + +3. 
**New 3D Boxes Support** + - Added comprehensive 3D bounding box visualization with multiple format support + - Supports three formats: "center_half_size" (default), "center_size", and "min_max" + - Configurable rendering: wireframe (default) or solid fill + - Support for per-box colors and labels + +4. **Enhanced Depth Visualization** + - Depth data now supports pinhole camera setup for proper 3D reconstruction + - Requires Float32Array format (previously supported Float64 and UInt16) + - New metadata fields for camera configuration: + - `camera_position`: [x, y, z] position + - `camera_orientation`: [x, y, z, w] quaternion + - `focal`: [fx, fy] focal lengths + - `principal_point`: [cx, cy] principal point (optional) + - Without camera metadata, depth is logged but 3D reconstruction is skipped + +5. **Removed Features** + - Removed full series/time-series visualization; the `series` primitive remains but now logs only the first value as a scalar + - Removed legacy camera pitch configuration via CAMERA_PITCH environment variable + - Removed automatic depth-to-3D point cloud conversion without proper camera parameters + +### Migration Guide + +#### Before (old system): +```yaml +nodes: + - id: rerun + inputs: + image: camera/image # Type inferred from "image" in ID + depth: sensor/depth # Type inferred from "depth" in ID + boxes2d: detector/boxes2d # Type inferred from "boxes2d" in ID +``` + +#### After (new system): +```yaml +nodes: + - id: rerun + inputs: + camera_feed: camera/image + depth_sensor: sensor/depth + detections: detector/boxes2d + # Visualization types must be specified in the sender's metadata: + # camera/image must send: metadata { "primitive": "image" } + # sensor/depth must send: metadata { "primitive": "depth" } + # detector/boxes2d must send: metadata { "primitive": "boxes2d" } +``` + +### Migration Status + +**Successfully tested examples:** +- ✅ examples/rerun-viewer/dataflow.yml - Basic camera visualization +- ✅ examples/camera/dataflow_rerun.yml - Camera with rerun +- ✅ examples/python-dataflow/dataflow.yml -
Camera + YOLO object detection +- ✅ examples/python-multi-env/dataflow.yml - Multi-environment setup + +**Updated but NOT tested examples:** +- 🔧 examples/keyboard/dataflow.yml - Updated dora-keyboard to send primitive metadata (requires Linux/X11 for testing) +- 🔧 examples/translation/* - Updated dora-distil-whisper and dora-argotranslate to send primitive metadata +- 🔧 examples/reachy2-remote/dataflow_reachy.yml - Updated multiple nodes (dora-reachy2, dora-qwen2-5-vl, dora-sam2, parse_bbox.py, parse_whisper.py) +- 🔧 examples/lebai/graphs/dataflow_full.yml - Updated dora-qwenvl, llama-factory-recorder, key_interpolation.py +- 🔧 examples/av1-encoding/* - Updated dora-dav1d and dora-rav1e to send primitive metadata + +Key changes made: +1. Added `-e` flag to local package installs in dataflows for development +2. Updated node packages to include `"primitive"` metadata: + - opencv-video-capture: adds `"primitive": "image"` + - dora-yolo: adds `"primitive": "boxes2d"` + - dora-keyboard: adds `"primitive": "text"` + - dora-distil-whisper: adds `"primitive": "text"` + - dora-argotranslate: adds `"primitive": "text"` + - dora-sam2: adds `"primitive": "masks"` + - dora-qwen2-5-vl: adds `"primitive": "text"` + - dora-qwenvl: adds `"primitive": "text"` + - dora-reachy2/camera.py: adds appropriate primitives + - dora-dav1d: adds `"primitive": "image"` + - dora-rav1e: adds `"primitive": "image"` + ## Getting Started ```bash @@ -30,24 +123,76 @@ pip install dora-rerun RERUN_MEMORY_LIMIT: 25% ``` -## Input definition - -- image: UInt8Array + metadata { "width": int, "height": int, "encoding": str } -- boxes2D: StructArray + metadata { "format": str } -- boxes3D: Float32Array/StructArray + metadata { "format": str, "solid": bool, "color": list[int] } - - Formats: "center_half_size" (default) [cx, cy, cz, hx, hy, hz], - "center_size" [cx, cy, cz, sx, sy, sz], - "min_max" [min_x, min_y, min_z, max_x, max_y, max_z] - - Default rendering: wireframe (set "solid": true for filled 
boxes) - - Color: RGB array [r, g, b] with values 0-255 -- text: StringArray -- jointstate: Float32Array -- points3d: Float32Array (xyz triplets) + metadata { "color": list[int] (RGB 0-255), "radii": list[float] } -- points2d: Float32Array (xy pairs) -- lines3d: Float32Array (xyz triplets) + metadata { "color": list[int] (RGB 0-255), "radius": float } -- depth: Float32Array + metadata { "width": int, "height": int, "camera_position": list[float], "camera_orientation": list[float], "focal": list[float], "principal_point": list[float] } - - With camera metadata: creates pinhole camera view with depth image - - Without camera metadata: skips 3D reconstruction +## Supported Visualization Primitives + +All inputs require a `"primitive"` field in the metadata to specify the visualization type: + +### 1. image +- **Data**: UInt8Array +- **Required metadata**: `{ "primitive": "image", "width": int, "height": int, "encoding": str }` +- **Supported encodings**: "bgr8", "rgb8", "jpeg", "png", "avif" + +### 2. depth +- **Data**: Float32Array +- **Required metadata**: `{ "primitive": "depth", "width": int, "height": int }` +- **Optional metadata for 3D reconstruction**: + - `"camera_position"`: [x, y, z] position + - `"camera_orientation"`: [x, y, z, w] quaternion + - `"focal"`: [fx, fy] focal lengths + - `"principal_point"`: [cx, cy] principal point + +### 3. text +- **Data**: StringArray +- **Required metadata**: `{ "primitive": "text" }` + +### 4. boxes2d +- **Data**: StructArray or Float32Array +- **Required metadata**: `{ "primitive": "boxes2d", "format": str }` +- **Formats**: "xyxy" (default), "xywh" + +### 5. boxes3d +- **Data**: Float32Array or StructArray +- **Required metadata**: `{ "primitive": "boxes3d" }` +- **Optional metadata**: + - `"format"`: "center_half_size" (default), "center_size", "min_max" + - `"solid"`: bool (default false for wireframe) + - `"color"`: [r, g, b] RGB values 0-255 + +### 6. 
masks +- **Data**: UInt8Array +- **Required metadata**: `{ "primitive": "masks", "width": int, "height": int }` + +### 7. jointstate +- **Data**: Float32Array +- **Required metadata**: `{ "primitive": "jointstate" }` +- **Note**: Requires URDF configuration (see below) + +### 8. pose +- **Data**: Float32Array (7 values: [x, y, z, qx, qy, qz, qw]) +- **Required metadata**: `{ "primitive": "pose" }` + +### 9. series +- **Data**: Float32Array +- **Required metadata**: `{ "primitive": "series" }` +- **Note**: Currently logs only the first value as a scalar + +### 10. points3d +- **Data**: Float32Array (xyz triplets) +- **Required metadata**: `{ "primitive": "points3d" }` +- **Optional metadata**: + - `"color"`: [r, g, b] RGB values 0-255 + - `"radii"`: list of float radius values + +### 11. points2d +- **Data**: Float32Array (xy pairs) +- **Required metadata**: `{ "primitive": "points2d" }` + +### 12. lines3d +- **Data**: Float32Array (xyz triplets defining line segments) +- **Required metadata**: `{ "primitive": "lines3d" }` +- **Optional metadata**: + - `"color"`: [r, g, b] RGB values 0-255 + - `"radius"`: float line thickness ## (Experimental) For plotting 3D URDF diff --git a/node-hub/dora-sam2/dora_sam2/main.py b/node-hub/dora-sam2/dora_sam2/main.py index 37b216a9..be74e6f7 100644 --- a/node-hub/dora-sam2/dora_sam2/main.py +++ b/node-hub/dora-sam2/dora_sam2/main.py @@ -134,7 +134,7 @@ def main(): if "boxes2d" in event_id: if len(event["value"]) == 0: - node.send_output("masks", pa.array([])) + node.send_output("masks", pa.array([]), {"primitive": "masks"}) continue if isinstance(event["value"], pa.StructArray): boxes2d = event["value"][0].get("bbox").values.to_numpy() @@ -183,8 +183,10 @@ def main(): ## Mask to 3 channel image match return_type: case pa.Array: + metadata["primitive"] = "masks" node.send_output("masks", pa.array(masks.ravel()), metadata) case pa.StructArray: + metadata["primitive"] = "masks" node.send_output( "masks", pa.array( diff --git 
a/node-hub/dora-yolo/dora_yolo/main.py b/node-hub/dora-yolo/dora_yolo/main.py index b8e345df..c3cbc62c 100644 --- a/node-hub/dora-yolo/dora_yolo/main.py +++ b/node-hub/dora-yolo/dora_yolo/main.py @@ -95,6 +95,7 @@ def main(): metadata = event["metadata"] metadata["format"] = bbox_format + metadata["primitive"] = "boxes2d" node.send_output( "bbox", diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py index 57f13ea5..2b2473e8 100644 --- a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py +++ b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py @@ -217,6 +217,7 @@ def main(): jsonl_file=default_record_json_path, messages=messages, ) + metadata["primitive"] = "text" node.send_output( "text", pa.array([ground_truth]), diff --git a/node-hub/opencv-video-capture/opencv_video_capture/main.py b/node-hub/opencv-video-capture/opencv_video_capture/main.py index abfc08d1..548e95a6 100644 --- a/node-hub/opencv-video-capture/opencv_video_capture/main.py +++ b/node-hub/opencv-video-capture/opencv_video_capture/main.py @@ -125,6 +125,7 @@ def main(): metadata["encoding"] = encoding metadata["width"] = int(frame.shape[1]) metadata["height"] = int(frame.shape[0]) + metadata["primitive"] = "image" # Get the right encoding if encoding == "rgb8":