
fixes examples for refactored rerun

pull/1096/head
rozgo 5 months ago
parent commit 0e6942b1bd
21 changed files with 251 additions and 56 deletions:

  1. examples/av1-encoding/dataflow.yml (+1, -1)
  2. examples/camera/dataflow_jupyter.yml (+1, -1)
  3. examples/camera/dataflow_rerun.yml (+1, -1)
  4. examples/lebai/nodes/key_interpolation.py (+19, -19)
  5. examples/python-multi-env/dataflow.yml (+1, -1)
  6. examples/reachy2-remote/parse_bbox.py (+2, -1)
  7. examples/reachy2-remote/parse_whisper.py (+7, -7)
  8. examples/rerun-viewer/dataflow.yml (+1, -1)
  9. node-hub/dora-argotranslate/dora_argotranslate/main.py (+1, -1)
  10. node-hub/dora-dav1d/src/lib.rs (+24, -0)
  11. node-hub/dora-distil-whisper/dora_distil_whisper/main.py (+1, -1)
  12. node-hub/dora-keyboard/dora_keyboard/main.py (+1, -1)
  13. node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py (+1, -0)
  14. node-hub/dora-qwenvl/dora_qwenvl/main.py (+2, -2)
  15. node-hub/dora-rav1e/src/lib.rs (+15, -0)
  16. node-hub/dora-reachy2/dora_reachy2/camera.py (+4, -0)
  17. node-hub/dora-rerun/README.md (+163, -18)
  18. node-hub/dora-sam2/dora_sam2/main.py (+3, -1)
  19. node-hub/dora-yolo/dora_yolo/main.py (+1, -0)
  20. node-hub/llama-factory-recorder/llama_factory_recorder/main.py (+1, -0)
  21. node-hub/opencv-video-capture/opencv_video_capture/main.py (+1, -0)

examples/av1-encoding/dataflow.yml (+1, -1)

@@ -1,6 +1,6 @@
 nodes:
   - id: camera
-    build: pip install ../../node-hub/opencv-video-capture
+    build: pip install -e ../../node-hub/opencv-video-capture
     path: opencv-video-capture
     _unstable_deploy:
       machine: encoder


examples/camera/dataflow_jupyter.yml (+1, -1)

@@ -1,6 +1,6 @@
 nodes:
   - id: camera
-    build: pip install ../../node-hub/opencv-video-capture
+    build: pip install -e ../../node-hub/opencv-video-capture
     path: opencv-video-capture
     inputs:
       tick: dora/timer/millis/20


examples/camera/dataflow_rerun.yml (+1, -1)

@@ -1,6 +1,6 @@
 nodes:
   - id: camera
-    build: pip install ../../node-hub/opencv-video-capture
+    build: pip install -e ../../node-hub/opencv-video-capture
     path: opencv-video-capture
     inputs:
       tick: dora/timer/millis/20


examples/lebai/nodes/key_interpolation.py (+19, -19)

@@ -16,40 +16,40 @@ for event in node:
         char = event["value"][0].as_py()

         if char == "w":
-            node.send_output("text", pa.array(["forward"]))
+            node.send_output("text", pa.array(["forward"]), {"primitive": "text"})
         elif char == "s":
-            node.send_output("text", pa.array(["back"]))
+            node.send_output("text", pa.array(["back"]), {"primitive": "text"})
         elif char == "c":
-            node.send_output("text", pa.array([" go home"]))
+            node.send_output("text", pa.array([" go home"]), {"primitive": "text"})
         elif char == "d":
-            node.send_output("text", pa.array(["right"]))
+            node.send_output("text", pa.array(["right"]), {"primitive": "text"})
         elif char == "a":
-            node.send_output("text", pa.array(["left"]))
+            node.send_output("text", pa.array(["left"]), {"primitive": "text"})
         elif char == "e":
-            node.send_output("text", pa.array(["up"]))
+            node.send_output("text", pa.array(["up"]), {"primitive": "text"})
         elif char == "q":
-            node.send_output("text", pa.array(["down"]))
+            node.send_output("text", pa.array(["down"]), {"primitive": "text"})
         elif char == "t":
-            node.send_output("text", pa.array(["close"]))
+            node.send_output("text", pa.array(["close"]), {"primitive": "text"})
         elif char == "r":
-            node.send_output("text", pa.array(["open"]))
+            node.send_output("text", pa.array(["open"]), {"primitive": "text"})
         elif char == "6":
-            node.send_output("text", pa.array(["yaw right"]))
+            node.send_output("text", pa.array(["yaw right"]), {"primitive": "text"})
         elif char == "4":
-            node.send_output("text", pa.array(["yaw left"]))
+            node.send_output("text", pa.array(["yaw left"]), {"primitive": "text"})
         elif char == "3":
-            node.send_output("text", pa.array(["yaw shoulder right"]))
+            node.send_output("text", pa.array(["yaw shoulder right"]), {"primitive": "text"})
         elif char == "1":
-            node.send_output("text", pa.array(["yaw shoulder left"]))
+            node.send_output("text", pa.array(["yaw shoulder left"]), {"primitive": "text"})
         elif char == "8":
-            node.send_output("text", pa.array(["pitch up"]))
+            node.send_output("text", pa.array(["pitch up"]), {"primitive": "text"})
         elif char == "2":
-            node.send_output("text", pa.array(["pitch down"]))
+            node.send_output("text", pa.array(["pitch down"]), {"primitive": "text"})
         elif char == "7":
-            node.send_output("text", pa.array(["roll left"]))
+            node.send_output("text", pa.array(["roll left"]), {"primitive": "text"})
         elif char == "9":
-            node.send_output("text", pa.array(["roll right"]))
+            node.send_output("text", pa.array(["roll right"]), {"primitive": "text"})
         elif char == "x":
-            node.send_output("text", pa.array(["stop"]))
+            node.send_output("text", pa.array(["stop"]), {"primitive": "text"})
         elif char == "j":
-            node.send_output("text", pa.array([""]))
+            node.send_output("text", pa.array([""]), {"primitive": "text"})

examples/python-multi-env/dataflow.yml (+1, -1)

@@ -23,7 +23,7 @@ nodes:
       VIRTUAL_ENV: env_2

   - id: plot
-    build: pip install dora-rerun
+    build: pip install -e ../../node-hub/dora-rerun
     path: dora-rerun
     inputs:
       image: camera/image


examples/reachy2-remote/parse_bbox.py (+2, -1)

@@ -50,7 +50,7 @@ def extract_bboxes(json_text):
 for event in node:
     if event["type"] == "INPUT":
         if len(event["value"]) == 0:
-            node.send_output("bbox_track", pa.array([]))
+            node.send_output("bbox_track", pa.array([]), {"primitive": "boxes2d"})
             continue

         text = event["value"][0].as_py()
@@ -62,6 +62,7 @@ for event in node:
         bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO)
         metadata["image_id"] = image_id
         metadata["encoding"] = "xyxy"
+        metadata["primitive"] = "boxes2d"
         if image_id == "image_left":
             node.send_output(
                 "bbox_track",


examples/reachy2-remote/parse_whisper.py (+7, -7)

@@ -57,16 +57,16 @@ for event in node:
             node.send_output("points", pa.array([], type=pa.float64()))
         elif "follow" in text:
             text = f"Given the prompt: {text}. Output the bounding boxes for the given followed object"
-            node.send_output("text", pa.array([text]), {"image_id": "image_left"})
+            node.send_output("text", pa.array([text]), {"image_id": "image_left", "primitive": "text"})
         elif "grab " in text:
             text = f"Given the prompt: {text}. Output the bounding boxes for the given grabbed object"
             node.send_output(
-                "text", pa.array([text]), {"image_id": "image_depth", "action": "grab"}
+                "text", pa.array([text]), {"image_id": "image_depth", "action": "grab", "primitive": "text"}
             )
         elif "get " in text:
             text = f"Given the prompt: {text}. Output the bounding boxes for the object"
             node.send_output(
-                "text", pa.array([text]), {"image_id": "image_left", "action": "grab"}
+                "text", pa.array([text]), {"image_id": "image_left", "action": "grab", "primitive": "text"}
             )
             last_prompt = text
         elif "put " in text:
@@ -74,7 +74,7 @@ for event in node:
             node.send_output(
                 "text",
                 pa.array([text]),
-                {"image_id": "image_left", "action": "release"},
+                {"image_id": "image_left", "action": "release", "primitive": "text"},
             )
             last_prompt = text
         elif "drop " in text:
@@ -82,7 +82,7 @@ for event in node:
             node.send_output(
                 "text",
                 pa.array([text]),
-                {"image_id": "image_depth", "action": "release"},
+                {"image_id": "image_depth", "action": "release", "primitive": "text"},
             )
         elif "release left" in text:
             node.send_output("action_release_left", pa.array([1.0]))
@@ -123,13 +123,13 @@ for event in node:
             node.send_output(
                 "text",
                 pa.array([text]),
-                {"image_id": "image_depth", "action": "grab"},
+                {"image_id": "image_depth", "action": "grab", "primitive": "text"},
             )
         elif "put " in text:
             text = f"Given the prompt: {text}. Output the bounding boxes for the place to put the object"
             node.send_output(
                 "text",
                 pa.array([text]),
-                {"image_id": "image_depth", "action": "release"},
+                {"image_id": "image_depth", "action": "release", "primitive": "text"},
             )

examples/rerun-viewer/dataflow.yml (+1, -1)

@@ -1,6 +1,6 @@
 nodes:
   - id: camera
-    build: pip install ../../node-hub/opencv-video-capture
+    build: pip install -e ../../node-hub/opencv-video-capture
     path: opencv-video-capture
     inputs:
       tick: dora/timer/millis/20


node-hub/dora-argotranslate/dora_argotranslate/main.py (+1, -1)

@@ -42,5 +42,5 @@ def main():
             node.send_output(
                 "text",
                 pa.array([translated_text]),
-                {"language": to_code},
+                {"language": to_code, "primitive": "text"},
             )

node-hub/dora-dav1d/src/lib.rs (+24, -0)

@@ -112,6 +112,12 @@ pub fn lib_main() -> Result<()> {
                         p.height() as i64
                     ),
                 );
+                metadata.parameters.insert(
+                    "primitive".to_string(),
+                    dora_node_api::Parameter::String(
+                        "image".to_string(),
+                    ),
+                );

                 node.send_output(id, metadata.parameters, arrow)
                     .unwrap();
@@ -131,6 +137,12 @@ pub fn lib_main() -> Result<()> {
                         "bgr8".to_string(),
                     ),
                 );
+                metadata.parameters.insert(
+                    "primitive".to_string(),
+                    dora_node_api::Parameter::String(
+                        "image".to_string(),
+                    ),
+                );
                 node.send_output(id, metadata.parameters, arrow)
                     .unwrap();
             }
@@ -154,6 +166,12 @@ pub fn lib_main() -> Result<()> {
                         "mono8".to_string(),
                     ),
                 );
+                metadata.parameters.insert(
+                    "primitive".to_string(),
+                    dora_node_api::Parameter::String(
+                        "image".to_string(),
+                    ),
+                );
                 node.send_output(id, metadata.parameters, arrow)
                     .unwrap();
             }
@@ -167,6 +185,12 @@ pub fn lib_main() -> Result<()> {
                         "mono16".to_string(),
                     ),
                 );
+                metadata.parameters.insert(
+                    "primitive".to_string(),
+                    dora_node_api::Parameter::String(
+                        "image".to_string(),
+                    ),
+                );
                 node.send_output(id, metadata.parameters, arrow)
                     .unwrap();
             }


node-hub/dora-distil-whisper/dora_distil_whisper/main.py (+1, -1)

@@ -236,5 +236,5 @@ def main():
             if text.strip() == "" or text.strip() == ".":
                 continue
             node.send_output(
-                "text", pa.array([text]), {"language": TARGET_LANGUAGE},
+                "text", pa.array([text]), {"language": TARGET_LANGUAGE, "primitive": "text"},
             )

node-hub/dora-keyboard/dora_keyboard/main.py (+1, -1)

@@ -23,7 +23,7 @@ def main():
         if event is not None and isinstance(event, Events.Press):
             if hasattr(event.key, "char"):
                 if event.key.char is not None:
-                    node.send_output("char", pa.array([event.key.char]))
+                    node.send_output("char", pa.array([event.key.char]), {"primitive": "text"})


 if __name__ == "__main__":


node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py (+1, -0)

@@ -316,6 +316,7 @@ def main():
                 )
                 metadata = event["metadata"]
                 metadata["image_id"] = image_id if image_id is not None else "all"
+                metadata["primitive"] = "text"
                 node.send_output(
                     "text",
                     pa.array([response]),


node-hub/dora-qwenvl/dora_qwenvl/main.py (+2, -2)

@@ -162,7 +162,7 @@ def main():
                 node.send_output(
                     "tick",
                     pa.array([response]),
-                    {},
+                    {"primitive": "text"},
                 )

             elif event_id == "text":
@@ -176,7 +176,7 @@ def main():
                 node.send_output(
                     "text",
                     pa.array([response]),
-                    {},
+                    {"primitive": "text"},
                 )

         elif event_type == "ERROR":


node-hub/dora-rav1e/src/lib.rs (+15, -0)

@@ -231,6 +231,10 @@ fn send_yuv(
     );

     let arrow = data.into_arrow();
+    metadata.parameters.insert(
+        "primitive".to_string(),
+        Parameter::String("image".to_string()),
+    );
     node.send_output(id, metadata.parameters.clone(), arrow)
         .context("could not send output")
         .unwrap();
@@ -248,6 +252,9 @@ fn send_yuv(

     let data = pkt.data;
     let arrow = data.into_arrow();
+    metadata
+        .parameters
+        .insert("primitive".to_string(), Parameter::String("image".to_string()));
     node.send_output(id, metadata.parameters.clone(), arrow)
         .context("could not send output")
         .unwrap();
@@ -428,6 +435,10 @@ pub fn lib_main() -> Result<()> {
     );

     let arrow = data.into_arrow();
+    metadata.parameters.insert(
+        "primitive".to_string(),
+        Parameter::String("image".to_string()),
+    );

     node.send_output(id, metadata.parameters.clone(), arrow)
         .context("could not send output")
@@ -439,6 +450,10 @@ pub fn lib_main() -> Result<()> {
         Parameter::String("av1".to_string()),
     );
     let arrow = data.into_arrow();
+    metadata.parameters.insert(
+        "primitive".to_string(),
+        Parameter::String("image".to_string()),
+    );
     node.send_output(id, metadata.parameters, arrow)
         .context("could not send output")
         .unwrap();


node-hub/dora-reachy2/dora_reachy2/camera.py (+4, -0)

@@ -47,6 +47,7 @@ def main():
                 "encoding": "bgr8",
                 "width": image_left.shape[1],
                 "height": image_left.shape[0],
+                "primitive": "image",
             },
         )

@@ -64,6 +65,7 @@ def main():
                 "encoding": "bgr8",
                 "width": image_right.shape[1],
                 "height": image_right.shape[0],
+                "primitive": "image",
             },
         )

@@ -76,6 +78,7 @@ def main():
                 "encoding": "bgr8",
                 "width": depth_image.shape[1],
                 "height": depth_image.shape[0],
+                "primitive": "image",
             },
         )

@@ -92,6 +95,7 @@ def main():
                 "height": height,
                 "focal": [int(k[0, 0]), int(k[1, 1])],
                 "resolution": [int(k[0, 2]), int(k[1, 2])],
+                "primitive": "depth",
             },
         )



node-hub/dora-rerun/README.md (+163, -18)

@@ -4,6 +4,99 @@ dora visualization using `rerun`

 This node is still experimental, and the format for passing images, bounding boxes, and text will probably change in the future.

+## Changes in v0.24.0
+
+This version introduces significant breaking changes to align with Rerun SDK v0.24.0 and improve the visualization primitive system:
+
+### Major Breaking Changes
+
+1. **Primitive-based Visualization System**
+   - **BREAKING**: All inputs now require a `primitive` metadata field to specify the visualization type
+   - Previously, the visualization type was inferred from the input ID (e.g., "image", "depth", "boxes2d")
+   - Now you must explicitly specify: `metadata: { "primitive": "image" }` (or "depth", "boxes2d", etc.)
+   - This change allows more flexible naming of inputs and clearer intent
+
+2. **Rerun SDK Upgrade**
+   - Updated from Rerun v0.23.3 to v0.24.0
+   - Updated the Python dependency from `rerun_sdk>=0.23.1` to `rerun_sdk>=0.24.0`
+
+3. **New 3D Boxes Support**
+   - Added comprehensive 3D bounding box visualization with multiple format support
+   - Supports three formats: "center_half_size" (default), "center_size", and "min_max"
+   - Configurable rendering: wireframe (default) or solid fill
+   - Support for per-box colors and labels
+
+4. **Enhanced Depth Visualization**
+   - Depth data now supports pinhole camera setup for proper 3D reconstruction
+   - Requires Float32Array format (previously Float64 and UInt16 were also supported)
+   - New metadata fields for camera configuration:
+     - `camera_position`: [x, y, z] position
+     - `camera_orientation`: [x, y, z, w] quaternion
+     - `focal`: [fx, fy] focal lengths
+     - `principal_point`: [cx, cy] principal point (optional)
+   - Without camera metadata, depth is logged but 3D reconstruction is skipped
+
+5. **Removed Features**
+   - Removed series/time-series visualization support
+   - Removed the legacy camera pitch configuration via the `CAMERA_PITCH` environment variable
+   - Removed automatic depth-to-3D point cloud conversion without proper camera parameters
+
+### Migration Guide
+
+#### Before (old system):
+```yaml
+nodes:
+  - id: rerun
+    inputs:
+      image: camera/image # Type inferred from "image" in ID
+      depth: sensor/depth # Type inferred from "depth" in ID
+      boxes2d: detector/boxes2d # Type inferred from "boxes2d" in ID
+```
+
+#### After (new system):
+```yaml
+nodes:
+  - id: rerun
+    inputs:
+      camera_feed: camera/image
+      depth_sensor: sensor/depth
+      detections: detector/boxes2d
+# Visualization types must be specified in the sender's metadata:
+#   camera/image must send: metadata { "primitive": "image" }
+#   sensor/depth must send: metadata { "primitive": "depth" }
+#   detector/boxes2d must send: metadata { "primitive": "boxes2d" }
+```
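
On the sender side, the migration is a one-line metadata addition. A minimal sketch of a camera-style node tagging its output; the `tick` input name and the zeroed frame are illustrative, while the dora and pyarrow calls follow the nodes updated in this commit:

```python
import numpy as np
import pyarrow as pa
from dora import Node

node = Node()
for event in node:
    if event["type"] == "INPUT" and event["id"] == "tick":
        # Placeholder BGR frame; a real node would read from a camera.
        frame = np.zeros((480, 640, 3), dtype=np.uint8)
        node.send_output(
            "image",
            pa.array(frame.ravel()),
            # dora-rerun now dispatches on "primitive" instead of the input ID.
            {"primitive": "image", "encoding": "bgr8", "width": 640, "height": 480},
        )
```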

+### Migration Status
+
+**Successfully tested examples:**
+- ✅ examples/rerun-viewer/dataflow.yml - Basic camera visualization
+- ✅ examples/camera/dataflow_rerun.yml - Camera with rerun
+- ✅ examples/python-dataflow/dataflow.yml - Camera + YOLO object detection
+- ✅ examples/python-multi-env/dataflow.yml - Multi-environment setup
+
+**Updated but NOT tested examples:**
+- 🔧 examples/keyboard/dataflow.yml - Updated dora-keyboard to send primitive metadata (requires Linux/X11 for testing)
+- 🔧 examples/translation/* - Updated dora-distil-whisper and dora-argotranslate to send primitive metadata
+- 🔧 examples/reachy2-remote/dataflow_reachy.yml - Updated multiple nodes (dora-reachy2, dora-qwen2-5-vl, dora-sam2, parse_bbox.py, parse_whisper.py)
+- 🔧 examples/lebai/graphs/dataflow_full.yml - Updated dora-qwenvl, llama-factory-recorder, and key_interpolation.py
+- 🔧 examples/av1-encoding/* - Updated dora-dav1d and dora-rav1e to send primitive metadata
+
+Key changes made:
+1. Added the `-e` flag to local package installs in dataflows for development
+2. Updated node packages to include `"primitive"` metadata:
+   - opencv-video-capture: adds `"primitive": "image"`
+   - dora-yolo: adds `"primitive": "boxes2d"`
+   - dora-keyboard: adds `"primitive": "text"`
+   - dora-distil-whisper: adds `"primitive": "text"`
+   - dora-argotranslate: adds `"primitive": "text"`
+   - dora-sam2: adds `"primitive": "masks"`
+   - dora-qwen2-5-vl: adds `"primitive": "text"`
+   - dora-qwenvl: adds `"primitive": "text"`
+   - dora-reachy2/camera.py: adds the appropriate image and depth primitives
+   - dora-dav1d: adds `"primitive": "image"`
+   - dora-rav1e: adds `"primitive": "image"`

 ## Getting Started

 ```bash
@@ -30,24 +123,76 @@ pip install dora-rerun
 RERUN_MEMORY_LIMIT: 25%
 ```

-## Input definition
-
-- image: UInt8Array + metadata { "width": int, "height": int, "encoding": str }
-- boxes2D: StructArray + metadata { "format": str }
-- boxes3D: Float32Array/StructArray + metadata { "format": str, "solid": bool, "color": list[int] }
-  - Formats: "center_half_size" (default) [cx, cy, cz, hx, hy, hz], "center_size" [cx, cy, cz, sx, sy, sz], "min_max" [min_x, min_y, min_z, max_x, max_y, max_z]
-  - Default rendering: wireframe (set "solid": true for filled boxes)
-  - Color: RGB array [r, g, b] with values 0-255
-- text: StringArray
-- jointstate: Float32Array
-- points3d: Float32Array (xyz triplets) + metadata { "color": list[int] (RGB 0-255), "radii": list[float] }
-- points2d: Float32Array (xy pairs)
-- lines3d: Float32Array (xyz triplets) + metadata { "color": list[int] (RGB 0-255), "radius": float }
-- depth: Float32Array + metadata { "width": int, "height": int, "camera_position": list[float], "camera_orientation": list[float], "focal": list[float], "principal_point": list[float] }
-  - With camera metadata: creates pinhole camera view with depth image
-  - Without camera metadata: skips 3D reconstruction
+## Supported Visualization Primitives
+
+All inputs require a `"primitive"` field in the metadata to specify the visualization type:
+
+### 1. image
+- **Data**: UInt8Array
+- **Required metadata**: `{ "primitive": "image", "width": int, "height": int, "encoding": str }`
+- **Supported encodings**: "bgr8", "rgb8", "jpeg", "png", "avif"
+
+### 2. depth
+- **Data**: Float32Array
+- **Required metadata**: `{ "primitive": "depth", "width": int, "height": int }`
+- **Optional metadata for 3D reconstruction**:
+  - `"camera_position"`: [x, y, z] position
+  - `"camera_orientation"`: [x, y, z, w] quaternion
+  - `"focal"`: [fx, fy] focal lengths
+  - `"principal_point"`: [cx, cy] principal point

+### 3. text
+- **Data**: StringArray
+- **Required metadata**: `{ "primitive": "text" }`
+
+### 4. boxes2d
+- **Data**: StructArray or Float32Array
+- **Required metadata**: `{ "primitive": "boxes2d", "format": str }`
+- **Formats**: "xyxy" (default), "xywh"

+### 5. boxes3d
+- **Data**: Float32Array or StructArray
+- **Required metadata**: `{ "primitive": "boxes3d" }`
+- **Optional metadata**:
+  - `"format"`: "center_half_size" (default), "center_size", "min_max"
+  - `"solid"`: bool (default false for wireframe)
+  - `"color"`: [r, g, b] RGB values 0-255
+
+### 6. masks
+- **Data**: UInt8Array
+- **Required metadata**: `{ "primitive": "masks", "width": int, "height": int }`
+
+### 7. jointstate
+- **Data**: Float32Array
+- **Required metadata**: `{ "primitive": "jointstate" }`
+- **Note**: Requires URDF configuration (see below)
+
+### 8. pose
+- **Data**: Float32Array (7 values: [x, y, z, qx, qy, qz, qw])
+- **Required metadata**: `{ "primitive": "pose" }`
+
+### 9. series
+- **Data**: Float32Array
+- **Required metadata**: `{ "primitive": "series" }`
+- **Note**: Currently logs only the first value as a scalar
+
+### 10. points3d
+- **Data**: Float32Array (xyz triplets)
+- **Required metadata**: `{ "primitive": "points3d" }`
+- **Optional metadata**:
+  - `"color"`: [r, g, b] RGB values 0-255
+  - `"radii"`: list of float radius values
+
+### 11. points2d
+- **Data**: Float32Array (xy pairs)
+- **Required metadata**: `{ "primitive": "points2d" }`
+
+### 12. lines3d
+- **Data**: Float32Array (xyz triplets defining line segments)
+- **Required metadata**: `{ "primitive": "lines3d" }`
+- **Optional metadata**:
+  - `"color"`: [r, g, b] RGB values 0-255
+  - `"radius"`: float line thickness

## (Experimental) For plotting 3D URDF



node-hub/dora-sam2/dora_sam2/main.py (+3, -1)

@@ -134,7 +134,7 @@ def main():

                 if "boxes2d" in event_id:
                     if len(event["value"]) == 0:
-                        node.send_output("masks", pa.array([]))
+                        node.send_output("masks", pa.array([]), {"primitive": "masks"})
                         continue
                     if isinstance(event["value"], pa.StructArray):
                         boxes2d = event["value"][0].get("bbox").values.to_numpy()
@@ -183,8 +183,10 @@ def main():
                     ## Mask to 3 channel image
                     match return_type:
                         case pa.Array:
+                            metadata["primitive"] = "masks"
                             node.send_output("masks", pa.array(masks.ravel()), metadata)
                         case pa.StructArray:
+                            metadata["primitive"] = "masks"
                             node.send_output(
                                 "masks",
                                 pa.array(


node-hub/dora-yolo/dora_yolo/main.py (+1, -0)

@@ -95,6 +95,7 @@ def main():

                 metadata = event["metadata"]
                 metadata["format"] = bbox_format
+                metadata["primitive"] = "boxes2d"

                 node.send_output(
                     "bbox",


node-hub/llama-factory-recorder/llama_factory_recorder/main.py (+1, -0)

@@ -217,6 +217,7 @@ def main():
                     jsonl_file=default_record_json_path,
                     messages=messages,
                 )
+                metadata["primitive"] = "text"
                 node.send_output(
                     "text",
                     pa.array([ground_truth]),


node-hub/opencv-video-capture/opencv_video_capture/main.py (+1, -0)

@@ -125,6 +125,7 @@ def main():
             metadata["encoding"] = encoding
             metadata["width"] = int(frame.shape[1])
             metadata["height"] = int(frame.shape[0])
+            metadata["primitive"] = "image"

             # Get the right encoding
             if encoding == "rgb8":

