Adding mediapipe with dora-rerun visualization

9 months ago · 44c9fb6dff
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -666,7 +666,7 @@ dependencies = [
 "enumflags2",
 "futures-channel",
 "futures-util",
 "rand 0.9.0",
 "rand 0.9.1",
 "raw-window-handle 0.6.2",
 "serde",
 "serde_repr",
@@ -1640,7 +1640,7 @@ dependencies = [
 "metal 0.27.0",
 "num-traits",
 "num_cpus",
 "rand 0.9.0",
 "rand 0.9.1",
 "rand_distr",
 "rayon",
 "safetensors",
@@ -3302,6 +3302,7 @@ dependencies = [
 "k",
 "ndarray 0.15.6",
 "pyo3",
 "rand 0.9.1",
 "rerun",
 "tokio",
 ]
@@ -4179,7 +4180,7 @@ dependencies = [
 "cudarc",
 "half",
 "num-traits",
 "rand 0.9.0",
 "rand 0.9.1",
 "rand_distr",
 ]

@@ -4951,7 +4952,7 @@ dependencies = [
 "cfg-if 1.0.0",
 "crunchy",
 "num-traits",
 "rand 0.9.0",
 "rand 0.9.1",
 "rand_distr",
 ]

@@ -6754,7 +6755,7 @@ dependencies = [
 "image",
 "indexmap 2.8.0",
 "mistralrs-core",
 "rand 0.9.0",
 "rand 0.9.1",
 "reqwest 0.12.15",
 "serde",
 "serde_json",
@@ -6806,7 +6807,7 @@ dependencies = [
 "objc",
 "once_cell",
 "radix_trie",
 "rand 0.9.0",
 "rand 0.9.1",
 "rand_isaac",
 "rayon",
 "regex",
@@ -7961,7 +7962,7 @@ dependencies = [
 "glob",
 "opentelemetry 0.29.1",
 "percent-encoding",
 "rand 0.9.0",
 "rand 0.9.1",
 "serde_json",
 "thiserror 2.0.12",
 "tokio",
@@ -9021,7 +9022,7 @@ checksum = "b820744eb4dc9b57a3398183639c511b5a26d2ed702cedd3febaa1393caa22cc"
 dependencies = [
 "bytes",
 "getrandom 0.3.2",
 "rand 0.9.0",
 "rand 0.9.1",
 "ring 0.17.14",
 "rustc-hash 2.1.1",
 "rustls 0.23.25",
@@ -9099,13 +9100,12 @@ dependencies = [

 [[package]]
 name = "rand"
 version = "0.9.0"
 version = "0.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94"
 checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
 dependencies = [
 "rand_chacha 0.9.0",
 "rand_core 0.9.3",
 "zerocopy 0.8.24",
 ]

 [[package]]
@@ -9153,7 +9153,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
 dependencies = [
 "num-traits",
 "rand 0.9.0",
 "rand 0.9.1",
 ]

 [[package]]
@@ -11540,7 +11540,7 @@ dependencies = [
 "num-derive",
 "num-traits",
 "paste",
 "rand 0.9.0",
 "rand 0.9.1",
 "serde",
 "serde_repr",
 "socket2 0.5.8",
@@ -14292,7 +14292,7 @@ checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
 dependencies = [
 "getrandom 0.3.2",
 "js-sys",
 "rand 0.9.0",
 "rand 0.9.1",
 "serde",
 "uuid-macro-internal",
 "wasm-bindgen",
--- a/examples/mediapipe/realsense-dev.yml
+++ b/examples/mediapipe/realsense-dev.yml
@@ -0,0 +1,26 @@
 nodes:
  - id: camera
    build: pip install -e ../../node-hub/dora-pyrealsense
    path: dora-pyrealsense
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
      - depth

  - id: dora-mediapipe
    build: pip install -e ../../node-hub/dora-mediapipe
    path: dora-mediapipe
    inputs:
      image: camera/image
      depth: camera/depth
    outputs:
      - points3d

  - id: plot
    build: pip install dora-rerun
    path: dora-rerun
    inputs:
      realsense/image: camera/image
      realsense/depth: camera/depth
      realsense/points3d: dora-mediapipe/points3d
--- a/examples/mediapipe/rgb-dev.yml
+++ b/examples/mediapipe/rgb-dev.yml
@@ -0,0 +1,25 @@
 nodes:
  - id: camera
    build: pip install opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/100
    outputs:
      - image
    env:
      CAPTURE_PATH: 0

  - id: dora-mediapipe
    build: pip install -e ../../node-hub/dora-mediapipe
    path: dora-mediapipe
    inputs:
      image: camera/image
    outputs:
      - points2d

  - id: plot
    build: pip install dora-rerun
    path: dora-rerun
    inputs:
      image: camera/image
      series_mediapipe: dora-mediapipe/points2d
--- a/node-hub/dora-mediapipe/README.md
+++ b/node-hub/dora-mediapipe/README.md
@@ -0,0 +1,40 @@
 # dora-mediapipe

 ## Getting started

 - Install it with uv:

 ```bash
 uv venv -p 3.11 --seed
 uv pip install -e .
 ```

 ## Contribution Guide

 - Format with [ruff](https://docs.astral.sh/ruff/):

 ```bash
 uv pip install ruff
 uv run ruff check . --fix
 ```

 - Lint with ruff:

 ```bash
 uv run ruff check .
 ```

 - Test with [pytest](https://github.com/pytest-dev/pytest)

 ```bash
 uv pip install pytest
 uv run pytest . # Test
 ```

 ## YAML Specification

 ## Examples

 ## License

 dora-mediapipe's code are released under the MIT License
--- a/node-hub/dora-mediapipe/dora_mediapipe/init.py
+++ b/node-hub/dora-mediapipe/dora_mediapipe/init.py
@@ -0,0 +1,13 @@
 """TODO: Add docstring."""

 import os

 # Define the path to the README file relative to the package directory
 readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

 # Read the content of the README file
 try:
    with open(readme_path, encoding="utf-8") as f:
        __doc__ = f.read()
 except FileNotFoundError:
    __doc__ = "README file not found."
--- a/node-hub/dora-mediapipe/dora_mediapipe/main.py
+++ b/node-hub/dora-mediapipe/dora_mediapipe/main.py
@@ -0,0 +1,6 @@
 """TODO: Add docstring."""

 from .main import main

 if __name__ == "__main__":
    main()
--- a/node-hub/dora-mediapipe/dora_mediapipe/main.py
+++ b/node-hub/dora-mediapipe/dora_mediapipe/main.py
@@ -0,0 +1,129 @@
 """TODO: Add docstring."""

 import cv2
 import mediapipe as mp
 import numpy as np
 import pyarrow as pa
 from dora import Node

 # Initialiser MediaPipe Pose
 mp_pose = mp.solutions.pose
 pose = mp_pose.Pose()
 mp_draw = mp.solutions.drawing_utils


 def get_3d_coordinates(landmark, depth_frame, w, h, resolution, focal_length):
    cx, cy = int(landmark.x * w), int(landmark.y * h)
    if 0 < cx < w and 0 < cy < h:
        depth = depth_frame[cx, cy] / 1_000.0
        if depth > 0:
            fx, fy = focal_length
            ppx, ppy = resolution
            x = (cy - ppy) * depth / fy
            y = (cx - ppx) * depth / fx

            # Convert to right-handed coordinate system
            return [x, -y, depth]
    return [0, 0, 0]


 def get_image(event):
    storage = event["value"]
    metadata = event["metadata"]
    encoding = metadata["encoding"]
    width = metadata["width"]
    height = metadata["height"]

    if (
        encoding == "bgr8"
        or encoding == "rgb8"
        or encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]
    ):
        channels = 3
        storage_type = np.uint8
    else:
        raise RuntimeError(f"Unsupported image encoding: {encoding}")

    if encoding == "bgr8":
        frame = (
            storage.to_numpy().astype(storage_type).reshape((height, width, channels))
        )
        frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
    elif encoding == "rgb8":
        frame = (
            storage.to_numpy().astype(storage_type).reshape((height, width, channels))
        )
    elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
        storage = storage.to_numpy()
        frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
        frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
    else:
        raise RuntimeError(f"Unsupported image encoding: {encoding}")
    return frame


 def main():
    """TODO: Add docstring."""
    node = Node()
    depth = None
    focal_length = None
    resolution = None

    for event in node:
        if event["type"] == "INPUT":
            event_id = event["id"]
            if "image" in event_id:
                rgb_image = get_image(event)
                width = rgb_image.shape[1]
                height = rgb_image.shape[0]
                pose_results = pose.process(rgb_image)
                if pose_results.pose_landmarks:
                    values = pose_results.pose_landmarks.landmark
                    values = np.array(
                        [
                            [landmark.x * width, landmark.y * height]
                            for landmark in pose_results.pose_landmarks.landmark
                        ]
                    )
                    # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
                    node.send_output(
                        output_id="points2d",
                        data=pa.array(values.ravel()),
                        metadata={},
                    )
                    if depth is not None:
                        values = np.array(
                            [
                                get_3d_coordinates(
                                    landmark,
                                    depth,
                                    width,
                                    height,
                                    resolution,
                                    focal_length,
                                )
                                for landmark in pose_results.pose_landmarks.landmark
                            ]
                        )
                        # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
                        node.send_output(
                            output_id="points3d",
                            data=pa.array(values.ravel()),
                            metadata={},
                        )

                else:
                    print("No pose landmarks detected.")
            elif "depth" in event_id:
                metadata = event["metadata"]
                encoding = metadata["encoding"]
                width = metadata["width"]
                height = metadata["height"]
                focal_length = metadata["focal_length"]
                resolution = metadata["resolution"]

                depth = event["value"].to_numpy().reshape((height, width))


 if __name__ == "__main__":
    main()
--- a/node-hub/dora-mediapipe/pyproject.toml
+++ b/node-hub/dora-mediapipe/pyproject.toml
@@ -0,0 +1,25 @@
 [project]
 name = "dora-mediapipe"
 version = "0.0.0"
 authors = [{ name = "Your Name", email = "email@email.com" }]
 description = "dora-mediapipe"
 license = { text = "MIT" }
 readme = "README.md"
 requires-python = ">=3.8"

 dependencies = [
    "dora-rs >= 0.3.9",
    "mediapipe>=0.10.14",
 ]

 [dependency-groups]
 dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

 [project.scripts]
 dora-mediapipe = "dora_mediapipe.main:main"

 [tool.ruff.lint]
 extend-select = [
  "D",   # pydocstyle
  "UP"
 ]
--- a/node-hub/dora-mediapipe/tests/test_dora_mediapipe.py
+++ b/node-hub/dora-mediapipe/tests/test_dora_mediapipe.py
@@ -0,0 +1,13 @@
 """Test module for dora_mediapipe package."""

 import pytest


 def test_import_main():
    """Test importing and running the main function."""
    from dora_mediapipe.main import main

    # Check that everything is working, and catch Dora RuntimeError
    # as we're not running in a Dora dataflow.
    with pytest.raises(RuntimeError):
        main()
--- a/node-hub/dora-mediapipe/uv.lock
+++ b/node-hub/dora-mediapipe/uv.lock
--- a/node-hub/dora-rerun/Cargo.toml
+++ b/node-hub/dora-rerun/Cargo.toml
@@ -27,6 +27,7 @@ pyo3 = { workspace = true, features = [
    "generate-import-lib",
 ], optional = true }
 bytemuck = "1.20.0"
 rand = "0.9.1"


 [lib]
--- a/node-hub/dora-rerun/src/lib.rs
+++ b/node-hub/dora-rerun/src/lib.rs
@@ -4,7 +4,7 @@ use std::{collections::HashMap, env::VarError, path::Path};

 use dora_node_api::{
    arrow::{
        array::{Array, AsArray, Float64Array, StringArray, UInt16Array, UInt8Array},
        array::{Array, AsArray, Float32Array, Float64Array, StringArray, UInt16Array, UInt8Array},
        datatypes::Float32Type,
    },
    dora_core::config::DataId,
@@ -30,6 +30,7 @@ pub fn lib_main() -> Result<()> {
    // Setup an image cache to paint depth images.
    let mut image_cache = HashMap::new();
    let mut mask_cache: HashMap<DataId, Vec<bool>> = HashMap::new();
    let mut color_cache: HashMap<DataId, rerun::Color> = HashMap::new();
    let mut options = SpawnOptions::default();

    let memory_limit = match std::env::var("RERUN_MEMORY_LIMIT") {
@@ -349,6 +350,33 @@ pub fn lib_main() -> Result<()> {
                }
            } else if id.as_str().contains("series") {
                update_series(&rec, id, data).context("could not plot series")?;
            } else if id.as_str().contains("points3d") {
                // Get color or assign random color in cache
                let color = color_cache.get(&id);
                let color = if let Some(color) = color {
                    color.clone()
                } else {
                    let color =
                        rerun::Color::from_rgb(rand::random::<u8>(), 180, rand::random::<u8>());

                    color_cache.insert(id.clone(), color.clone());
                    color
                };
                let dataid = id;

                // get a random color
                if let Ok(buffer) = into_vec::<f32>(&data) {
                    let mut points = vec![];
                    let mut colors = vec![];
                    buffer.chunks(3).for_each(|chunk| {
                        points.push((chunk[0], chunk[1], chunk[2]));
                        colors.push(color);
                    });
                    let points = Points3D::new(points).with_radii(vec![0.013; colors.len()]);

                    rec.log(dataid.as_str(), &points.with_colors(colors))
                        .context("could not log points")?;
                }
            } else {
                println!("Could not find handler for {}", id);
            }