Browse Source

Adding mediapipe with dora-rerun visualization

tags/v0.3.12-rc0
haixuanTao haixuantao 9 months ago
parent
commit
44c9fb6dff
12 changed files with 2573 additions and 15 deletions
  1. +14
    -14
      Cargo.lock
  2. +26
    -0
      examples/mediapipe/realsense-dev.yml
  3. +25
    -0
      examples/mediapipe/rgb-dev.yml
  4. +40
    -0
      node-hub/dora-mediapipe/README.md
  5. +13
    -0
      node-hub/dora-mediapipe/dora_mediapipe/__init__.py
  6. +6
    -0
      node-hub/dora-mediapipe/dora_mediapipe/__main__.py
  7. +129
    -0
      node-hub/dora-mediapipe/dora_mediapipe/main.py
  8. +25
    -0
      node-hub/dora-mediapipe/pyproject.toml
  9. +13
    -0
      node-hub/dora-mediapipe/tests/test_dora_mediapipe.py
  10. +2252
    -0
      node-hub/dora-mediapipe/uv.lock
  11. +1
    -0
      node-hub/dora-rerun/Cargo.toml
  12. +29
    -1
      node-hub/dora-rerun/src/lib.rs

+ 14
- 14
Cargo.lock View File

@@ -666,7 +666,7 @@ dependencies = [
"enumflags2",
"futures-channel",
"futures-util",
"rand 0.9.0",
"rand 0.9.1",
"raw-window-handle 0.6.2",
"serde",
"serde_repr",
@@ -1640,7 +1640,7 @@ dependencies = [
"metal 0.27.0",
"num-traits",
"num_cpus",
"rand 0.9.0",
"rand 0.9.1",
"rand_distr",
"rayon",
"safetensors",
@@ -3302,6 +3302,7 @@ dependencies = [
"k",
"ndarray 0.15.6",
"pyo3",
"rand 0.9.1",
"rerun",
"tokio",
]
@@ -4179,7 +4180,7 @@ dependencies = [
"cudarc",
"half",
"num-traits",
"rand 0.9.0",
"rand 0.9.1",
"rand_distr",
]

@@ -4951,7 +4952,7 @@ dependencies = [
"cfg-if 1.0.0",
"crunchy",
"num-traits",
"rand 0.9.0",
"rand 0.9.1",
"rand_distr",
]

@@ -6754,7 +6755,7 @@ dependencies = [
"image",
"indexmap 2.8.0",
"mistralrs-core",
"rand 0.9.0",
"rand 0.9.1",
"reqwest 0.12.15",
"serde",
"serde_json",
@@ -6806,7 +6807,7 @@ dependencies = [
"objc",
"once_cell",
"radix_trie",
"rand 0.9.0",
"rand 0.9.1",
"rand_isaac",
"rayon",
"regex",
@@ -7961,7 +7962,7 @@ dependencies = [
"glob",
"opentelemetry 0.29.1",
"percent-encoding",
"rand 0.9.0",
"rand 0.9.1",
"serde_json",
"thiserror 2.0.12",
"tokio",
@@ -9021,7 +9022,7 @@ checksum = "b820744eb4dc9b57a3398183639c511b5a26d2ed702cedd3febaa1393caa22cc"
dependencies = [
"bytes",
"getrandom 0.3.2",
"rand 0.9.0",
"rand 0.9.1",
"ring 0.17.14",
"rustc-hash 2.1.1",
"rustls 0.23.25",
@@ -9099,13 +9100,12 @@ dependencies = [

[[package]]
name = "rand"
version = "0.9.0"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94"
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.3",
"zerocopy 0.8.24",
]

[[package]]
@@ -9153,7 +9153,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
dependencies = [
"num-traits",
"rand 0.9.0",
"rand 0.9.1",
]

[[package]]
@@ -11540,7 +11540,7 @@ dependencies = [
"num-derive",
"num-traits",
"paste",
"rand 0.9.0",
"rand 0.9.1",
"serde",
"serde_repr",
"socket2 0.5.8",
@@ -14292,7 +14292,7 @@ checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
dependencies = [
"getrandom 0.3.2",
"js-sys",
"rand 0.9.0",
"rand 0.9.1",
"serde",
"uuid-macro-internal",
"wasm-bindgen",


+ 26
- 0
examples/mediapipe/realsense-dev.yml View File

@@ -0,0 +1,26 @@
nodes:
- id: camera
build: pip install -e ../../node-hub/dora-pyrealsense
path: dora-pyrealsense
inputs:
tick: dora/timer/millis/100
outputs:
- image
- depth

- id: dora-mediapipe
build: pip install -e ../../node-hub/dora-mediapipe
path: dora-mediapipe
inputs:
image: camera/image
depth: camera/depth
outputs:
- points3d

- id: plot
build: pip install dora-rerun
path: dora-rerun
inputs:
realsense/image: camera/image
realsense/depth: camera/depth
realsense/points3d: dora-mediapipe/points3d

+ 25
- 0
examples/mediapipe/rgb-dev.yml View File

@@ -0,0 +1,25 @@
nodes:
- id: camera
build: pip install opencv-video-capture
path: opencv-video-capture
inputs:
tick: dora/timer/millis/100
outputs:
- image
env:
CAPTURE_PATH: 0

- id: dora-mediapipe
build: pip install -e ../../node-hub/dora-mediapipe
path: dora-mediapipe
inputs:
image: camera/image
outputs:
- points2d

- id: plot
build: pip install dora-rerun
path: dora-rerun
inputs:
image: camera/image
series_mediapipe: dora-mediapipe/points2d

+ 40
- 0
node-hub/dora-mediapipe/README.md View File

@@ -0,0 +1,40 @@
# dora-mediapipe

## Getting started

- Install it with uv:

```bash
uv venv -p 3.11 --seed
uv pip install -e .
```

## Contribution Guide

- Format with [ruff](https://docs.astral.sh/ruff/):

```bash
uv pip install ruff
uv run ruff check . --fix
```

- Lint with ruff:

```bash
uv run ruff check .
```

- Test with [pytest](https://github.com/pytest-dev/pytest)

```bash
uv pip install pytest
uv run pytest . # Test
```

## YAML Specification

## Examples

## License

dora-mediapipe's code are released under the MIT License

+ 13
- 0
node-hub/dora-mediapipe/dora_mediapipe/__init__.py View File

@@ -0,0 +1,13 @@
"""TODO: Add docstring."""

import os

# Define the path to the README file relative to the package directory
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Read the content of the README file
try:
with open(readme_path, encoding="utf-8") as f:
__doc__ = f.read()
except FileNotFoundError:
__doc__ = "README file not found."

+ 6
- 0
node-hub/dora-mediapipe/dora_mediapipe/__main__.py View File

@@ -0,0 +1,6 @@
"""TODO: Add docstring."""

from .main import main

if __name__ == "__main__":
main()

+ 129
- 0
node-hub/dora-mediapipe/dora_mediapipe/main.py View File

@@ -0,0 +1,129 @@
"""TODO: Add docstring."""

import cv2
import mediapipe as mp
import numpy as np
import pyarrow as pa
from dora import Node

# Initialiser MediaPipe Pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_draw = mp.solutions.drawing_utils


def get_3d_coordinates(landmark, depth_frame, w, h, resolution, focal_length):
cx, cy = int(landmark.x * w), int(landmark.y * h)
if 0 < cx < w and 0 < cy < h:
depth = depth_frame[cx, cy] / 1_000.0
if depth > 0:
fx, fy = focal_length
ppx, ppy = resolution
x = (cy - ppy) * depth / fy
y = (cx - ppx) * depth / fx

# Convert to right-handed coordinate system
return [x, -y, depth]
return [0, 0, 0]


def get_image(event):
storage = event["value"]
metadata = event["metadata"]
encoding = metadata["encoding"]
width = metadata["width"]
height = metadata["height"]

if (
encoding == "bgr8"
or encoding == "rgb8"
or encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]
):
channels = 3
storage_type = np.uint8
else:
raise RuntimeError(f"Unsupported image encoding: {encoding}")

if encoding == "bgr8":
frame = (
storage.to_numpy().astype(storage_type).reshape((height, width, channels))
)
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
elif encoding == "rgb8":
frame = (
storage.to_numpy().astype(storage_type).reshape((height, width, channels))
)
elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
storage = storage.to_numpy()
frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
else:
raise RuntimeError(f"Unsupported image encoding: {encoding}")
return frame


def main():
"""TODO: Add docstring."""
node = Node()
depth = None
focal_length = None
resolution = None

for event in node:
if event["type"] == "INPUT":
event_id = event["id"]
if "image" in event_id:
rgb_image = get_image(event)
width = rgb_image.shape[1]
height = rgb_image.shape[0]
pose_results = pose.process(rgb_image)
if pose_results.pose_landmarks:
values = pose_results.pose_landmarks.landmark
values = np.array(
[
[landmark.x * width, landmark.y * height]
for landmark in pose_results.pose_landmarks.landmark
]
)
# Warning: Make sure to add my_output_id and my_input_id within the dataflow.
node.send_output(
output_id="points2d",
data=pa.array(values.ravel()),
metadata={},
)
if depth is not None:
values = np.array(
[
get_3d_coordinates(
landmark,
depth,
width,
height,
resolution,
focal_length,
)
for landmark in pose_results.pose_landmarks.landmark
]
)
# Warning: Make sure to add my_output_id and my_input_id within the dataflow.
node.send_output(
output_id="points3d",
data=pa.array(values.ravel()),
metadata={},
)

else:
print("No pose landmarks detected.")
elif "depth" in event_id:
metadata = event["metadata"]
encoding = metadata["encoding"]
width = metadata["width"]
height = metadata["height"]
focal_length = metadata["focal_length"]
resolution = metadata["resolution"]

depth = event["value"].to_numpy().reshape((height, width))


if __name__ == "__main__":
main()

+ 25
- 0
node-hub/dora-mediapipe/pyproject.toml View File

@@ -0,0 +1,25 @@
[project]
name = "dora-mediapipe"
version = "0.0.0"
authors = [{ name = "Your Name", email = "email@email.com" }]
description = "dora-mediapipe"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.8"

dependencies = [
"dora-rs >= 0.3.9",
"mediapipe>=0.10.14",
]

[dependency-groups]
dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

[project.scripts]
dora-mediapipe = "dora_mediapipe.main:main"

[tool.ruff.lint]
extend-select = [
"D", # pydocstyle
"UP"
]

+ 13
- 0
node-hub/dora-mediapipe/tests/test_dora_mediapipe.py View File

@@ -0,0 +1,13 @@
"""Test module for dora_mediapipe package."""

import pytest


def test_import_main():
"""Test importing and running the main function."""
from dora_mediapipe.main import main

# Check that everything is working, and catch Dora RuntimeError
# as we're not running in a Dora dataflow.
with pytest.raises(RuntimeError):
main()

+ 2252
- 0
node-hub/dora-mediapipe/uv.lock
File diff suppressed because it is too large
View File


+ 1
- 0
node-hub/dora-rerun/Cargo.toml View File

@@ -27,6 +27,7 @@ pyo3 = { workspace = true, features = [
"generate-import-lib",
], optional = true }
bytemuck = "1.20.0"
rand = "0.9.1"


[lib]


+ 29
- 1
node-hub/dora-rerun/src/lib.rs View File

@@ -4,7 +4,7 @@ use std::{collections::HashMap, env::VarError, path::Path};

use dora_node_api::{
arrow::{
array::{Array, AsArray, Float64Array, StringArray, UInt16Array, UInt8Array},
array::{Array, AsArray, Float32Array, Float64Array, StringArray, UInt16Array, UInt8Array},
datatypes::Float32Type,
},
dora_core::config::DataId,
@@ -30,6 +30,7 @@ pub fn lib_main() -> Result<()> {
// Setup an image cache to paint depth images.
let mut image_cache = HashMap::new();
let mut mask_cache: HashMap<DataId, Vec<bool>> = HashMap::new();
let mut color_cache: HashMap<DataId, rerun::Color> = HashMap::new();
let mut options = SpawnOptions::default();

let memory_limit = match std::env::var("RERUN_MEMORY_LIMIT") {
@@ -349,6 +350,33 @@ pub fn lib_main() -> Result<()> {
}
} else if id.as_str().contains("series") {
update_series(&rec, id, data).context("could not plot series")?;
} else if id.as_str().contains("points3d") {
// Get color or assign random color in cache
let color = color_cache.get(&id);
let color = if let Some(color) = color {
color.clone()
} else {
let color =
rerun::Color::from_rgb(rand::random::<u8>(), 180, rand::random::<u8>());

color_cache.insert(id.clone(), color.clone());
color
};
let dataid = id;

// get a random color
if let Ok(buffer) = into_vec::<f32>(&data) {
let mut points = vec![];
let mut colors = vec![];
buffer.chunks(3).for_each(|chunk| {
points.push((chunk[0], chunk[1], chunk[2]));
colors.push(color);
});
let points = Points3D::new(points).with_radii(vec![0.013; colors.len()]);

rec.log(dataid.as_str(), &points.with_colors(colors))
.context("could not log points")?;
}
} else {
println!("Could not find handler for {}", id);
}


Loading…
Cancel
Save