
Adding example dataflow

tags/v0.3.12-rc0
haixuanTao committed 9 months ago
commit 87c7df5838
6 changed files with 425 additions and 4 deletions
1. examples/reachy2-remote/dataflow_reachy.yml (+180 -0)
2. examples/reachy2-remote/parse_bbox.py (+66 -0)
3. examples/reachy2-remote/parse_point.py (+47 -0)
4. examples/reachy2-remote/parse_whisper.py (+75 -0)
5. examples/reachy2-remote/whisper-dev.yml (+42 -0)
6. node-hub/dora-cotracker/dora_cotracker/main.py (+15 -4)

examples/reachy2-remote/dataflow_reachy.yml (+180 -0)

@@ -0,0 +1,180 @@
nodes:
  - id: camera
    path: dora-reachy2-camera
    _unstable_deploy:
      machine: encoder
    inputs:
      tick: dora/timer/millis/10
    outputs:
      - image_left
      - image_depth
      - depth
    env:
      CAPTURE_PATH: 0
      IMAGE_WIDTH: 640
      IMAGE_HEIGHT: 480
      ROBOT_IP: 127.0.0.1

  - id: rav1e-local-image
    path: dora-rav1e
    build: cargo build -p dora-rav1e --release
    _unstable_deploy:
      machine: encoder
    inputs:
      image_depth: camera/image_depth
      image_left: camera/image_left
    outputs:
      - image_left
      - image_depth
      - depth
    env:
      RAV1E_SPEED: 10

  - id: dav1d-remote
    path: dora-dav1d
    build: cargo build -p dora-dav1d --release
    _unstable_deploy:
      machine: gpu
    inputs:
      image_depth: rav1e-local-image/image_depth
      image_left: rav1e-local-image/image_left
      # depth: rav1e-local/depth
    outputs:
      - image_left
      - image_depth
      - depth

  - id: dora-microphone
    build: pip install -e ../../node-hub/dora-microphone
    path: dora-microphone
    _unstable_deploy:
      machine: macbook
    inputs:
      tick: dora/timer/millis/2000
    outputs:
      - audio

  - id: dora-vad
    build: pip install -e ../../node-hub/dora-vad
    _unstable_deploy:
      machine: macbook
    path: dora-vad
    inputs:
      audio: dora-microphone/audio
    outputs:
      - audio

  - id: dora-distil-whisper
    build: pip install -e ../../node-hub/dora-distil-whisper
    _unstable_deploy:
      machine: macbook
    path: dora-distil-whisper
    inputs:
      input: dora-vad/audio
    outputs:
      - text
    env:
      TARGET_LANGUAGE: english

  - id: parse_whisper
    path: parse_whisper.py
    _unstable_deploy:
      machine: gpu
    inputs:
      text: dora-distil-whisper/text
    outputs:
      - bbox
      - action
      - points
      - text
    env:
      IMAGE_RESIZE_RATIO: "1.0"

  - id: dora-qwenvl
    build: pip install -e ../../node-hub/dora-qwen2-5-vl
    path: dora-qwen2-5-vl
    _unstable_deploy:
      machine: gpu
    inputs:
      image_left: dav1d-remote/image_left
      text: parse_whisper/text
    outputs:
      - text
    env:
      DEFAULT_QUESTION: Output the bounding box of the suitcase.
      IMAGE_RESIZE_RATIO: "1.0"

  - id: parse_bbox
    path: parse_bbox.py
    _unstable_deploy:
      machine: gpu
    inputs:
      text: dora-qwenvl/text
      points: parse_whisper/points
    outputs:
      - bbox
    env:
      IMAGE_RESIZE_RATIO: "1.0"

  - id: tracker
    build: pip install -e ../../node-hub/dora-cotracker
    path: dora-cotracker
    _unstable_deploy:
      machine: gpu
    inputs:
      image: dav1d-remote/image_left
      boxes2d: parse_bbox/bbox
    outputs:
      - tracked_image
      - points
    env:
      INTERACTIVE_MODE: false

  # - id: sam2
  #   build: pip install -e ../../node-hub/dora-sam2
  #   path: dora-sam2
  #   _unstable_deploy:
  #     machine: gpu
  #   inputs:
  #     image_left: dav1d-remote/image_left
  #     boxes2d: parse_bbox/bbox
  #   outputs:
  #     - masks

  - id: parse_point
    path: parse_point.py
    _unstable_deploy:
      machine: gpu
    inputs:
      points: tracker/points
    outputs:
      - action
    env:
      IMAGE_RESIZE_RATIO: "1.0"

  - id: reachy-mobile-base
    build: pip install -e ../../node-hub/dora-reachy2
    path: dora-reachy2-mobile-base
    _unstable_deploy:
      machine: encoder
    inputs:
      action_base: parse_point/action
      action_whipser: parse_whisper/action
    outputs:
      - response_base
    env:
      ROBOT_IP: 127.0.0.1

  - id: plot
    build: pip install -e ../../node-hub/dora-rerun
    path: dora-rerun
    _unstable_deploy:
      machine: macbook
    inputs:
      image: dav1d-remote/image_left
      image_depth: dav1d-remote/image_depth
      boxes2d: parse_bbox/bbox
      original_text: dora-distil-whisper/text
      parsed_text: parse_whisper/text
      qwenvl_text: dora-qwenvl/text
      tracked_image: tracker/tracked_image
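
The `path:` entries ending in `.py` refer to the helper scripts added in this commit; like every dora Python node, they construct a `Node`, iterate over events, and emit Arrow arrays with `send_output`. A minimal sketch of that shared pattern (the ids here are illustrative):

# Minimal sketch of the event-loop pattern shared by the parse_* scripts below.
# Input ids ("text") and output ids ("action") come from the dataflow YAML above.
import pyarrow as pa
from dora import Node

node = Node()  # connects to the dataflow under the id given in the YAML

for event in node:
    if event["type"] == "INPUT" and event["id"] == "text":
        # event["value"] is a pyarrow array; metadata is a plain dict
        text = event["value"][0].as_py()
        node.send_output("action", pa.array([0.0]), metadata={"source": "sketch"})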

examples/reachy2-remote/parse_bbox.py (+66 -0)

@@ -0,0 +1,66 @@
"""TODO: Add docstring."""

import json
import os

import numpy as np
import pyarrow as pa
from dora import Node

node = Node()

IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0"))


def extract_bboxes(json_text):
"""Extract bounding boxes from a JSON string with markdown markers and return them as a NumPy array.

Parameters
----------
json_text : str
JSON string containing bounding box data, including ```json markers.

Returns
-------
np.ndarray: NumPy array of bounding boxes.

"""
# Ensure all lines are stripped of whitespace and markers
lines = json_text.strip().splitlines()

# Filter out lines that are markdown markers
clean_lines = [line for line in lines if not line.strip().startswith("```")]

# Join the lines back into a single string
clean_text = "\n".join(clean_lines)
# Parse the cleaned JSON text
try:
data = json.loads(clean_text)

# Extract bounding boxes
bboxes = [item["bbox_2d"] for item in data]
labels = [item["label"] for item in data]

return np.array(bboxes), np.array(labels)
except Exception as _e: # noqa
pass
return None, None


for event in node:
if event["type"] == "INPUT":
if len(event["value"]) == 0:
node.send_output("bbox", pa.array([]))
continue

text = event["value"][0].as_py()
image_id = event["metadata"]["image_id"]

bboxes, labels = extract_bboxes(text)
if bboxes is not None and len(bboxes) > 0:
bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO)
node.send_output(
"bbox",
pa.array(bboxes.ravel()),
metadata={"encoding": "xyxy", "image_id": image_id},
)
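
Because the boxes are flattened with `ravel()` and tagged `encoding: xyxy`, a consumer must reshape the payload back to `(N, 4)`. A minimal sketch of the receiving side (the sample values are illustrative, not from the commit):

import numpy as np
import pyarrow as pa

# A flattened xyxy payload as parse_bbox.py sends it: two boxes, 8 floats
payload = pa.array([10.0, 20.0, 110.0, 220.0, 30.0, 40.0, 90.0, 100.0])

boxes = payload.to_numpy().reshape((-1, 4))  # one row per box: x_min, y_min, x_max, y_max
assert boxes.shape == (2, 4)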

examples/reachy2-remote/parse_point.py (+47 -0)

@@ -0,0 +1,47 @@
"""TODO: Add docstring."""

import json
import os

import numpy as np
import pyarrow as pa
from dora import Node

node = Node()

IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0"))


for event in node:
if event["type"] == "INPUT":
text = event["value"][0].as_py()
width = event["metadata"]["width"]
height = event["metadata"]["height"]
values = event["value"].to_numpy().reshape((-1, 2))
values = values * int(1 / IMAGE_RESIZE_RATIO)

# Do point 0 first
if len(values) == 0:
print("No points detected")
continue
elif len(values) > 1:
print("Multiple points detected, taking the first one")
point = values[0]

rz = int((width / 2) - point[0]) / (width / 2)
x_distance = min(height / 2, height - point[1])

if abs(rz) > 0.3:
rz = np.deg2rad(30) * np.sign(rz)
elif abs(rz) > 0.1:
rz = np.deg2rad(30) * np.sign(rz)
else:
x = 0

if x_distance > (height * 0.15):
x = 0.5
else:
x = 0
# Action
action = pa.array([x, 0, 0, 0, 0, rz])
node.send_output("action", action)

examples/reachy2-remote/parse_whisper.py (+75 -0)

@@ -0,0 +1,75 @@
"""TODO: Add docstring."""

import json
import os
import time

import numpy as np
import pyarrow as pa
from dora import Node

node = Node()

IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0"))


def extract_bboxes(json_text):
"""Extract bounding boxes from a JSON string with markdown markers and return them as a NumPy array.

Parameters
----------
json_text : str
JSON string containing bounding box data, including ```json markers.

Returns
-------
np.ndarray: NumPy array of bounding boxes.

"""
# Ensure all lines are stripped of whitespace and markers
lines = json_text.strip().splitlines()

# Filter out lines that are markdown markers
clean_lines = [line for line in lines if not line.strip().startswith("```")]

# Join the lines back into a single string
clean_text = "\n".join(clean_lines)
# Parse the cleaned JSON text
try:
data = json.loads(clean_text)

# Extract bounding boxes
bboxes = [item["bbox_2d"] for item in data]
labels = [item["label"] for item in data]

return np.array(bboxes), np.array(labels)
except Exception as _e: # noqa
pass
return None, None


for event in node:
if event["type"] == "INPUT":
text = event["value"][0].as_py().lower()

if "stop" in text:
node.send_output("points", pa.array([], type=pa.float64()))
elif "follow" in text:
text = f"Given the prompt: {text}. Output the bounding boxes for the given followed object"
node.send_output("text", pa.array([text]), {"image_id": "image_left"})
elif "left" in text:
action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)])
time.sleep(0.25)
action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)])
time.sleep(0.25)
action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)])
node.send_output("points", pa.array([]))
node.send_output("action", action)
elif "right" in text:
action = pa.array([0.0, 0, 0, 0, 0, -np.deg2rad(160)])
time.sleep(0.25)
action = pa.array([0.0, 0, 0, 0, 0, -np.deg2rad(160)])
time.sleep(0.25)
action = pa.array([0.0, 0, 0, 0, 0, -np.deg2rad(160)])
node.send_output("points", pa.array([]))
node.send_output("action", action)

examples/reachy2-remote/whisper-dev.yml (+42 -0)

@@ -0,0 +1,42 @@
nodes:
  - id: dora-microphone
    build: pip install -e ../../node-hub/dora-microphone
    path: dora-microphone
    _unstable_deploy:
      machine: macbook
    inputs:
      tick: dora/timer/millis/2000
    outputs:
      - audio

  - id: dora-vad
    build: pip install -e ../../node-hub/dora-vad
    _unstable_deploy:
      machine: macbook
    path: dora-vad
    inputs:
      audio: dora-microphone/audio
    outputs:
      - audio

  - id: dora-distil-whisper
    build: pip install -e ../../node-hub/dora-distil-whisper
    _unstable_deploy:
      machine: macbook
    path: dora-distil-whisper
    inputs:
      input: dora-vad/audio
    outputs:
      - text
    env:
      TARGET_LANGUAGE: english
      # For China
      # USE_MODELSCOPE_HUB: true

  - id: dora-rerun
    build: cargo build -p dora-rerun --release
    _unstable_deploy:
      machine: macbook
    path: dora-rerun
    inputs:
      original_text: dora-distil-whisper/text
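
whisper-dev.yml is a cut-down graph for validating the audio chain (microphone, VAD, Whisper, rerun) without the robot. To eyeball transcripts without rerun, one could wire an extra node to dora-distil-whisper/text; a hypothetical print_text.py sketch, not part of this commit:

# Hypothetical print_text.py: dumps each Whisper transcript to stdout.
# Assumes a node entry with path: print_text.py and input text: dora-distil-whisper/text.
from dora import Node

node = Node()

for event in node:
    if event["type"] == "INPUT" and event["id"] == "text":
        print("whisper:", event["value"][0].as_py())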

node-hub/dora-cotracker/dora_cotracker/main.py (+15 -4)

@@ -25,6 +25,7 @@ class VideoTrackingNode:
         self.accept_new_points = True
         self.clicked_points = []
         self.input_points = []
+        self.input_masks = []

     def mouse_callback(self, event, x, y, flags, param):
         if event == cv2.EVENT_LBUTTONDOWN:
@@ -52,9 +53,9 @@ class VideoTrackingNode:
             # Track points
             pred_tracks, pred_visibility = self.model(
                 video_chunk,
-                queries=queries,
                 is_first_step=self.is_first_step,
                 grid_size=0,
+                queries=queries,
                 add_support_grid=False,
             )
             self.is_first_step = False
@@ -118,6 +119,8 @@
                     "num_points": len(visible_tracks),
                     "dtype": "float32",
                     "shape": (len(visible_tracks), 2),
+                    "width": frame.shape[1],
+                    "height": frame.shape[0],
                 },
             )

@@ -153,7 +156,7 @@
                 cv2.imshow("Interactive Feed to track point", display_frame)
                 cv2.waitKey(1)

-            if event["id"] == "points":
+            elif event["id"] == "points":
                 if not self.accept_new_points:
                     continue
                 # Handle points from input_stream node
@@ -162,9 +165,13 @@
                 self.input_points = points_array.reshape((-1, 2)).tolist()
                 self.accept_new_points = False
                 self.is_first_step = True
-            if event["id"] == "boxes2d":
+            elif event["id"] == "boxes2d":
                 if not self.accept_new_points:
                     continue
+                if len(event["value"]) == 0:
+                    self.input_points = []
+                    self.is_first_step = True
+                    continue

                 # Handle points from input_stream node
                 metadata = event["metadata"]
@@ -185,7 +192,11 @@
                     _labels = None

                 self.input_points = [
-                    [int((x_min + x_max) / 2), int((y_min + y_max) / 2)]
+                    [
+                        int(x_min + (x_max - x_min) * 2 / 4),
+                        int(y_min + (y_max - y_min) * i / 10),
+                    ]
+                    for i in range(4, 7)
                     for x_min, y_min, x_max, y_max in boxes2d
                 ]
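
The last hunk changes how a bounding box is turned into tracker queries: instead of one point at the box center, each box now seeds three points along its vertical center line, at 40%, 50% and 60% of the box height, which presumably keeps tracking alive when a single point drifts off the object. What the new comprehension produces for one 100x200 box:

# Evaluate the new seeding rule for a single box at the origin.
x_min, y_min, x_max, y_max = 0, 0, 100, 200

points = [
    [
        int(x_min + (x_max - x_min) * 2 / 4),   # horizontal center
        int(y_min + (y_max - y_min) * i / 10),  # 40%, 50%, 60% of the height
    ]
    for i in range(4, 7)
]
print(points)  # [[50, 80], [50, 100], [50, 120]]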


