
Pick place demo (#793)

Demo of Reachy doing a pick-and-place exercise.
tags/v0.3.10-rc3
Haixuan Xavier Tao · 10 months ago
commit 269d23e592
16 changed files with 1470 additions and 653 deletions:

  1. Cargo.lock (+481, -506)
  2. apis/python/operator/src/lib.rs (+9, -0)
  3. examples/reachy2/parse_bbox_minimal.py (+81, -0)
  4. examples/reachy2/pick-place-dev.yml (+151, -0)
  5. examples/reachy2/pick_place.py (+401, -0)
  6. libraries/message/src/metadata.rs (+1, -0)
  7. node-hub/dora-object-to-pose/Cargo.toml (+1, -1)
  8. node-hub/dora-object-to-pose/src/lib.rs (+133, -100)
  9. node-hub/dora-reachy2/dora_reachy2/camera.py (+1, -1)
  10. node-hub/dora-reachy2/dora_reachy2/left_arm.py (+14, -4)
  11. node-hub/dora-reachy2/dora_reachy2/right_arm.py (+14, -4)
  12. node-hub/dora-rerun/Cargo.toml (+1, -1)
  13. node-hub/dora-rerun/pyproject.toml (+1, -1)
  14. node-hub/dora-rerun/src/boxes2d.rs (+50, -17)
  15. node-hub/dora-rerun/src/lib.rs (+0, -1)
  16. node-hub/dora-sam2/dora_sam2/main.py (+131, -17)

Cargo.lock (+481, -506)
File diff suppressed because it is too large


apis/python/operator/src/lib.rs (+9, -0)

@@ -199,6 +199,12 @@ pub fn pydict_to_metadata(dict: Option<Bound<'_, PyDict>>) -> Result<MetadataPar
     {
         let list: Vec<f64> = value.extract()?;
         parameters.insert(key, Parameter::ListFloat(list))
+    } else if value.is_instance_of::<PyList>()
+        && value.len()? > 0
+        && value.get_item(0)?.is_exact_instance_of::<PyString>()
+    {
+        let list: Vec<String> = value.extract()?;
+        parameters.insert(key, Parameter::ListString(list))
     } else {
         println!("could not convert type {value}");
         parameters.insert(key, Parameter::String(value.str()?.to_string()))
@@ -233,6 +239,9 @@ pub fn metadata_to_pydict<'a>(
         Parameter::ListFloat(l) => dict
             .set_item(k, l)
             .context("Could not insert metadata into python dictionary")?,
+        Parameter::ListString(l) => dict
+            .set_item(k, l)
+            .context("Could not insert metadata into python dictionary")?,
     }
 }
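
Together with the `Parameter::ListString` variant added in `libraries/message/src/metadata.rs` below, this lets list-of-string metadata round-trip between Python and Rust nodes instead of being stringified by the fallback branch. A minimal sketch of the Python side (output name and values are hypothetical):

    # Hypothetical sketch: a list of strings in the metadata dict now maps
    # to Parameter::ListString on the Rust side.
    import pyarrow as pa
    from dora import Node

    node = Node()
    node.send_output(
        "bbox",
        pa.array([10.0, 20.0, 110.0, 220.0]),
        metadata={"labels": ["orange", "metal box"]},
    )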



examples/reachy2/parse_bbox_minimal.py (+81, -0)

@@ -0,0 +1,81 @@
import json
import os

import numpy as np
import pyarrow as pa
from dora import Node

node = Node()

IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0"))


def extract_bboxes(json_text) -> (np.ndarray, np.ndarray):
    """
    Extracts bounding boxes from a JSON string with markdown markers and returns them as a NumPy array.

    Parameters:
    json_text (str): JSON string containing bounding box data, including ```json markers.

    Returns:
    np.ndarray: NumPy array of bounding boxes.
    """
    # Ensure all lines are stripped of whitespace and markers
    lines = json_text.strip().splitlines()

    # Filter out lines that are markdown markers
    clean_lines = [line for line in lines if not line.strip().startswith("```")]

    # Join the lines back into a single string
    clean_text = "\n".join(clean_lines)
    # Parse the cleaned JSON text
    try:
        data = json.loads(clean_text)

        # Extract bounding boxes
        bboxes = [item["bbox_2d"] for item in data]
        labels = [item["label"] for item in data]

        return np.array(bboxes), np.array(labels)
    except Exception as _e:  # noqa
        pass
    return None, None


for event in node:
    text = "Put the chocolate in the white plate"
    if event["type"] == "INPUT":
        if event["id"] == "prompt":
            prompt = event["value"][0].as_py()

        elif event["id"] == "text":
            text = event["value"][0].as_py()
            image_id = event["metadata"]["image_id"]

            bboxes, labels = extract_bboxes(text)
            if bboxes is not None and len(bboxes) > 0:
                bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO)
                unique_labels = np.unique(labels)
                idx = []
                order = []
                for label in unique_labels:
                    if label in prompt:
                        # Get the index of the start of the label in the prompt
                        order.append(prompt.index(label))
                        idx.append(np.where(labels == label)[0][0])

                if len(idx) == 0:
                    continue
                # Reorder idx given the order
                # print(idx, order)
                idx = np.array(idx)[np.argsort(order)].ravel()
                bboxes = bboxes[idx]
                # Check for duplicated box
                if len(np.unique(bboxes, axis=0)) != len(bboxes):
                    print("Duplicated box")
                    continue
                node.send_output(
                    "bbox",
                    pa.array([{"bbox": bboxes.ravel(), "labels": labels[idx]}]),
                    metadata={"encoding": "xyxy", "image_id": image_id},
                )
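
For context, the fenced JSON this parser expects from the VLM looks like the following; the field names `bbox_2d` and `label` come from the code above, while the values are made up:

    # Assumed example of the model output handled by extract_bboxes above.
    sample = """```json
    [
        {"bbox_2d": [10, 20, 110, 220], "label": "orange"},
        {"bbox_2d": [300, 40, 420, 260], "label": "metal box"}
    ]
    ```"""
    bboxes, labels = extract_bboxes(sample)
    # bboxes.shape == (2, 4) in xyxy pixel order; labels == ["orange", "metal box"]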

examples/reachy2/pick-place-dev.yml (+151, -0)

@@ -0,0 +1,151 @@
nodes:
  - id: dora-microphone
    build: pip install -e ../../node-hub/dora-microphone
    path: dora-microphone
    inputs:
      tick: dora/timer/millis/2000
    outputs:
      - audio

  - id: sam2
    build: pip install -e ../../node-hub/dora-sam2
    path: dora-sam2
    inputs:
      image_depth: reachy-camera/image_depth
      boxes2d: parse_bbox/bbox
    outputs:
      - masks

  - id: dora-vad
    build: pip install -e ../../node-hub/dora-vad
    path: dora-vad
    inputs:
      audio: dora-microphone/audio
    outputs:
      - audio

  - id: dora-distil-whisper
    build: pip install -e ../../node-hub/dora-distil-whisper
    path: dora-distil-whisper
    inputs:
      input: dora-vad/audio
    outputs:
      - text
    env:
      TARGET_LANGUAGE: english
      TRANSLATE: true

  - id: reachy-mobile-base
    build: pip install -e ../../node-hub/dora-reachy2
    path: dora-reachy2-mobile-base
    inputs:
      action_base: state_machine/action_base
    outputs:
      - response_base

  - id: reachy-left-arm
    build: pip install -e ../../node-hub/dora-reachy2
    path: dora-reachy2-left-arm
    inputs:
      pose: state_machine/action_l_arm
    outputs:
      - response_l_arm

  - id: reachy-right-arm
    build: pip install -e ../../node-hub/dora-reachy2
    path: dora-reachy2-right-arm
    inputs:
      pose: state_machine/action_r_arm
    outputs:
      - response_r_arm

  - id: reachy-camera
    build: pip install -e ../../node-hub/dora-reachy2
    path: dora-reachy2-camera
    inputs:
      tick: dora/timer/millis/50
    outputs:
      - image_depth
      - depth

  - id: reachy-head
    build: pip install -e ../../node-hub/dora-reachy2
    path: dora-reachy2-head
    inputs:
      boxes2d: parse_bbox/bbox_face
      look: state_machine/look

  - id: plot
    build: pip install -e ../../node-hub/dora-rerun
    path: dora-rerun
    inputs:
      # camera_left/image_right: reachy-camera/image_right
      camera_torso/image: reachy-camera/image_depth
      text_response: dora-qwenvl/text
      text_whisper: dora-distil-whisper/text
      camera_torso/boxes2d: parse_bbox/bbox
      camera_left/boxes2d_face: parse_bbox/bbox_face
    env:
      RERUN_MEMORY_LIMIT: "5%"

  - id: dora-qwenvl
    build: pip install -e ../../node-hub/dora-qwen2-5-vl
    path: dora-qwen2-5-vl
    inputs:
      image_depth: reachy-camera/image_depth
      # image_left: reachy-camera/image_left
      text_1: dora/timer/millis/600
      text_2: state_machine/text_vlm
    outputs:
      - text
    env:
      DEFAULT_QUESTION: grab human.
      IMAGE_RESIZE_RATIO: "0.5"
      # ACTIVATION_WORDS: grab pick give output take catch grabs picks gives output takes catches have
      # SYSTEM_PROMPT: You're a robot.

  - id: parse_bbox
    path: parse_bbox_minimal.py
    inputs:
      text: dora-qwenvl/text
      prompt: state_machine/prompt
    outputs:
      - bbox
      - bbox_face
    env:
      IMAGE_RESIZE_RATIO: "0.5"

  - id: box_coordinates
    build: pip install -e ../../node-hub/dora-object-to-pose
    path: dora-object-to-pose
    inputs:
      depth: reachy-camera/depth
      masks: sam2/masks
    outputs:
      - pose

  - id: keyboard
    build: pip install -e ../../node-hub/dora-keyboard
    path: dora-keyboard
    inputs:
      tick: dora/timer/millis/1000
    outputs:
      - char

  - id: state_machine
    path: pick_place.py
    inputs:
      text: dora-distil-whisper/text
      response_base: reachy-mobile-base/response_base
      response_r_arm: reachy-right-arm/response_r_arm
      response_l_arm: reachy-left-arm/response_l_arm
      pose: box_coordinates/pose
    outputs:
      - text_vlm
      - action_r_arm
      - action_base
      - look
      - action_l_arm
      - prompt
    env:
      ACTIVATION_WORDS: grab pick give output take catch grabs picks gives output takes catches have put
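
A note on the `env:` blocks above: each entry is exported into the node's process environment, which is how the two Python scripts in this example pick up their configuration (mirroring the real usage in parse_bbox_minimal.py and pick_place.py):

    # How a node consumes its env: entries from the dataflow file.
    import os

    ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "").split()
    IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0"))

Assuming a standard dora CLI setup, the graph would typically be built with `dora build pick-place-dev.yml` and launched with `dora start pick-place-dev.yml`.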

examples/reachy2/pick_place.py (+401, -0)

@@ -0,0 +1,401 @@
# State Machine
import json
import os

import numpy as np
import pyarrow as pa
from dora import Node

IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0"))
node = Node()

ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "").split()
TABLE_HEIGHT = float(os.getenv("TABLE_HEIGHT", "-0.41"))

l_init_pose = [
    -7.0631310641087435,
    -10.432298603362307,
    24.429809104404114,
    -132.15000828778648,
    -1.5494749438811133,
    -21.749917789205202,
    8.099312596108344,
    100,
]
r_init_pose = [
    -5.60273587426976,
    10.780818397272316,
    -27.868146823156042,
    -126.15650363072193,
    3.961108018106834,
    -35.43682799906162,
    350.9236448374495,
    100,
]
r_release_closed_pose = [
    -26.1507947940993,
    12.16735021387949,
    -2.2657319092611976,
    -97.63648867582175,
    -19.91084837404425,
    22.10184328619011,
    366.71351223614494,
    0,
]

r_release_opened_pose = [
    -26.1507947940993,
    12.16735021387949,
    -2.2657319092611976,
    -97.63648867582175,
    -19.91084837404425,
    22.10184328619011,
    366.71351223614494,
    100,
]

l_release_opened_pose = [
    -30.04330081906935,
    -7.415231584691132,
    3.6972339048071468,
    -97.7274736257555,
    12.996718740452982,
    30.838020649757016,
    -1.5572310505704858,
    0,
]

l_release_closed_pose = [
    -30.04330081906935,
    -7.415231584691132,
    3.6972339048071468,
    -97.7274736257555,
    12.996718740452982,
    30.838020649757016,
    -1.5572310505704858,
    100,
]

stop = True


def extract_bboxes(json_text) -> (np.ndarray, np.ndarray):
    """
    Extracts bounding boxes from a JSON string with markdown markers and returns them as a NumPy array.

    Parameters:
    json_text (str): JSON string containing bounding box data, including ```json markers.

    Returns:
    np.ndarray: NumPy array of bounding boxes.
    """
    # Ensure all lines are stripped of whitespace and markers
    lines = json_text.strip().splitlines()

    # Filter out lines that are markdown markers
    clean_lines = [line for line in lines if not line.strip().startswith("```")]

    # Join the lines back into a single string
    clean_text = "\n".join(clean_lines)
    # Parse the cleaned JSON text
    try:
        data = json.loads(clean_text)

        # Extract bounding boxes
        bboxes = [item["bbox_2d"] for item in data]
        labels = [item["label"] for item in data]

        return np.array(bboxes), np.array(labels)
    except Exception as _e:  # noqa
        pass
    return None, None


def handle_speech(last_text):
    global stop
    words = last_text.lower().split()
    if len(ACTIVATION_WORDS) > 0 and any(word in ACTIVATION_WORDS for word in words):
        node.send_output(
            "text_vlm",
            pa.array(
                [
                    f"Given the prompt: {cache['text']}. Output the two bounding boxes for the two objects"
                ]
            ),
            metadata={"image_id": "image_depth"},
        )
        node.send_output(
            "prompt",
            pa.array([cache["text"]]),
            metadata={"image_id": "image_depth"},
        )
        print(f"sending: {cache['text']}")
        stop = False


def wait_for_event(id, timeout=None, cache={}):
    while True:
        event = node.next(timeout=timeout)
        if event is None:
            cache["finished"] = True
            return None, cache
        if event["type"] == "INPUT":
            cache[event["id"]] = event["value"]
            if event["id"] == "text":
                cache[event["id"]] = event["value"][0].as_py()
                handle_speech(event["value"][0].as_py())
            elif event["id"] == id:
                return event["value"], cache

        elif event["type"] == "ERROR":
            return None, cache


def wait_for_events(ids: list[str], timeout=None, cache={}):
    response = {}
    while True:
        event = node.next(timeout=timeout)
        if event is None:
            cache["finished"] = True
            return None, cache
        if event["type"] == "INPUT":
            cache[event["id"]] = event["value"]
            if event["id"] == "text":
                cache[event["id"]] = event["value"][0].as_py()
                handle_speech(event["value"][0].as_py())
            elif event["id"] in ids:
                response[event["id"]] = event["value"]
                if len(response) == len(ids):
                    return response, cache
        elif event["type"] == "ERROR":
            return None, cache


def get_prompt():
    text = wait_for_event(id="text", timeout=0.3)
    if text is None:
        return
    text = text[0].as_py()

    words = text.lower().split()
    if len(ACTIVATION_WORDS) > 0 and all(
        word not in ACTIVATION_WORDS for word in words
    ):
        return
    else:
        return text


last_text = ""
cache = {"text": "Put the orange in the metal box"}

while True:
    ### === IDLE ===

    node.send_output(
        "action_r_arm",
        pa.array(r_init_pose),
        metadata={"encoding": "jointstate", "duration": 1},
    )
    node.send_output(
        "action_l_arm",
        pa.array(l_init_pose),
        metadata={"encoding": "jointstate", "duration": 1},
    )
    _, cache = wait_for_events(
        ids=["response_r_arm", "response_l_arm"], timeout=2, cache=cache
    )
    # handle_speech(cache["text"])

    ### === TURNING ===

    # Trigger action once text from whisper is received
    # Move left. Overwrite this with your desired movement.
    # node.send_output("action_base", pa.array([0.0, 0.0, 0.0, 0.0, 0.0, 1.57]))
    # Look straight
    # node.send_output("look", pa.array([0.3, 0, -0.1]))
    # You can add additional actions here
    # ...

    # event = wait_for_event(id="response_base")[0].as_py()
    # if not event:
    #     ## return to IDLE
    #     node.send_output("action_base", pa.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.57]))
    #     event = wait_for_event(id="response_base")[0].as_py()
    #     if event:
    #         continue
    #     else:
    #         break

    ### === GRABBING ===

    # Trigger action once base is done moving
    # node.send_output(
    #     "text_vlm",
    #     pa.array([f"Given the prompt: {text}. Output bounding box for this action"]),
    #     metadata={"image_id": "image_depth"},
    # )
    arm_holding_object = None
    # Try poses until one is successful
    text, cache = wait_for_event(id="text", timeout=0.3, cache=cache)

    if stop:
        continue

    while True:
        values, cache = wait_for_event(id="pose", cache=cache)

        if values is None:
            continue
        values = values.to_numpy().reshape((-1, 6))
        if len(values) < 2:
            continue
        x = values[0][0]
        y = values[0][1]
        z = values[0][2]
        dest_x = values[1][0]
        dest_y = values[1][1]
        dest_z = values[1][2]
        x = x + 0.01
        dest_x = dest_x - 0.05
        print("x: ", x, " y: ", y, " z: ", z)

        ## Clip the maximum and minimum values for the height of the arm to avoid collision or weird movement.
        z = np.max((z, TABLE_HEIGHT))
        node.send_output("look", pa.array([x, y, z]))
        trajectory = np.array(
            [
                [x, y, -0.16, 0, 0, 0, 100],
                [x, y, z, 0, 0, 0, 0],
                [x, y, -0.16, 0, 0, 0, 0],
            ]
        ).ravel()

        if y < 0:
            node.send_output(
                "action_r_arm",
                pa.array(trajectory),
                metadata={"encoding": "xyzrpy", "duration": "0.5"},
            )
            event, cache = wait_for_event(id="response_r_arm", timeout=5, cache=cache)
            if event is not None and event[0].as_py():
                print("Success")
                arm_holding_object = "right"
                break
            else:
                print("Failed: x: ", x, " y: ", y, " z: ", z)
                node.send_output(
                    "action_r_arm",
                    pa.array(r_init_pose),
                    metadata={"encoding": "jointstate", "duration": "1.3"},
                )
                event, cache = wait_for_event(id="response_r_arm", cache=cache)
        else:
            y += 0.03
            node.send_output(
                "action_l_arm",
                pa.array(trajectory),
                metadata={"encoding": "xyzrpy", "duration": "0.5"},
            )
            event, cache = wait_for_event(id="response_l_arm", timeout=5, cache=cache)
            if event is not None and event[0].as_py():
                print("Success")
                arm_holding_object = "left"
                break
            else:
                print("Failed")
                node.send_output(
                    "action_l_arm",
                    pa.array(l_init_pose),
                    metadata={"encoding": "jointstate", "duration": "1.3"},
                )
                event, cache = wait_for_event(id="response_l_arm", cache=cache)

    ### === RELEASING ===

    # Trigger action once r_arm is done moving
    # node.send_output("action_base", pa.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.57]))
    # event = wait_for_event(id="response_base")[0].as_py()

    # if not event:
    #     print("Failed to move right")

    # Trigger action to release object
    if arm_holding_object == "right":
        node.send_output(
            "action_r_arm",
            pa.array(
                [
                    dest_x,
                    dest_y,
                    -0.16,
                    0,
                    0,
                    0,
                    100,
                ],
            ),
            metadata={"encoding": "xyzrpy", "duration": "0.75"},
        )
        event, cache = wait_for_event(id="response_r_arm", cache=cache)
    else:
        node.send_output(
            "action_l_arm",
            pa.array(
                [
                    dest_x,
                    dest_y,
                    -0.16,
                    0,
                    0,
                    0,
                    100,
                ]
            ),
            metadata={"encoding": "xyzrpy", "duration": "0.75"},
        )
        event, cache = wait_for_event(id="response_l_arm", cache=cache)

    if event is None or not event[0].as_py():
        print("Failed to release object")
        if arm_holding_object == "right":
            node.send_output(
                "action_r_arm",
                pa.array(
                    [
                        x,
                        y,
                        z,
                        0,
                        0,
                        0,
                        100,
                    ],
                ),
                metadata={"encoding": "xyzrpy", "duration": "0.75"},
            )
            event, cache = wait_for_event(id="response_r_arm", cache=cache)
        else:
            node.send_output(
                "action_l_arm",
                pa.array(
                    [
                        x,
                        y,
                        z,
                        0,
                        0,
                        0,
                        100,
                    ]
                ),
                metadata={"encoding": "xyzrpy", "duration": "0.75"},
            )
            event, cache = wait_for_event(id="response_l_arm", cache=cache)
    else:
        stop = True

    if cache.get("finished", False):
        break
    # Move object back to initial position
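
To make the grasp geometry above easier to follow, here is the same trajectory construction with made-up pose values; the gripper field uses 0 = closed, 100 = open, as in the pose constants at the top of the file:

    # Worked example of the grasp trajectory the state machine sends; each row
    # is [x, y, z, roll, pitch, yaw, gripper], with hypothetical pose values.
    import numpy as np

    x, y, z = 0.35, -0.12, -0.43      # assumed object pose in metres
    z = np.max((z, -0.41))            # clip to TABLE_HEIGHT to avoid hitting the table
    trajectory = np.array(
        [
            [x, y, -0.16, 0, 0, 0, 100],  # hover above the object, gripper open
            [x, y, z, 0, 0, 0, 0],        # descend and close the gripper
            [x, y, -0.16, 0, 0, 0, 0],    # lift back up, holding the object
        ]
    ).ravel()
    # y < 0 routes the trajectory to the right arm, otherwise to the left arm.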

libraries/message/src/metadata.rs (+1, -0)

@@ -63,6 +63,7 @@ pub enum Parameter {
     String(String),
     ListInt(Vec<i64>),
     ListFloat(Vec<f64>),
+    ListString(Vec<String>),
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]


node-hub/dora-object-to-pose/Cargo.toml (+1, -1)

@@ -6,7 +6,7 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-dora-node-api = "0.3.8"
+dora-node-api = { workspace = true }
 eyre = "0.6.8"
 pyo3 = { workspace = true, features = [
     "extension-module",


node-hub/dora-object-to-pose/src/lib.rs (+133, -100)

@@ -10,7 +10,7 @@ use dora_node_api::{
 use eyre::Result;
 use std::collections::HashMap;
 
-fn points_to_pose(points: &[(f32, f32, f32)]) -> (f32, f32, f32, f32, f32, f32) {
+fn points_to_pose(points: &[(f32, f32, f32)]) -> Vec<f32> {
     let (_x, _y, _z, sum_xy, sum_x2, sum_y2, n, x_min, x_max, y_min, y_max, z_min, z_max) =
         points.iter().fold(
             (
@@ -61,7 +61,7 @@ fn points_to_pose(points: &[(f32, f32, f32)]) -> (f32, f32, f32, f32, f32, f32)
     let std_y = (sum_y2 / n - mean_y * mean_y).sqrt();
     let corr = cov / (std_x * std_y);
 
-    return (mean_x, mean_y, mean_z, 0., 0., corr * f32::consts::PI / 2.);
+    return vec![mean_x, mean_y, mean_z, 0., 0., corr * f32::consts::PI / 2.];
 }
 
 pub fn lib_main() -> Result<()> {
@@ -74,7 +74,7 @@ pub fn lib_main() -> Result<()> {
     let mut focal_length = vec![605, 605];
     let mut resolution = vec![605, 605];
     let camera_pitch = std::env::var("CAMERA_PITCH")
-        .unwrap_or("2.478".to_string())
+        .unwrap_or("2.47".to_string())
         .parse::<f32>()
         .unwrap();
     let cos_theta = camera_pitch.cos(); // np.cos(np.deg2rad(180-38))
@@ -120,114 +120,147 @@ pub fn lib_main() -> Result<()> {
                         depth_frame = Some(buffer.clone());
                     }
                     "masks" => {
-                        if let Some(data) = data.as_primitive_opt::<Float32Type>() {
-                            let data = data.values();
-                            let mut points = vec![];
-                            let mut z_total = 0.;
-                            let mut n = 0.;
-
-                            if let Some(depth_frame) = &depth_frame {
-                                depth_frame.iter().enumerate().for_each(|(i, z)| {
-                                    let u = i as f32 % width as f32; // Calculate x-coordinate (u)
-                                    let v = i as f32 / width as f32; // Calculate y-coordinate (v)
-
-                                    if let Some(z) = z {
-                                        let z = z as f32;
-                                        // Skip points that have empty depth or is too far away
-                                        if z == 0. || z > 5.0 {
-                                            return;
-                                        }
-                                        if data[i] > 0. {
-                                            let y =
-                                                (u - resolution[0] as f32) * z / focal_length[0] as f32;
-                                            let x =
-                                                (v - resolution[1] as f32) * z / focal_length[1] as f32;
-                                            let new_x = sin_theta * z + cos_theta * x;
-                                            let new_y = -y;
-                                            let new_z = cos_theta * z - sin_theta * x;
-
-                                            points.push((new_x, new_y, new_z));
-                                            z_total += new_z;
-                                            n += 1.;
-                                        }
-                                    }
-                                });
-                            } else {
-                                println!("No depth frame found");
-                                continue;
-                            }
-                            if points.is_empty() {
-                                println!("No points in mask found");
-                                continue;
-                            }
-                            let (mean_x, mean_y, mean_z, rx, ry, rz) = points_to_pose(&points);
-                            let mut metadata = metadata.parameters.clone();
-                            metadata.insert(
-                                "encoding".to_string(),
-                                Parameter::String("xyzrpy".to_string()),
-                            );
-
-                            node.send_output(
-                                DataId::from("pose".to_string()),
-                                metadata,
-                                vec![mean_x, mean_y, mean_z, rx, ry, rz].into_arrow(),
-                            )?;
-                        }
+                        let masks = if let Some(data) = data.as_primitive_opt::<Float32Type>() {
+                            let data = data
+                                .iter()
+                                .map(|x| if let Some(x) = x { x > 0. } else { false })
+                                .collect::<Vec<_>>();
+                            data
+                        } else if let Some(data) = data.as_boolean_opt() {
+                            let data = data
+                                .iter()
+                                .map(|x| if let Some(x) = x { x } else { false })
+                                .collect::<Vec<_>>();
+                            data
+                        } else {
+                            println!("Got unexpected data type: {}", data.data_type());
+                            continue;
+                        };
+
+                        let outputs: Vec<Vec<f32>> = masks
+                            .chunks(height as usize * width as usize)
+                            .into_iter()
+                            .map(|data| {
+                                let mut points = vec![];
+                                let mut z_total = 0.;
+                                let mut n = 0.;
+
+                                if let Some(depth_frame) = &depth_frame {
+                                    depth_frame.iter().enumerate().for_each(|(i, z)| {
+                                        let u = i as f32 % width as f32; // Calculate x-coordinate (u)
+                                        let v = i as f32 / width as f32; // Calculate y-coordinate (v)
+
+                                        if let Some(z) = z {
+                                            let z = z as f32;
+                                            // Skip points that have empty depth or is too far away
+                                            if z == 0. || z > 20.0 {
+                                                return;
+                                            }
+                                            if data[i] {
+                                                let y = (u - resolution[0] as f32) * z
+                                                    / focal_length[0] as f32;
+                                                let x = (v - resolution[1] as f32) * z
+                                                    / focal_length[1] as f32;
+                                                let new_x = sin_theta * z + cos_theta * x;
+                                                let new_y = -y;
+                                                let new_z = cos_theta * z - sin_theta * x;
+
+                                                points.push((new_x, new_y, new_z));
+                                                z_total += new_z;
+                                                n += 1.;
+                                            }
+                                        }
+                                    });
+                                } else {
+                                    println!("No depth frame found");
+                                    return None;
+                                }
+                                if points.is_empty() {
+                                    println!("No points in mask found");
+                                    return None;
+                                }
+                                Some(points_to_pose(&points))
+                            })
+                            .filter(|x| x.is_some())
+                            .map(|x| x.unwrap())
+                            .collect();
+                        let flatten_data = outputs.into_iter().flatten().collect::<Vec<_>>();
+                        let mut metadata = metadata.parameters.clone();
+                        metadata.insert(
+                            "encoding".to_string(),
+                            Parameter::String("xyzrpy".to_string()),
+                        );
+                        println!("Got data: {:?}", flatten_data);
+
+                        node.send_output(
+                            DataId::from("pose".to_string()),
+                            metadata,
+                            flatten_data.into_arrow(),
+                        )?;
                     }
                     "boxes2d" => {
                         if let Some(data) = data.as_primitive_opt::<Int64Type>() {
-                            let data = data.values();
-                            let x_min = data[0] as f32;
-                            let y_min = data[1] as f32;
-                            let x_max = data[2] as f32;
-                            let y_max = data[3] as f32;
-                            let mut points = vec![];
-                            let mut z_min = 100.;
-                            let mut z_total = 0.;
-                            let mut n = 0.;
-
-                            if let Some(depth_frame) = &depth_frame {
-                                depth_frame.iter().enumerate().for_each(|(i, z)| {
-                                    let u = i as f32 % width as f32; // Calculate x-coordinate (u)
-                                    let v = i as f32 / width as f32; // Calculate y-coordinate (v)
-
-                                    if let Some(z) = z {
-                                        let z = z as f32;
-                                        // Skip points that have empty depth or is too far away
-                                        if z == 0. || z > 5.0 {
-                                            return;
-                                        }
-                                        if u > x_min && u < x_max && v > y_min && v < y_max {
-                                            let y =
-                                                (u - resolution[0] as f32) * z / focal_length[0] as f32;
-                                            let x =
-                                                (v - resolution[1] as f32) * z / focal_length[1] as f32;
-                                            let new_x = sin_theta * z + cos_theta * x;
-                                            let new_y = -y;
-                                            let new_z = cos_theta * z - sin_theta * x;
-                                            if new_z < z_min {
-                                                z_min = new_z;
-                                            }
-                                            points.push((new_x, new_y, new_z));
-                                            z_total += new_z;
-                                            n += 1.;
-                                        }
-                                    }
-                                });
-                            } else {
-                                println!("No depth frame found");
-                                continue;
-                            }
-                            if points.is_empty() {
-                                continue;
-                            }
-                            let raw_mean_z = z_total / n as f32;
-                            let threshold = (raw_mean_z + z_min) / 2.;
-                            let points = points
-                                .into_iter()
-                                .filter(|(_x, _y, z)| z > &threshold)
-                                .collect::<Vec<_>>();
-                            let (mean_x, mean_y, mean_z, rx, ry, rz) = points_to_pose(&points);
+                            let values = data.values();
+                            let outputs: Vec<Vec<f32>> = values
+                                .chunks(4)
+                                .into_iter()
+                                .map(|data| {
+                                    let x_min = data[0] as f32;
+                                    let y_min = data[1] as f32;
+                                    let x_max = data[2] as f32;
+                                    let y_max = data[3] as f32;
+                                    let mut points = vec![];
+                                    let mut z_min = 100.;
+                                    let mut z_total = 0.;
+                                    let mut n = 0.;
+
+                                    if let Some(depth_frame) = &depth_frame {
+                                        depth_frame.iter().enumerate().for_each(|(i, z)| {
+                                            let u = i as f32 % width as f32; // Calculate x-coordinate (u)
+                                            let v = i as f32 / width as f32; // Calculate y-coordinate (v)
+
+                                            if let Some(z) = z {
+                                                let z = z as f32;
+                                                // Skip points that have empty depth or is too far away
+                                                if z == 0. || z > 5.0 {
+                                                    return;
+                                                }
+                                                if u > x_min && u < x_max && v > y_min && v < y_max {
+                                                    let y = (u - resolution[0] as f32) * z
+                                                        / focal_length[0] as f32;
+                                                    let x = (v - resolution[1] as f32) * z
+                                                        / focal_length[1] as f32;
+                                                    let new_x = sin_theta * z + cos_theta * x;
+                                                    let new_y = -y;
+                                                    let new_z = cos_theta * z - sin_theta * x;
+                                                    if new_z < z_min {
+                                                        z_min = new_z;
+                                                    }
+                                                    points.push((new_x, new_y, new_z));
+                                                    z_total += new_z;
+                                                    n += 1.;
+                                                }
+                                            }
+                                        });
+                                    } else {
+                                        println!("No depth frame found");
+                                        return None;
+                                    }
+                                    if points.is_empty() {
+                                        return None;
+                                    }
+                                    let raw_mean_z = z_total / n as f32;
+                                    let threshold = (raw_mean_z + z_min) / 2.;
+                                    let points = points
+                                        .into_iter()
+                                        .filter(|(_x, _y, z)| z > &threshold)
+                                        .collect::<Vec<_>>();
+                                    Some(points_to_pose(&points))
+                                })
+                                .filter(|x| x.is_some())
+                                .map(|x| x.unwrap())
+                                .collect();
+                            let flatten_data = outputs.into_iter().flatten().collect::<Vec<_>>();
                             let mut metadata = metadata.parameters.clone();
                             metadata.insert(
                                 "encoding".to_string(),
@@ -237,7 +270,7 @@ pub fn lib_main() -> Result<()> {
                             node.send_output(
                                 DataId::from("pose".to_string()),
                                 metadata,
-                                vec![mean_x, mean_y, mean_z, rx, ry, rz].into_arrow(),
+                                flatten_data.into_arrow(),
                             )?;
                         }
                     }
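
For readers checking the geometry, here is a Python transcription of the per-pixel back-projection used in both match arms (assumptions: pinhole camera model, camera pitched by CAMERA_PITCH around one axis; `fx`, `fy`, `cx`, `cy` stand in for the `focal_length` and `resolution` values in the Rust code):

    # Sketch of the depth-to-robot-frame projection above.
    import numpy as np

    camera_pitch = 2.47
    cos_theta, sin_theta = np.cos(camera_pitch), np.sin(camera_pitch)

    def pixel_to_point(u, v, z, fx, fy, cx, cy):
        y = (u - cx) * z / fx
        x = (v - cy) * z / fy
        # Rotate by the camera pitch into the robot frame.
        return (sin_theta * z + cos_theta * x, -y, cos_theta * z - sin_theta * x)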


node-hub/dora-reachy2/dora_reachy2/camera.py (+1, -1)

@@ -10,7 +10,7 @@ from reachy2_sdk.media.camera import CameraView
 def main():
     ROBOT_IP = os.getenv("ROBOT_IP", "10.42.0.80")
 
-    for _ in range(5):
+    for _ in range(10):
         reachy = ReachySDK(ROBOT_IP)
         try:
             reachy.cameras.teleop.get_frame(view=CameraView.LEFT)


node-hub/dora-reachy2/dora_reachy2/left_arm.py (+14, -4)

@@ -78,14 +78,14 @@ def manage_gripper(reachy, gripper, grasp):
         return True
     if gripper == 0.0:
         reachy.l_arm.gripper.close()
-        time.sleep(0.5)
+        time.sleep(0.3)
         if grasp:
             half_open = reachy.l_arm.gripper.get_current_opening() > 2
             if not half_open:
                 return False
     elif gripper == 100.0:
         reachy.l_arm.gripper.open()
-        time.sleep(0.5)
+        time.sleep(0.3)
     return True
@@ -133,7 +133,12 @@ def main():
             )
         else:
             for joint, gripper in joint_values:
-                reachy.l_arm.goto(joint, duration=duration, wait=wait)
+                reachy.l_arm.goto(
+                    joint,
+                    duration=duration,
+                    wait=wait,
+                    interpolation_mode="linear",
+                )
                 response_gripper = manage_gripper(reachy, gripper, grasp)
                 if not response_gripper:
                     node.send_output(
@@ -151,7 +156,12 @@ def main():
             joints = value[:7].tolist()
             gripper = value[7]
 
-            reachy.l_arm.goto(joints, duration=duration, wait=wait)
+            reachy.l_arm.goto(
+                joints,
+                duration=duration,
+                wait=wait,
+                interpolation_mode="linear",
+            )
             manage_gripper(reachy, gripper, grasp)
             node.send_output("response_l_arm", pa.array([True]))



node-hub/dora-reachy2/dora_reachy2/right_arm.py (+14, -4)

@@ -77,14 +77,14 @@ def manage_gripper(reachy, gripper, grasp):
         return True
     if gripper == 0.0:
         reachy.r_arm.gripper.close()
-        time.sleep(0.5)
+        time.sleep(0.3)
         if grasp:
             half_open = reachy.r_arm.gripper.get_current_opening() > 2
             if not half_open:
                 return False
     elif gripper == 100.0:
         reachy.r_arm.gripper.open()
-        time.sleep(0.5)
+        time.sleep(0.3)
     return True
@@ -132,7 +132,12 @@ def main():
             )
         else:
             for joint, gripper in joint_values:
-                reachy.r_arm.goto(joint, duration=duration, wait=wait)
+                reachy.r_arm.goto(
+                    joint,
+                    duration=duration,
+                    wait=wait,
+                    interpolation_mode="linear",
+                )
                 response_gripper = manage_gripper(reachy, gripper, grasp)
                 if not response_gripper:
                     node.send_output(
@@ -150,7 +155,12 @@ def main():
            joints = value[:7].tolist()
            gripper = value[7]
 
-            reachy.r_arm.goto(joints, duration=duration, wait=wait)
+            reachy.r_arm.goto(
+                joints,
+                duration=duration,
+                wait=wait,
+                interpolation_mode="linear",
+            )
             manage_gripper(reachy, gripper, grasp)
             node.send_output("response_r_arm", pa.array([True]))
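
Both arm nodes consume the same 8-value `jointstate` payload: seven joint angles in degrees plus a gripper opening, where 0 = closed and 100 = open. A sketch of a caller, with values rounded from `r_init_pose` in pick_place.py above:

    # Sketch: sending a jointstate target to the right-arm node.
    import pyarrow as pa
    from dora import Node

    node = Node()
    node.send_output(
        "action_r_arm",
        pa.array([-5.6, 10.78, -27.87, -126.16, 3.96, -35.44, 350.92, 100.0]),
        metadata={"encoding": "jointstate", "duration": 1},
    )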



node-hub/dora-rerun/Cargo.toml (+1, -1)

@@ -17,7 +17,7 @@ python = ["pyo3"]
 dora-node-api = { workspace = true, features = ["tracing"] }
 eyre = "0.6.8"
 tokio = { version = "1.24.2", features = ["rt"] }
-rerun = { version = "0.21.0", features = ["web_viewer", "image"] }
+rerun = { version = "0.22.0", features = ["web_viewer", "image"] }
 ndarray = "0.15.6"
 k = "0.32"
 pyo3 = { workspace = true, features = [


node-hub/dora-rerun/pyproject.toml (+1, -1)

@@ -10,7 +10,7 @@ requires-python = ">=3.8"
 
 dependencies = [
     "maturin>=1.8.2",
-    'rerun_sdk==0.21.0',
+    'rerun_sdk==0.22.0',
     # "rerun-loader-urdf @ git+https://github.com/rerun-io/rerun-loader-python-example-urdf.git",
 ]



node-hub/dora-rerun/src/boxes2d.rs (+50, -17)

@@ -1,7 +1,9 @@
 use dora_node_api::{
     arrow::{
         array::AsArray,
-        datatypes::{Float32Type, Float64Type, Int32Type, Int64Type},
+        datatypes::{
+            DataType, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type,
+        },
     },
     dora_core::config::DataId,
     ArrowData, Metadata, Parameter,
@@ -29,10 +31,53 @@ pub fn update_boxes2d(
         .as_list_opt::<i32>()
         .context("Could not deserialize bbox as list")?
         .values();
-    let bbox = bbox
-        .as_primitive_opt::<Float32Type>()
-        .context("Could not get bbox value as list")?
-        .values();
+    let bbox = match bbox.data_type() {
+        DataType::Float16 => bbox
+            .as_primitive_opt::<Float16Type>()
+            .context("Failed to deserialize bbox")?
+            .values()
+            .iter()
+            .map(|x| f32::from(*x))
+            .collect(),
+        DataType::Float32 => bbox
+            .as_primitive_opt::<Float32Type>()
+            .context("Failed to deserialize bbox")?
+            .values()
+            .to_vec(),
+        DataType::Float64 => bbox
+            .as_primitive_opt::<Float64Type>()
+            .context("Failed to deserialize bbox")?
+            .values()
+            .iter()
+            .map(|x| *x as f32)
+            .collect(),
+        DataType::Int16 => bbox
+            .as_primitive_opt::<Int16Type>()
+            .context("Failed to deserialize bbox")?
+            .values()
+            .iter()
+            .map(|x| *x as f32)
+            .collect(),
+        DataType::Int32 => bbox
+            .as_primitive_opt::<Int32Type>()
+            .context("Failed to deserialize bbox")?
+            .values()
+            .iter()
+            .map(|x| *x as f32)
+            .collect(),
+        DataType::Int64 => bbox
+            .as_primitive_opt::<Int64Type>()
+            .context("Failed to deserialize bbox")?
+            .values()
+            .iter()
+            .map(|x| *x as f32)
+            .collect(),
+        _ => {
+            return Err(eyre::eyre!(
+                "Could not deserialize bbox as float32, float64, int32 or int64"
+            ))
+        }
+    };
 
     if bbox.len() == 0 {
         rec.log(id.as_str(), &rerun::Clear::flat())
@@ -53,18 +98,6 @@ pub fn update_boxes2d(
         .context("Could not deserialize labels as string")?;
     let labels: Vec<Text> = labels.iter().map(|x| Text::from(x.unwrap())).collect();
 
-    // Cast confidence
-    let conf_buffer = bbox_struct
-        .column_by_name("conf")
-        .context("Did not find conf field within bbox struct")?;
-    let conf = conf_buffer
-        .as_list_opt::<i32>()
-        .context("Could not deserialize conf as list")?
-        .values();
-    let _conf = conf
-        .as_primitive_opt::<Float32Type>()
-        .context("Could not deserialize conf as string")?;
 
     let mut centers = vec![];
     let mut sizes = vec![];


node-hub/dora-rerun/src/lib.rs (+0, -1)

@@ -262,7 +262,6 @@ use pyo3::{
 #[cfg(feature = "python")]
 #[pyfunction]
 fn py_main(_py: Python) -> eyre::Result<()> {
-    pyo3::prepare_freethreaded_python();
     lib_main()
 }



node-hub/dora-sam2/dora_sam2/main.py (+131, -17)

@@ -13,6 +13,10 @@ def main():
     pa.array([])  # initialize pyarrow array
     node = Node()
     frames = {}
+    last_pred = None
+    labels = None
+    return_type = pa.Array
+    image_id = None
     for event in node:
         event_type = event["type"]

@@ -59,33 +63,143 @@ def main():
                 image = Image.fromarray(frame)
                 frames[event_id] = image
 
+                # TODO: Fix the tracking code for SAM2.
+                continue
+                if last_pred is not None:
+                    with (
+                        torch.inference_mode(),
+                        torch.autocast(
+                            "cuda",
+                            dtype=torch.bfloat16,
+                        ),
+                    ):
+                        predictor.set_image(frames[image_id])
+
+                        new_logits = []
+                        new_masks = []
+
+                        if len(last_pred.shape) < 3:
+                            last_pred = np.expand_dims(last_pred, 0)
+
+                        for mask in last_pred:
+                            mask = np.expand_dims(mask, 0)  # Make shape: 1x256x256
+                            masks, _, new_logit = predictor.predict(
+                                mask_input=mask,
+                                multimask_output=False,
+                            )
+                            if len(masks.shape) == 4:
+                                masks = masks[:, 0, :, :]
+                            else:
+                                masks = masks[0, :, :]
+
+                            masks = masks > 0
+                            new_masks.append(masks)
+                            new_logits.append(new_logit)
+                        ## Mask to 3 channel image
+
+                        last_pred = np.concatenate(new_logits, axis=0)
+                        masks = np.concatenate(new_masks, axis=0)
+
+                        match return_type:
+                            case pa.Array:
+                                node.send_output(
+                                    "masks",
+                                    pa.array(masks.ravel()),
+                                    metadata={
+                                        "image_id": image_id,
+                                        "width": frames[image_id].width,
+                                        "height": frames[image_id].height,
+                                    },
+                                )
+                            case pa.StructArray:
+                                node.send_output(
+                                    "masks",
+                                    pa.array(
+                                        [
+                                            {
+                                                "masks": masks.ravel(),
+                                                "labels": event["value"]["labels"],
+                                            }
+                                        ]
+                                    ),
+                                    metadata={
+                                        "image_id": image_id,
+                                        "width": frames[image_id].width,
+                                        "height": frames[image_id].height,
+                                    },
+                                )
+
             elif "boxes2d" in event_id:
-                boxes2d = event["value"].to_numpy()
+                if isinstance(event["value"], pa.StructArray):
+                    boxes2d = event["value"][0].get("bbox").values.to_numpy()
+                    labels = (
+                        event["value"][0]
+                        .get("labels")
+                        .values.to_numpy(zero_copy_only=False)
+                    )
+                    return_type = pa.StructArray
+                else:
+                    boxes2d = event["value"].to_numpy()
+                    labels = None
+                    return_type = pa.Array
 
                 metadata = event["metadata"]
                 encoding = metadata["encoding"]
                 if encoding != "xyxy":
                     raise RuntimeError(f"Unsupported boxes2d encoding: {encoding}")
+                boxes2d = boxes2d.reshape(-1, 4)
 
                 image_id = metadata["image_id"]
-                with torch.inference_mode(), torch.autocast(
-                    "cuda",
-                    dtype=torch.bfloat16,
+                with (
+                    torch.inference_mode(),
+                    torch.autocast(
+                        "cuda",
+                        dtype=torch.bfloat16,
+                    ),
                 ):
                     predictor.set_image(frames[image_id])
-                    masks, _, _ = predictor.predict(box=boxes2d)
-                    masks = masks[0]
-                    ## Mask to 3 channel image
-
-                    node.send_output(
-                        "masks",
-                        pa.array(masks.ravel()),
-                        metadata={
-                            "image_id": image_id,
-                            "width": frames[image_id].width,
-                            "height": frames[image_id].height,
-                        },
-                    )
+                    masks, _scores, last_pred = predictor.predict(
+                        box=boxes2d, point_labels=labels, multimask_output=False
+                    )
+
+                    if len(masks.shape) == 4:
+                        masks = masks[:, 0, :, :]
+                        last_pred = last_pred[:, 0, :, :]
+                    else:
+                        masks = masks[0, :, :]
+                        last_pred = last_pred[0, :, :]
+
+                    masks = masks > 0
+                    ## Mask to 3 channel image
+                    match return_type:
+                        case pa.Array:
+                            node.send_output(
+                                "masks",
+                                pa.array(masks.ravel()),
+                                metadata={
+                                    "image_id": image_id,
+                                    "width": frames[image_id].width,
+                                    "height": frames[image_id].height,
+                                },
+                            )
+                        case pa.StructArray:
+                            node.send_output(
+                                "masks",
+                                pa.array(
+                                    [
+                                        {
+                                            "masks": masks.ravel(),
+                                            "labels": event["value"]["labels"],
+                                        }
+                                    ]
+                                ),
+                                metadata={
+                                    "image_id": image_id,
+                                    "width": frames[image_id].width,
+                                    "height": frames[image_id].height,
+                                },
+                            )
 
         elif event_type == "ERROR":
             print("Event Error:" + event["error"])


