
Minor improvement

tags/v0.3.12-rc0
haixuanTao committed 9 months ago · parent commit 04c06ec185
8 changed files with 502 additions and 63 deletions
1. examples/reachy2-remote/dataflow_reachy.yml  +84 -26
2. examples/reachy2-remote/parse_bbox.py  +5 -2
3. examples/reachy2-remote/parse_point.py  +14 -8
4. examples/reachy2-remote/parse_pose.py  +291 -0
5. examples/reachy2-remote/parse_whisper.py  +16 -3
6. node-hub/dora-object-to-pose/src/lib.rs  +33 -21
7. node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py  +3 -1
8. node-hub/dora-sam2/dora_sam2/main.py  +56 -2

examples/reachy2-remote/dataflow_reachy.yml (+84 -26)

@@ -4,17 +4,40 @@ nodes:
     _unstable_deploy:
       machine: encoder
     inputs:
-      tick: dora/timer/millis/10
+      tick: dora/timer/millis/20
     outputs:
       - image_left
       - image_depth
       - depth
     env:
+      CAPTURE_PATH: 0
       IMAGE_WIDTH: 640
       IMAGE_HEIGHT: 480
       ROBOT_IP: 127.0.0.1
 
+  - id: reachy-left-arm
+    build: pip install -e ../../node-hub/dora-reachy2
+    path: dora-reachy2-left-arm
+    _unstable_deploy:
+      machine: encoder
+    inputs:
+      pose: parse_pose/action_l_arm
+    outputs:
+      - response_l_arm
+    env:
+      ROBOT_IP: 127.0.0.1
+
+  - id: reachy-right-arm
+    build: pip install -e ../../node-hub/dora-reachy2
+    path: dora-reachy2-right-arm
+    _unstable_deploy:
+      machine: encoder
+    inputs:
+      pose: parse_pose/action_r_arm
+    outputs:
+      - response_r_arm
+    env:
+      ROBOT_IP: 127.0.0.1
+
   - id: rav1e-local-image
     path: dora-rav1e
     build: cargo build -p dora-rav1e --release
@@ -26,10 +49,21 @@ nodes:
     outputs:
       - image_left
       - image_depth
-      - depth
     env:
       RAV1E_SPEED: 10
 
+  - id: rav1e-local-depth
+    path: dora-rav1e
+    build: cargo build -p dora-rav1e --release
+    _unstable_deploy:
+      machine: encoder
+    inputs:
+      depth: camera/depth
+    outputs:
+      - depth
+    env:
+      RAV1E_SPEED: 7
+
   - id: dav1d-remote
     path: dora-dav1d
     build: cargo build -p dora-dav1d --release
@@ -38,7 +72,7 @@ nodes:
     inputs:
       image_depth: rav1e-local-image/image_depth
       image_left: rav1e-local-image/image_left
-      # depth: rav1e-local/depth
+      depth: rav1e-local-depth/depth
     outputs:
       - image_left
       - image_depth
@@ -87,6 +121,8 @@ nodes:
       - action
       - points
       - text
+      - action_release_left
+      - action_release_right
     env:
       IMAGE_RESIZE_RATIO: "1.0"


@@ -118,6 +154,17 @@ nodes:
     env:
       IMAGE_RESIZE_RATIO: "1.0"
 
+  - id: sam2
+    build: pip install -e ../../node-hub/dora-sam2
+    path: dora-sam2
+    _unstable_deploy:
+      machine: gpu
+    inputs:
+      image_depth: dav1d-remote/image_depth
+      boxes2d: parse_bbox/bbox_grab
+    outputs:
+      - masks
+
   - id: tracker
     build: pip install -e ../../node-hub/dora-cotracker
     path: dora-cotracker
@@ -132,24 +179,32 @@ nodes:
     env:
       INTERACTIVE_MODE: false
 
-  # - id: box_coordinates
-  #   build: pip install -e ../../node-hub/dora-object-to-pose
-  #   path: dora-object-to-pose
-  #   inputs:
-  #     depth: reachy-camera/depth
-  #     boxes2d: parse_bbox/bbox
-  #   outputs:
-  #     - pose
-  #- id: sam2
-  #build: pip install -e ../../node-hub/dora-sam2
-  #path: dora-sam2
-  #_unstable_deploy:
-  #machine: gpu
-  #inputs:
-  #image_left: dav1d-remote/image_left
-  #boxes2d: parse_bbox/bbox
-  #outputs:
-  #- masks
+  - id: box_coordinates
+    build: pip install -e ../../node-hub/dora-object-to-pose
+    path: dora-object-to-pose
+    _unstable_deploy:
+      machine: gpu
+    inputs:
+      depth: dav1d-remote/depth
+      masks: sam2/masks
+    outputs:
+      - pose
+
+  - id: parse_pose
+    path: parse_pose.py
+    _unstable_deploy:
+      machine: gpu
+    inputs:
+      pose: box_coordinates/pose
+      response_r_arm: reachy-right-arm/response_r_arm
+      response_l_arm: reachy-left-arm/response_l_arm
+      release_left: parse_whisper/action_release_left
+      release_right: parse_whisper/action_release_right
+    outputs:
+      - action_r_arm
+      - action_l_arm
+    env:
+      IMAGE_RESIZE_RATIO: "1.0"
+
   - id: parse_point
     path: parse_point.py
@@ -179,12 +234,15 @@ nodes:
     build: pip install -e ../../node-hub/dora-rerun
     path: dora-rerun
     _unstable_deploy:
-      machine: macbook
+      machine: gpu
     inputs:
       image: dav1d-remote/image_left
-      image_depth: dav1d-remote/image_depth
-      boxes2d: parse_bbox/bbox
+      torso/image: dav1d-remote/image_depth
+      torso/depth: dav1d-remote/depth
+      torso/boxes2d: parse_bbox/bbox
       original_text: dora-distil-whisper/text
       parsed_text: parse_whisper/text
       qwenvl_text: dora-qwenvl/text
+      tracked_image: tracker/tracked_image
+    env:
+      RERUN_MEMORY_LIMIT: 5%
+      CAMERA_PITCH: 2.47
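
The two new arm nodes close a request/response loop in the graph: parse_pose publishes action_l_arm/action_r_arm, and each dora-reachy2 node answers on response_l_arm/response_r_arm. A minimal Python stub (hypothetical, standing in for dora-reachy2-left-arm and using only the dora event API visible in the scripts below) shows the shape of that handshake:

import pyarrow as pa
from dora import Node

node = Node()

for event in node:
    # "pose" arrives from parse_pose/action_l_arm per the dataflow above.
    if event["type"] == "INPUT" and event["id"] == "pose":
        # A real arm node would execute the trajectory; the stub only acknowledges,
        # which is enough for parse_pose's wait_for_event() round-trip to complete.
        node.send_output("response_l_arm", pa.array([True]))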

examples/reachy2-remote/parse_bbox.py (+5 -2)

@@ -54,20 +54,23 @@ for event in node:
         continue
 
     text = event["value"][0].as_py()
+    metadata = event["metadata"]
     image_id = event["metadata"]["image_id"]
 
     bboxes, labels = extract_bboxes(text)
     if bboxes is not None and len(bboxes) > 0:
         bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO)
+        metadata["image_id"] = image_id
+        metadata["encoding"] = "xyxy"
         if image_id == "image_left":
             node.send_output(
                 "bbox_track",
                 pa.array(bboxes.ravel()),
-                metadata={"encoding": "xyxy", "image_id": image_id},
+                metadata,
             )
         elif image_id == "image_depth":
             node.send_output(
                 "bbox_grab",
                 pa.array(bboxes.ravel()),
-                metadata={"encoding": "xyxy", "image_id": image_id},
+                metadata,
             )

examples/reachy2-remote/parse_point.py (+14 -8)

@@ -29,18 +29,24 @@ for event in node:
     point = values[-1]
 
     rz = int((width / 2) - point[0]) / (width / 2)
-    x_distance = min(height / 2, height - point[1])
+    x_distance = min(height, height - point[1])
 
-    if abs(rz) > 0.3:
-        rz = np.deg2rad(30) * np.sign(rz)
+    if abs(rz) > 0.75:
+        rz = np.deg2rad(90) * np.sign(rz)
+    elif abs(rz) > 0.5:
+        rz = np.deg2rad(60) * np.sign(rz)
+    elif abs(rz) > 0.3:
+        rz = np.deg2rad(55) * np.sign(rz)
     elif abs(rz) > 0.1:
-        rz = np.deg2rad(20) * np.sign(rz)
+        rz = np.deg2rad(45) * np.sign(rz)
     else:
         x = 0
 
-    if x_distance > (height * 0.3):
-        x = 0.7
-    elif x_distance > (height * 0.15):
+    if x_distance > (height * 0.7):
+        x = 0.5
+    elif x_distance > (height * 0.5):
+        x = 0.5
+    elif x_distance > (height * 0.2):
         x = 0.5
     else:
         x = 0
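
The retuned bands are easier to read pulled out into a pure function. A sketch (hypothetical helper, mirroring the new thresholds) of how a tracked pixel maps to a turn rate rz and forward speed x:

import numpy as np

def steer(point, width, height):
    # Normalized horizontal offset: +1 at the left edge, -1 at the right edge.
    rz = int((width / 2) - point[0]) / (width / 2)
    x_distance = min(height, height - point[1])
    if abs(rz) > 0.75:
        rz = np.deg2rad(90) * np.sign(rz)
    elif abs(rz) > 0.5:
        rz = np.deg2rad(60) * np.sign(rz)
    elif abs(rz) > 0.3:
        rz = np.deg2rad(55) * np.sign(rz)
    elif abs(rz) > 0.1:
        rz = np.deg2rad(45) * np.sign(rz)
    # The three upper distance bands all return 0.5 now, so the chain
    # collapses to a single cutoff at 20% of the frame height.
    x = 0.5 if x_distance > (height * 0.2) else 0
    return x, rz

print(steer((40, 60), 640, 480))  # far left, high in frame -> (0.5, 1.570...)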


examples/reachy2-remote/parse_pose.py (+291 -0)

@@ -0,0 +1,291 @@
"""TODO: Add docstring."""

import json
import os

import numpy as np
import pyarrow as pa
from dora import Node

node = Node()

IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0"))


l_init_pose = [
    -7.0631310641087435,
    -10.432298603362307,
    24.429809104404114,
    -132.15000828778648,
    -1.5494749438811133,
    -21.749917789205202,
    8.099312596108344,
    100,
]
r_init_pose = [
    -5.60273587426976,
    10.780818397272316,
    -27.868146823156042,
    -126.15650363072193,
    3.961108018106834,
    -35.43682799906162,
    350.9236448374495,
    100,
]
r_release_closed_pose = [
    -26.1507947940993,
    12.16735021387949,
    -2.2657319092611976,
    -97.63648867582175,
    -19.91084837404425,
    22.10184328619011,
    366.71351223614494,
    0,
]

r_release_opened_pose = [
    -26.1507947940993,
    12.16735021387949,
    -2.2657319092611976,
    -97.63648867582175,
    -19.91084837404425,
    22.10184328619011,
    366.71351223614494,
    100,
]

l_release_opened_pose = [
    -30.04330081906935,
    -7.415231584691132,
    3.6972339048071468,
    -97.7274736257555,
    12.996718740452982,
    30.838020649757016,
    -1.5572310505704858,
    0,
]

l_release_closed_pose = [
    -30.04330081906935,
    -7.415231584691132,
    3.6972339048071468,
    -97.7274736257555,
    12.996718740452982,
    30.838020649757016,
    -1.5572310505704858,
    100,
]


def wait_for_event(id, timeout=None, cache={}):
    """Wait for the input `id`, caching any other inputs received meanwhile."""
    while True:
        event = node.next(timeout=timeout)
        if event is None:
            cache["finished"] = True
            return None, cache
        if event["type"] == "INPUT":
            cache[event["id"]] = event["value"]
            if event["id"] == id:
                return event["value"], cache

        elif event["type"] == "ERROR":
            return None, cache


arm_holding_object = None
cache = {}


## ---- INIT ---
node.send_output(
    "action_r_arm",
    pa.array(r_init_pose),
    metadata={"encoding": "jointstate", "duration": 2},
)
node.send_output(
    "action_l_arm",
    pa.array(l_init_pose),
    metadata={"encoding": "jointstate", "duration": 2},
)

for event in node:
    if event["type"] == "INPUT":
        if event["id"] == "pose":
            values = event["value"]
            values = values.to_numpy()
            print("Pose: ", values)
            if len(values) == 0:
                continue
            x = values[0]
            y = values[1]
            z = values[2]
            action = event["metadata"]["action"]

            match action:
                case "grab":
                    if len(values) == 0:
                        continue
                    x = x + 0.03

                    ## Clip the maximum and minimum arm heights to avoid collisions or erratic movement.
                    trajectory = np.array(
                        [
                            [x, y, -0.16, 0, 0, 0, 100],
                            [x, y, z, 0, 0, 0, 0],
                            [x, y, -0.16, 0, 0, 0, 0],
                        ],
                    ).ravel()

                    if y < 0:
                        node.send_output(
                            "action_r_arm",
                            pa.array(trajectory),
                            metadata={"encoding": "xyzrpy", "duration": "0.75"},
                        )
                        event = wait_for_event(id="response_r_arm", timeout=5)
                        if event is not None and event[0]:
                            print("Success")
                            arm_holding_object = "right"
                            node.send_output(
                                "action_r_arm",
                                pa.array([0.1, -0.2, -0.16, 0, 0, 0, 0]),
                                metadata={"encoding": "xyzrpy", "duration": "1"},
                            )
                        else:
                            print("Failed: x: ", x, " y: ", y, " z: ", z)
                            node.send_output(
                                "action_r_arm",
                                pa.array(r_init_pose),
                                metadata={"encoding": "jointstate", "duration": "1"},
                            )
                            event = wait_for_event(id="response_r_arm")
                    else:
                        y += 0.03
                        node.send_output(
                            "action_l_arm",
                            pa.array(trajectory),
                            metadata={"encoding": "xyzrpy", "duration": "0.75"},
                        )
                        event = wait_for_event(id="response_l_arm", timeout=5)
                        if event is not None and event[0]:
                            print("Success")
                            arm_holding_object = "left"
                            node.send_output(
                                "action_l_arm",
                                pa.array([0.1, 0.2, -0.16, 0, 0, 0, 0]),
                                metadata={"encoding": "xyzrpy", "duration": "1"},
                            )
                        else:
                            print("Failed")
                            node.send_output(
                                "action_l_arm",
                                pa.array(l_init_pose),
                                metadata={"encoding": "jointstate", "duration": "1"},
                            )
                            event = wait_for_event(id="response_l_arm")
                case "release":
                    if len(values) == 0:
                        continue
                    x = x + 0.03

                    ## Clip the maximum and minimum arm heights to avoid collisions or erratic movement.
                    trajectory = np.array(
                        [
                            [x, y, -0.16, 0, 0, 0, 100],
                        ],
                    ).ravel()

                    if y < 0:
                        node.send_output(
                            "action_r_arm",
                            pa.array(trajectory),
                            metadata={"encoding": "xyzrpy", "duration": "0.75"},
                        )
                        event = wait_for_event(id="response_r_arm", timeout=5)
                        if event is not None and event[0]:
                            print("Success")
                            arm_holding_object = "right"
                            node.send_output(
                                "action_r_arm",
                                pa.array(r_init_pose),
                                metadata={"encoding": "jointstate", "duration": 1},
                            )
                        else:
                            print("Failed: x: ", x, " y: ", y, " z: ", z)
                            node.send_output(
                                "action_r_arm",
                                pa.array(r_init_pose),
                                metadata={"encoding": "jointstate", "duration": "1"},
                            )
                            event = wait_for_event(id="response_r_arm")
                    else:
                        y += 0.03
                        node.send_output(
                            "action_l_arm",
                            pa.array(trajectory),
                            metadata={"encoding": "xyzrpy", "duration": "0.75"},
                        )
                        event = wait_for_event(id="response_l_arm", timeout=5)
                        if event is not None and event[0]:
                            print("Success")
                            arm_holding_object = "left"
                            node.send_output(
                                "action_l_arm",
                                pa.array(l_init_pose),
                                metadata={"encoding": "jointstate", "duration": 1},
                            )
                        else:
                            print("Failed")
                            node.send_output(
                                "action_l_arm",
                                pa.array(l_init_pose),
                                metadata={"encoding": "jointstate", "duration": "1"},
                            )
                            event = wait_for_event(id="response_l_arm")

        elif event["id"] == "release_right":
            node.send_output(
                "action_r_arm",
                pa.array(
                    [
                        0.4,
                        0,
                        -0.16,
                        0,
                        0,
                        0,
                        100,
                    ],
                ),
                metadata={"encoding": "xyzrpy", "duration": "0.75"},
            )
            event, cache = wait_for_event(id="response_r_arm", cache=cache)
            node.send_output(
                "action_r_arm",
                pa.array(r_init_pose),
                metadata={"encoding": "jointstate", "duration": 1},
            )
        elif event["id"] == "release_left":
            node.send_output(
                "action_l_arm",
                pa.array(
                    [
                        0.4,
                        0,
                        -0.16,
                        0,
                        0,
                        0,
                        100,
                    ],
                ),
                metadata={"encoding": "xyzrpy", "duration": "0.75"},
            )
            event, cache = wait_for_event(id="response_l_arm", cache=cache)

            node.send_output(
                "action_l_arm",
                pa.array(l_init_pose),
                metadata={"encoding": "jointstate", "duration": 1},
            )
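
Two action encodings appear in this file: jointstate (seven joint angles plus a gripper value, as in the init poses) and xyzrpy (flattened [x, y, z, roll, pitch, yaw, gripper] waypoints). A hypothetical helper that packs a single Cartesian waypoint the same way the script does:

import pyarrow as pa

def xyzrpy_action(x, y, z, roll=0.0, pitch=0.0, yaw=0.0, gripper=100.0, duration="1"):
    # One waypoint in the flattened layout parse_pose.py sends.
    data = pa.array([x, y, z, roll, pitch, yaw, gripper])
    metadata = {"encoding": "xyzrpy", "duration": duration}
    return data, metadata

# e.g. reach 40 cm forward at table height with the gripper open:
# node.send_output("action_r_arm", *xyzrpy_action(0.4, 0.0, -0.16))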

examples/reachy2-remote/parse_whisper.py (+16 -3)

@@ -59,8 +59,21 @@ for event in node:
         node.send_output("text", pa.array([text]), {"image_id": "image_left"})
     elif "grab" in text:
         text = f"Given the prompt: {text}. Output the bounding boxes for the given grabbed object"
-        node.send_output("text", pa.array([text]), {"image_id": "image_depth"})
-    elif "left" in text:
+        node.send_output(
+            "text", pa.array([text]), {"image_id": "image_depth", "action": "grab"}
+        )
+    elif "put " in text:
+        text = f"Given the prompt: {text}. Output the bounding boxes for the place to put the object"
+        node.send_output(
+            "text",
+            pa.array([text]),
+            {"image_id": "image_depth", "action": "release"},
+        )
+    elif "release left" in text:
+        node.send_output("action_release_left", pa.array([1.0]))
+    elif "release right" in text:
+        node.send_output("action_release_right", pa.array([1.0]))
+    elif "turn left" in text:
         action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)])
         node.send_output("action", action)
         time.sleep(0.25)
@@ -70,7 +83,7 @@ for event in node:
         action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)])
         node.send_output("action", action)
         node.send_output("points", pa.array([]))
-    elif "right" in text:
+    elif "turn right" in text:
         action = pa.array([0.0, 0, 0, 0, 0, -np.deg2rad(160)])
         node.send_output("action", action)
         time.sleep(0.25)


node-hub/dora-object-to-pose/src/lib.rs (+33 -21)

@@ -1,7 +1,7 @@
 use core::f32;
 use dora_node_api::{
     arrow::{
-        array::{AsArray, Float64Array, UInt8Array},
+        array::{AsArray, Float64Array, UInt16Array, UInt8Array},
         datatypes::{Float32Type, Int64Type},
     },
     dora_core::config::DataId,
@@ -11,7 +11,7 @@ use eyre::Result;
 use std::collections::HashMap;
 
 fn points_to_pose(points: &[(f32, f32, f32)]) -> Vec<f32> {
-    let (_x, _y, _z, sum_xy, sum_x2, sum_y2, n, x_min, x_max, y_min, y_max, z_min, z_max) =
+    let (sum_x, sum_y, sum_z, sum_xy, sum_x2, sum_y2, n, x_min, x_max, y_min, y_max, z_min, z_max) =
         points.iter().fold(
             (
                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, -10.0, 10.0, -10.0, 10., -10.0,
@@ -49,11 +49,7 @@ fn points_to_pose(points: &[(f32, f32, f32)]) -> Vec<f32> {
) )
}, },
); );
let (mean_x, mean_y, mean_z) = (
(x_max + x_min) / 2.,
(y_max + y_min) / 2.,
(z_max + z_min) / 2.,
);
let (mean_x, mean_y, mean_z) = ((sum_x) / n, (sum_y) / n, (sum_z) / n);


// Compute covariance and standard deviations // Compute covariance and standard deviations
let cov = sum_xy / n - mean_x * mean_y; let cov = sum_xy / n - mean_x * mean_y;
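
points_to_pose now averages the accumulated sums instead of taking the midpoint of the extremes, which makes the estimated center far less sensitive to a single stray point. A quick numpy illustration (made-up values):

import numpy as np

z = np.array([0.50, 0.51, 0.52, 0.49, 3.0])  # one depth outlier
print((z.max() + z.min()) / 2)  # 1.745: the midpoint jumps halfway to the outlier
print(z.sum() / len(z))         # 1.004: the mean only moves by outlier / n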
@@ -116,7 +112,8 @@ pub fn lib_main() -> Result<()> {
                     } else {
                         vec![640, 480]
                     };
-                    let buffer: &Float64Array = data.as_any().downcast_ref().unwrap();
+                    let buffer: &UInt16Array = data.as_any().downcast_ref().unwrap();
+
                     depth_frame = Some(buffer.clone());
                 }
                 "masks" => {
"masks" => { "masks" => {
@@ -137,6 +134,8 @@ pub fn lib_main() -> Result<()> {
                         continue;
                     };
 
+                    let mut z_2 = 0.0;
+                    let mut z_1 = 0.0;
                     let outputs: Vec<Vec<f32>> = masks
                         .chunks(height as usize * width as usize)
                         .filter_map(|data| {
@@ -150,23 +149,36 @@ pub fn lib_main() -> Result<()> {
                             let v = i as f32 / width as f32; // Calculate y-coordinate (v)
 
                             if let Some(z) = z {
-                                let z = z as f32;
+                                let z = (z as f32) / 1000.;
                                 // Skip points that have empty depth or is too far away
                                 if z == 0. || z > 20.0 {
                                     return;
                                 }
-                                if data[i] {
-                                    let y = (u - resolution[0] as f32) * z
-                                        / focal_length[0] as f32;
-                                    let x = (v - resolution[1] as f32) * z
-                                        / focal_length[1] as f32;
-                                    let new_x = sin_theta * z + cos_theta * x;
-                                    let new_y = -y;
-                                    let new_z = cos_theta * z - sin_theta * x;
+                                if z_2 == 0. && z_1 == 0. {
+                                    z_1 = z;
+                                } else if z_1 == 0. {
+                                    z_2 = z_1;
+                                    z_1 = z;
+                                } else if (z - z_2).abs() < 0.1 && (z - z_1).abs() < 0.1 {
+                                    z_2 = z_1;
+                                    z_1 = z;
 
-                                    points.push((new_x, new_y, new_z));
-                                    z_total += new_z;
-                                    n += 1.;
+                                    if data[i] {
+                                        let y = (u - resolution[0] as f32) * z
+                                            / focal_length[0] as f32;
+                                        let x = (v - resolution[1] as f32) * z
+                                            / focal_length[1] as f32;
+                                        let new_x = sin_theta * z + cos_theta * x;
+                                        let new_y = -y;
+                                        let new_z = cos_theta * z - sin_theta * x;
+
+                                        points.push((new_x, new_y, new_z));
+                                        z_total += new_z;
+                                        n += 1.;
+                                    }
+                                } else {
+                                    z_2 = z_1;
+                                    z_1 = z;
                                 }
                             }
                         });
@@ -215,7 +227,7 @@ pub fn lib_main() -> Result<()> {
                             let v = i as f32 / width as f32; // Calculate y-coordinate (v)
 
                             if let Some(z) = z {
-                                let z = z as f32;
+                                let z = (z as f32) / 1000.;
                                 // Skip points that have empty depth or is too far away
                                 if z == 0. || z > 5.0 {
                                     return;
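
Two depth changes land in this file: the buffer is now UInt16 millimetres scaled to metres, and a point only contributes when it agrees within 10 cm with the two samples before it, rejecting isolated depth spikes. A Python sketch of the same sliding check over a flat array (hypothetical helper, not the node's API):

def consistent_depths(depth_mm, tol=0.1):
    z_1 = z_2 = 0.0
    for i, raw in enumerate(depth_mm):
        z = raw / 1000.0                      # millimetres -> metres
        if z == 0.0 or z > 20.0:              # skip empty or far-away depth
            continue
        if z_2 == 0.0 and z_1 == 0.0:         # warm-up: first sample
            z_1 = z
        elif z_1 == 0.0:                      # warm-up: second sample
            z_2, z_1 = z_1, z
        elif abs(z - z_2) < tol and abs(z - z_1) < tol:
            z_2, z_1 = z_1, z
            yield i, z                        # agrees with both predecessors: keep
        else:
            z_2, z_1 = z_1, z                 # spike: slide the window, drop the point

# The spike is dropped, and so is its immediate successor (the window must refill):
print(list(consistent_depths([500, 505, 510, 3000, 512])))  # [(2, 0.51)]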


node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py (+3 -1)

@@ -229,10 +229,12 @@ def main():
                     past_key_values,
                     image_id,
                 )
+                metadata = event["metadata"]
+                metadata["image_id"] = image_id if image_id is not None else "all"
                 node.send_output(
                     "text",
                     pa.array([response]),
-                    {"image_id": image_id if image_id is not None else "all"},
+                    metadata,
                 )
 
         elif event_type == "ERROR":


node-hub/dora-sam2/dora_sam2/main.py (+56 -2)

@@ -133,7 +133,9 @@ def main():
                 )
 
             if "boxes2d" in event_id:
-
+                if len(event["value"]) == 0:
+                    node.send_output("masks", pa.array([]))
+                    continue
                 if isinstance(event["value"], pa.StructArray):
                     boxes2d = event["value"][0].get("bbox").values.to_numpy()
                     labels = (
@@ -162,7 +164,59 @@ def main():
             ):
                 predictor.set_image(frames[image_id])
                 masks, _scores, last_pred = predictor.predict(
-                    box=boxes2d, point_labels=labels, multimask_output=False,
+                    box=boxes2d,
+                    point_labels=labels,
+                    multimask_output=False,
+                )
+
+                if len(masks.shape) == 4:
+                    masks = masks[:, 0, :, :]
+                    last_pred = last_pred[:, 0, :, :]
+                else:
+                    masks = masks[0, :, :]
+                    last_pred = last_pred[0, :, :]
+
+                masks = masks > 0
+                metadata["image_id"] = image_id
+                metadata["width"] = frames[image_id].width
+                metadata["height"] = frames[image_id].height
+                ## Mask to 3 channel image
+                match return_type:
+                    case pa.Array:
+                        node.send_output("masks", pa.array(masks.ravel()), metadata)
+                    case pa.StructArray:
+                        node.send_output(
+                            "masks",
+                            pa.array(
+                                [
+                                    {
+                                        "masks": masks.ravel(),
+                                        "labels": event["value"]["labels"],
+                                    },
+                                ],
+                            ),
+                            metadata,
+                        )
+            elif "points" in event_id:
+                points = event["value"].to_numpy().reshape((-1, 2))
+                return_type = pa.Array
+                if len(frames) == 0:
+                    continue
+                first_image = next(iter(frames.keys()))
+                image_id = event["metadata"].get("image_id", first_image)
+                with (
+                    torch.inference_mode(),
+                    torch.autocast(
+                        "cuda",
+                        dtype=torch.bfloat16,
+                    ),
+                ):
+                    predictor.set_image(frames[image_id])
+                    labels = [i for i in range(len(points))]
+                    masks, _scores, last_pred = predictor.predict(
+                        points,
+                        point_labels=labels,
+                        multimask_output=False,
                     )
 
                 if len(masks.shape) == 4:
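
The branches added above normalize predictor.predict output that arrives either 4-D (one singleton mask axis per box prompt) or 3-D (a single prompt), then threshold the logits to booleans before sending. A numpy-only sketch of that normalization (illustrative shapes):

import numpy as np

def squeeze_masks(masks):
    if masks.ndim == 4:        # (num_prompts, 1, H, W) -> (num_prompts, H, W)
        masks = masks[:, 0, :, :]
    else:                      # (1, H, W) -> (H, W)
        masks = masks[0, :, :]
    return masks > 0           # logits -> boolean mask

print(squeeze_masks(np.zeros((3, 1, 4, 4))).shape)  # (3, 4, 4)
print(squeeze_masks(np.ones((1, 4, 4))).shape)      # (4, 4)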

