| @@ -4,17 +4,40 @@ nodes: | |||||
| _unstable_deploy: | _unstable_deploy: | ||||
| machine: encoder | machine: encoder | ||||
| inputs: | inputs: | ||||
| tick: dora/timer/millis/10 | |||||
| tick: dora/timer/millis/20 | |||||
| outputs: | outputs: | ||||
| - image_left | - image_left | ||||
| - image_depth | - image_depth | ||||
| - depth | - depth | ||||
| env: | env: | ||||
| CAPTURE_PATH: 0 | |||||
| IMAGE_WIDTH: 640 | IMAGE_WIDTH: 640 | ||||
| IMAGE_HEIGHT: 480 | IMAGE_HEIGHT: 480 | ||||
| ROBOT_IP: 127.0.0.1 | ROBOT_IP: 127.0.0.1 | ||||
| - id: reachy-left-arm | |||||
| build: pip install -e ../../node-hub/dora-reachy2 | |||||
| path: dora-reachy2-left-arm | |||||
| _unstable_deploy: | |||||
| machine: encoder | |||||
| inputs: | |||||
| pose: parse_pose/action_l_arm | |||||
| outputs: | |||||
| - response_l_arm | |||||
| env: | |||||
| ROBOT_IP: 127.0.0.1 | |||||
| - id: reachy-right-arm | |||||
| build: pip install -e ../../node-hub/dora-reachy2 | |||||
| path: dora-reachy2-right-arm | |||||
| _unstable_deploy: | |||||
| machine: encoder | |||||
| inputs: | |||||
| pose: parse_pose/action_r_arm | |||||
| outputs: | |||||
| - response_r_arm | |||||
| env: | |||||
| ROBOT_IP: 127.0.0.1 | |||||
| - id: rav1e-local-image | - id: rav1e-local-image | ||||
| path: dora-rav1e | path: dora-rav1e | ||||
| build: cargo build -p dora-rav1e --release | build: cargo build -p dora-rav1e --release | ||||
| @@ -26,10 +49,21 @@ nodes: | |||||
| outputs: | outputs: | ||||
| - image_left | - image_left | ||||
| - image_depth | - image_depth | ||||
| - depth | |||||
| env: | env: | ||||
| RAV1E_SPEED: 10 | RAV1E_SPEED: 10 | ||||
| - id: rav1e-local-depth | |||||
| path: dora-rav1e | |||||
| build: cargo build -p dora-rav1e --release | |||||
| _unstable_deploy: | |||||
| machine: encoder | |||||
| inputs: | |||||
| depth: camera/depth | |||||
| outputs: | |||||
| - depth | |||||
| env: | |||||
| RAV1E_SPEED: 7 | |||||
| - id: dav1d-remote | - id: dav1d-remote | ||||
| path: dora-dav1d | path: dora-dav1d | ||||
| build: cargo build -p dora-dav1d --release | build: cargo build -p dora-dav1d --release | ||||
| @@ -38,7 +72,7 @@ nodes: | |||||
| inputs: | inputs: | ||||
| image_depth: rav1e-local-image/image_depth | image_depth: rav1e-local-image/image_depth | ||||
| image_left: rav1e-local-image/image_left | image_left: rav1e-local-image/image_left | ||||
| # depth: rav1e-local/depth | |||||
| depth: rav1e-local-depth/depth | |||||
| outputs: | outputs: | ||||
| - image_left | - image_left | ||||
| - image_depth | - image_depth | ||||
| @@ -87,6 +121,8 @@ nodes: | |||||
| - action | - action | ||||
| - points | - points | ||||
| - text | - text | ||||
| - action_release_left | |||||
| - action_release_right | |||||
| env: | env: | ||||
| IMAGE_RESIZE_RATIO: "1.0" | IMAGE_RESIZE_RATIO: "1.0" | ||||
| @@ -118,6 +154,17 @@ nodes: | |||||
| env: | env: | ||||
| IMAGE_RESIZE_RATIO: "1.0" | IMAGE_RESIZE_RATIO: "1.0" | ||||
| - id: sam2 | |||||
| build: pip install -e ../../node-hub/dora-sam2 | |||||
| path: dora-sam2 | |||||
| _unstable_deploy: | |||||
| machine: gpu | |||||
| inputs: | |||||
| image_depth: dav1d-remote/image_depth | |||||
| boxes2d: parse_bbox/bbox_grab | |||||
| outputs: | |||||
| - masks | |||||
| - id: tracker | - id: tracker | ||||
| build: pip install -e ../../node-hub/dora-cotracker | build: pip install -e ../../node-hub/dora-cotracker | ||||
| path: dora-cotracker | path: dora-cotracker | ||||
| @@ -132,24 +179,32 @@ nodes: | |||||
| env: | env: | ||||
| INTERACTIVE_MODE: false | INTERACTIVE_MODE: false | ||||
| # - id: box_coordinates | |||||
| # build: pip install -e ../../node-hub/dora-object-to-pose | |||||
| # path: dora-object-to-pose | |||||
| # inputs: | |||||
| # depth: reachy-camera/depth | |||||
| # boxes2d: parse_bbox/bbox | |||||
| # outputs: | |||||
| # - pose | |||||
| #- id: sam2 | |||||
| #build: pip install -e ../../node-hub/dora-sam2 | |||||
| #path: dora-sam2 | |||||
| #_unstable_deploy: | |||||
| #machine: gpu | |||||
| #inputs: | |||||
| #image_left: dav1d-remote/image_left | |||||
| #boxes2d: parse_bbox/bbox | |||||
| #outputs: | |||||
| #- masks | |||||
| - id: box_coordinates | |||||
| build: pip install -e ../../node-hub/dora-object-to-pose | |||||
| path: dora-object-to-pose | |||||
| _unstable_deploy: | |||||
| machine: gpu | |||||
| inputs: | |||||
| depth: dav1d-remote/depth | |||||
| masks: sam2/masks | |||||
| outputs: | |||||
| - pose | |||||
| - id: parse_pose | |||||
| path: parse_pose.py | |||||
| _unstable_deploy: | |||||
| machine: gpu | |||||
| inputs: | |||||
| pose: box_coordinates/pose | |||||
| response_r_arm: reachy-right-arm/response_r_arm | |||||
| response_l_arm: reachy-left-arm/response_l_arm | |||||
| release_left: parse_whisper/action_release_left | |||||
| release_right: parse_whisper/action_release_right | |||||
| outputs: | |||||
| - action_r_arm | |||||
| - action_l_arm | |||||
| env: | |||||
| IMAGE_RESIZE_RATIO: "1.0" | |||||
| - id: parse_point | - id: parse_point | ||||
| path: parse_point.py | path: parse_point.py | ||||
| @@ -179,12 +234,15 @@ nodes: | |||||
| build: pip install -e ../../node-hub/dora-rerun | build: pip install -e ../../node-hub/dora-rerun | ||||
| path: dora-rerun | path: dora-rerun | ||||
| _unstable_deploy: | _unstable_deploy: | ||||
| machine: macbook | |||||
| machine: gpu | |||||
| inputs: | inputs: | ||||
| image: dav1d-remote/image_left | image: dav1d-remote/image_left | ||||
| image_depth: dav1d-remote/image_depth | |||||
| boxes2d: parse_bbox/bbox | |||||
| torso/image: dav1d-remote/image_depth | |||||
| torso/depth: dav1d-remote/depth | |||||
| torso/boxes2d: parse_bbox/bbox | |||||
| original_text: dora-distil-whisper/text | original_text: dora-distil-whisper/text | ||||
| parsed_text: parse_whisper/text | parsed_text: parse_whisper/text | ||||
| qwenvl_text: dora-qwenvl/text | qwenvl_text: dora-qwenvl/text | ||||
| tracked_image: tracker/tracked_image | |||||
| env: | |||||
| RERUN_MEMORY_LIMIT: 5% | |||||
| CAMERA_PITCH: 2.47 | |||||
| @@ -54,20 +54,23 @@ for event in node: | |||||
| continue | continue | ||||
| text = event["value"][0].as_py() | text = event["value"][0].as_py() | ||||
| metadata = event["metadata"] | |||||
| image_id = event["metadata"]["image_id"] | image_id = event["metadata"]["image_id"] | ||||
| bboxes, labels = extract_bboxes(text) | bboxes, labels = extract_bboxes(text) | ||||
| if bboxes is not None and len(bboxes) > 0: | if bboxes is not None and len(bboxes) > 0: | ||||
| bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO) | bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO) | ||||
| metadata["image_id"] = image_id | |||||
| metadata["encoding"] = "xyxy" | |||||
| if image_id == "image_left": | if image_id == "image_left": | ||||
| node.send_output( | node.send_output( | ||||
| "bbox_track", | "bbox_track", | ||||
| pa.array(bboxes.ravel()), | pa.array(bboxes.ravel()), | ||||
| metadata={"encoding": "xyxy", "image_id": image_id}, | |||||
| metadata, | |||||
| ) | ) | ||||
| elif image_id == "image_depth": | elif image_id == "image_depth": | ||||
| node.send_output( | node.send_output( | ||||
| "bbox_grab", | "bbox_grab", | ||||
| pa.array(bboxes.ravel()), | pa.array(bboxes.ravel()), | ||||
| metadata={"encoding": "xyxy", "image_id": image_id}, | |||||
| metadata, | |||||
| ) | ) | ||||
| @@ -29,18 +29,24 @@ for event in node: | |||||
| point = values[-1] | point = values[-1] | ||||
| rz = int((width / 2) - point[0]) / (width / 2) | rz = int((width / 2) - point[0]) / (width / 2) | ||||
| x_distance = min(height / 2, height - point[1]) | |||||
| if abs(rz) > 0.3: | |||||
| rz = np.deg2rad(30) * np.sign(rz) | |||||
| x_distance = min(height, height - point[1]) | |||||
| if abs(rz) > 0.75: | |||||
| rz = np.deg2rad(90) * np.sign(rz) | |||||
| if abs(rz) > 0.5: | |||||
| rz = np.deg2rad(60) * np.sign(rz) | |||||
| elif abs(rz) > 0.3: | |||||
| rz = np.deg2rad(55) * np.sign(rz) | |||||
| elif abs(rz) > 0.1: | elif abs(rz) > 0.1: | ||||
| rz = np.deg2rad(20) * np.sign(rz) | |||||
| rz = np.deg2rad(45) * np.sign(rz) | |||||
| else: | else: | ||||
| x = 0 | x = 0 | ||||
| if x_distance > (height * 0.3): | |||||
| x = 0.7 | |||||
| elif x_distance > (height * 0.15): | |||||
| if x_distance > (height * 0.7): | |||||
| x = 0.5 | |||||
| elif x_distance > (height * 0.5): | |||||
| x = 0.5 | |||||
| elif x_distance > (height * 0.2): | |||||
| x = 0.5 | x = 0.5 | ||||
| else: | else: | ||||
| x = 0 | x = 0 | ||||
| @@ -0,0 +1,291 @@ | |||||
| """Turn incoming "pose" and release inputs into arm actions sent on "action_r_arm" / "action_l_arm".""" | |||||
| import json | |||||
| import os | |||||
| import numpy as np | |||||
| import pyarrow as pa | |||||
| from dora import Node | |||||
| node = Node() | |||||
| IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0")) | |||||
# Joint-space poses, sent with the "jointstate" encoding elsewhere in this file.
# Each pose has eight values. The last one is presumably the gripper opening:
# the grab trajectory in this file approaches with 100 and switches to 0 at the
# target, so 100 reads as "open" and 0 as "closed".
# NOTE(review): units and ordering of the first seven joint values are not
# visible here -- confirm against the arm driver.

# Initial pose of the left arm (last value 100).
l_init_pose = [
    -7.0631310641087435,
    -10.432298603362307,
    24.429809104404114,
    -132.15000828778648,
    -1.5494749438811133,
    -21.749917789205202,
    8.099312596108344,
    100,
]

# Initial pose of the right arm (last value 100).
r_init_pose = [
    -5.60273587426976,
    10.780818397272316,
    -27.868146823156042,
    -126.15650363072193,
    3.961108018106834,
    -35.43682799906162,
    350.9236448374495,
    100,
]

# Right-arm release pose with the last value at 0.
r_release_closed_pose = [
    -26.1507947940993,
    12.16735021387949,
    -2.2657319092611976,
    -97.63648867582175,
    -19.91084837404425,
    22.10184328619011,
    366.71351223614494,
    0,
]

# Same right-arm joints with the last value at 100.
r_release_opened_pose = [
    -26.1507947940993,
    12.16735021387949,
    -2.2657319092611976,
    -97.63648867582175,
    -19.91084837404425,
    22.10184328619011,
    366.71351223614494,
    100,
]

# Left-arm release pose with the last value at 100.
# Fix: the final values of the two left-arm release poses were swapped relative
# to their names (the "opened" pose ended in 0 and the "closed" pose in 100),
# which contradicted the right-arm constants above.
l_release_opened_pose = [
    -30.04330081906935,
    -7.415231584691132,
    3.6972339048071468,
    -97.7274736257555,
    12.996718740452982,
    30.838020649757016,
    -1.5572310505704858,
    100,
]

# Same left-arm joints with the last value at 0.
l_release_closed_pose = [
    -30.04330081906935,
    -7.415231584691132,
    3.6972339048071468,
    -97.7274736257555,
    12.996718740452982,
    30.838020649757016,
    -1.5572310505704858,
    0,
]
def wait_for_event(id, timeout=None, cache=None):
    """Consume events until an input named ``id`` arrives.

    Every INPUT event seen while waiting is recorded in ``cache`` under its
    own id, so values that arrive in the meantime stay available to the caller.

    Args:
        id: Input id to wait for. The name shadows the builtin but is kept
            because callers pass it by keyword.
        timeout: Passed through unchanged to ``node.next``.
        cache: Dict updated in place with the latest value per input id. A
            fresh dict is created when omitted, so separate calls no longer
            share state through a mutable default argument.

    Returns:
        A ``(value, cache)`` tuple. ``value`` is the payload of the matching
        input, or ``None`` when ``node.next`` returned ``None`` (in which case
        ``cache["finished"]`` is set to ``True``) or when an ERROR event was
        received.
    """
    if cache is None:
        cache = {}

    while True:
        event = node.next(timeout=timeout)
        if event is None:
            # No event was returned: flag it so the caller can tell it apart.
            cache["finished"] = True
            return None, cache

        event_type = event["type"]
        if event_type == "INPUT":
            cache[event["id"]] = event["value"]
            if event["id"] == id:
                return event["value"], cache
        elif event_type == "ERROR":
            return None, cache
        # Any other event type is ignored and the wait continues.
# State used by the event loop that follows.
arm_holding_object = None
cache = {}

## ---- INIT ---
# Send each arm its initial joint pose, right arm first, then left.
for _output_id, _init_pose in (
    ("action_r_arm", r_init_pose),
    ("action_l_arm", l_init_pose),
):
    node.send_output(
        _output_id,
        pa.array(_init_pose),
        metadata={"encoding": "jointstate", "duration": 2},
    )
| # Main event loop: handles the "pose", "release_right" and "release_left" inputs. | |||||
| for event in node: | |||||
| if event["type"] == "INPUT": | |||||
| if event["id"] == "pose": | |||||
| values = event["value"] | |||||
| values = values.to_numpy() | |||||
| print("Pose: ", values) | |||||
| if len(values) == 0: | |||||
| continue | |||||
| # The first three entries are read as x, y and z. | |||||
| # NOTE(review): only the empty case is rejected above; an array with one or two entries would raise IndexError here. | |||||
| x = values[0] | |||||
| y = values[1] | |||||
| z = values[2] | |||||
| # NOTE(review): raises KeyError when the metadata carries no "action" key. | |||||
| action = event["metadata"]["action"] | |||||
| match action: | |||||
| case "grab": | |||||
| # NOTE(review): redundant -- an empty array was already skipped before x/y/z were read. | |||||
| if len(values) == 0: | |||||
| continue | |||||
| x = x + 0.03 | |||||
| ## Three waypoints: z=-0.16 with last value 100, then the target z with 0, then z=-0.16 with 0. | |||||
| ## NOTE(review): the previous comment mentioned clipping min/max arm height, but no clipping is applied here. | |||||
| trajectory = np.array( | |||||
| [ | |||||
| [x, y, -0.16, 0, 0, 0, 100], | |||||
| [x, y, z, 0, 0, 0, 0], | |||||
| [x, y, -0.16, 0, 0, 0, 0], | |||||
| ], | |||||
| ).ravel() | |||||
| # Negative y is sent to the right arm; everything else goes to the left arm. | |||||
| if y < 0: | |||||
| node.send_output( | |||||
| "action_r_arm", | |||||
| pa.array(trajectory), | |||||
| metadata={"encoding": "xyzrpy", "duration": "0.75"}, | |||||
| ) | |||||
| # NOTE(review): wait_for_event returns a (value, cache) tuple, so "event is not None" is always true | |||||
| # and only event[0] decides the branch. This also rebinds the loop variable "event". | |||||
| event = wait_for_event(id="response_r_arm", timeout=5) | |||||
| if event is not None and event[0]: | |||||
| print("Success") | |||||
| arm_holding_object = "right" | |||||
| node.send_output( | |||||
| "action_r_arm", | |||||
| pa.array([0.1, -0.2, -0.16, 0, 0, 0, 0]), | |||||
| metadata={"encoding": "xyzrpy", "duration": "1"}, | |||||
| ) | |||||
| else: | |||||
| print("Failed: x: ", x, " y: ", y, " z: ", z) | |||||
| node.send_output( | |||||
| "action_r_arm", | |||||
| pa.array(r_init_pose), | |||||
| metadata={"encoding": "jointstate", "duration": "1"}, | |||||
| ) | |||||
| # No timeout is passed here, unlike the wait above. | |||||
| event = wait_for_event(id="response_r_arm") | |||||
| else: | |||||
| # NOTE(review): trajectory was already built above, so this offset does not change what is sent. | |||||
| y += 0.03 | |||||
| node.send_output( | |||||
| "action_l_arm", | |||||
| pa.array(trajectory), | |||||
| metadata={"encoding": "xyzrpy", "duration": "0.75"}, | |||||
| ) | |||||
| # NOTE(review): same tuple-return caveat as for the right arm. | |||||
| event = wait_for_event(id="response_l_arm", timeout=5) | |||||
| if event is not None and event[0]: | |||||
| print("Success") | |||||
| arm_holding_object = "left" | |||||
| node.send_output( | |||||
| "action_l_arm", | |||||
| pa.array([0.1, 0.2, -0.16, 0, 0, 0, 0]), | |||||
| metadata={"encoding": "xyzrpy", "duration": "1"}, | |||||
| ) | |||||
| else: | |||||
| print("Failed") | |||||
| node.send_output( | |||||
| "action_l_arm", | |||||
| pa.array(l_init_pose), | |||||
| metadata={"encoding": "jointstate", "duration": "1"}, | |||||
| ) | |||||
| event = wait_for_event(id="response_l_arm") | |||||
| case "release": | |||||
| # NOTE(review): redundant -- an empty array was already skipped before x/y/z were read. | |||||
| if len(values) == 0: | |||||
| continue | |||||
| x = x + 0.03 | |||||
| ## Single waypoint at z=-0.16 with last value 100. | |||||
| ## NOTE(review): the previous comment mentioned clipping min/max arm height, but no clipping is applied here. | |||||
| trajectory = np.array( | |||||
| [ | |||||
| [x, y, -0.16, 0, 0, 0, 100], | |||||
| ], | |||||
| ).ravel() | |||||
| # Negative y is sent to the right arm; everything else goes to the left arm. | |||||
| if y < 0: | |||||
| node.send_output( | |||||
| "action_r_arm", | |||||
| pa.array(trajectory), | |||||
| metadata={"encoding": "xyzrpy", "duration": "0.75"}, | |||||
| ) | |||||
| # NOTE(review): wait_for_event returns a (value, cache) tuple, so "event is not None" is always true. | |||||
| event = wait_for_event(id="response_r_arm", timeout=5) | |||||
| if event is not None and event[0]: | |||||
| print("Success") | |||||
| # NOTE(review): this marks the arm as holding an object after a release; the variable is | |||||
| # never read in this file -- confirm the intended value. | |||||
| arm_holding_object = "right" | |||||
| node.send_output( | |||||
| "action_r_arm", | |||||
| pa.array(r_init_pose), | |||||
| metadata={"encoding": "jointstate", "duration": 1}, | |||||
| ) | |||||
| else: | |||||
| print("Failed: x: ", x, " y: ", y, " z: ", z) | |||||
| node.send_output( | |||||
| "action_r_arm", | |||||
| pa.array(r_init_pose), | |||||
| metadata={"encoding": "jointstate", "duration": "1"}, | |||||
| ) | |||||
| event = wait_for_event(id="response_r_arm") | |||||
| else: | |||||
| # NOTE(review): trajectory was already built above, so this offset does not change what is sent. | |||||
| y += 0.03 | |||||
| node.send_output( | |||||
| "action_l_arm", | |||||
| pa.array(trajectory), | |||||
| metadata={"encoding": "xyzrpy", "duration": "0.75"}, | |||||
| ) | |||||
| # NOTE(review): same tuple-return caveat as for the right arm. | |||||
| event = wait_for_event(id="response_l_arm", timeout=5) | |||||
| if event is not None and event[0]: | |||||
| print("Success") | |||||
| # NOTE(review): set to "left" after a release; never read in this file -- confirm the intended value. | |||||
| arm_holding_object = "left" | |||||
| node.send_output( | |||||
| "action_l_arm", | |||||
| pa.array(l_init_pose), | |||||
| metadata={"encoding": "jointstate", "duration": 1}, | |||||
| ) | |||||
| else: | |||||
| print("Failed") | |||||
| node.send_output( | |||||
| "action_l_arm", | |||||
| pa.array(l_init_pose), | |||||
| metadata={"encoding": "jointstate", "duration": "1"}, | |||||
| ) | |||||
| event = wait_for_event(id="response_l_arm") | |||||
| # Explicit right-arm release: send a fixed pose with last value 100, wait for the response, | |||||
| # then send the initial joint pose. | |||||
| elif event["id"] == "release_right": | |||||
| node.send_output( | |||||
| "action_r_arm", | |||||
| pa.array( | |||||
| [ | |||||
| 0.4, | |||||
| 0, | |||||
| -0.16, | |||||
| 0, | |||||
| 0, | |||||
| 0, | |||||
| 100, | |||||
| ], | |||||
| ), | |||||
| metadata={"encoding": "xyzrpy", "duration": "0.75"}, | |||||
| ) | |||||
| # Here the tuple is unpacked and the shared cache is passed in; no timeout is given. | |||||
| event, cache = wait_for_event(id="response_r_arm", cache=cache) | |||||
| node.send_output( | |||||
| "action_r_arm", | |||||
| pa.array(r_init_pose), | |||||
| metadata={"encoding": "jointstate", "duration": 1}, | |||||
| ) | |||||
| # Explicit left-arm release: same sequence as the right arm, on "action_l_arm". | |||||
| elif event["id"] == "release_left": | |||||
| node.send_output( | |||||
| "action_l_arm", | |||||
| pa.array( | |||||
| [ | |||||
| 0.4, | |||||
| 0, | |||||
| -0.16, | |||||
| 0, | |||||
| 0, | |||||
| 0, | |||||
| 100, | |||||
| ], | |||||
| ), | |||||
| metadata={"encoding": "xyzrpy", "duration": "0.75"}, | |||||
| ) | |||||
| event, cache = wait_for_event(id="response_l_arm", cache=cache) | |||||
| node.send_output( | |||||
| "action_l_arm", | |||||
| pa.array(l_init_pose), | |||||
| metadata={"encoding": "jointstate", "duration": 1}, | |||||
| ) | |||||
| @@ -59,8 +59,21 @@ for event in node: | |||||
| node.send_output("text", pa.array([text]), {"image_id": "image_left"}) | node.send_output("text", pa.array([text]), {"image_id": "image_left"}) | ||||
| elif "grab" in text: | elif "grab" in text: | ||||
| text = f"Given the prompt: {text}. Output the bounding boxes for the given grabbed object" | text = f"Given the prompt: {text}. Output the bounding boxes for the given grabbed object" | ||||
| node.send_output("text", pa.array([text]), {"image_id": "image_depth"}) | |||||
| elif "left" in text: | |||||
| node.send_output( | |||||
| "text", pa.array([text]), {"image_id": "image_depth", "action": "grab"} | |||||
| ) | |||||
| elif "put " in text: | |||||
| text = f"Given the prompt: {text}. Output the bounding boxes for the place to put the object" | |||||
| node.send_output( | |||||
| "text", | |||||
| pa.array([text]), | |||||
| {"image_id": "image_depth", "action": "release"}, | |||||
| ) | |||||
| elif "release left" in text: | |||||
| node.send_output("action_release_left", pa.array([1.0])) | |||||
| elif "release right" in text: | |||||
| node.send_output("action_release_right", pa.array([1.0])) | |||||
| elif "turn left" in text: | |||||
| action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)]) | action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)]) | ||||
| node.send_output("action", action) | node.send_output("action", action) | ||||
| time.sleep(0.25) | time.sleep(0.25) | ||||
| @@ -70,7 +83,7 @@ for event in node: | |||||
| action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)]) | action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)]) | ||||
| node.send_output("action", action) | node.send_output("action", action) | ||||
| node.send_output("points", pa.array([])) | node.send_output("points", pa.array([])) | ||||
| elif "right" in text: | |||||
| elif "turn right" in text: | |||||
| action = pa.array([0.0, 0, 0, 0, 0, -np.deg2rad(160)]) | action = pa.array([0.0, 0, 0, 0, 0, -np.deg2rad(160)]) | ||||
| node.send_output("action", action) | node.send_output("action", action) | ||||
| time.sleep(0.25) | time.sleep(0.25) | ||||
| @@ -1,7 +1,7 @@ | |||||
| use core::f32; | use core::f32; | ||||
| use dora_node_api::{ | use dora_node_api::{ | ||||
| arrow::{ | arrow::{ | ||||
| array::{AsArray, Float64Array, UInt8Array}, | |||||
| array::{AsArray, Float64Array, UInt16Array, UInt8Array}, | |||||
| datatypes::{Float32Type, Int64Type}, | datatypes::{Float32Type, Int64Type}, | ||||
| }, | }, | ||||
| dora_core::config::DataId, | dora_core::config::DataId, | ||||
| @@ -11,7 +11,7 @@ use eyre::Result; | |||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||
| fn points_to_pose(points: &[(f32, f32, f32)]) -> Vec<f32> { | fn points_to_pose(points: &[(f32, f32, f32)]) -> Vec<f32> { | ||||
| let (_x, _y, _z, sum_xy, sum_x2, sum_y2, n, x_min, x_max, y_min, y_max, z_min, z_max) = | |||||
| let (sum_x, sum_y, sum_z, sum_xy, sum_x2, sum_y2, n, x_min, x_max, y_min, y_max, z_min, z_max) = | |||||
| points.iter().fold( | points.iter().fold( | ||||
| ( | ( | ||||
| 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, -10.0, 10.0, -10.0, 10., -10.0, | 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, -10.0, 10.0, -10.0, 10., -10.0, | ||||
| @@ -49,11 +49,7 @@ fn points_to_pose(points: &[(f32, f32, f32)]) -> Vec<f32> { | |||||
| ) | ) | ||||
| }, | }, | ||||
| ); | ); | ||||
| let (mean_x, mean_y, mean_z) = ( | |||||
| (x_max + x_min) / 2., | |||||
| (y_max + y_min) / 2., | |||||
| (z_max + z_min) / 2., | |||||
| ); | |||||
| let (mean_x, mean_y, mean_z) = ((sum_x) / n, (sum_y) / n, (sum_z) / n); | |||||
| // Compute covariance and standard deviations | // Compute covariance and standard deviations | ||||
| let cov = sum_xy / n - mean_x * mean_y; | let cov = sum_xy / n - mean_x * mean_y; | ||||
| @@ -116,7 +112,8 @@ pub fn lib_main() -> Result<()> { | |||||
| } else { | } else { | ||||
| vec![640, 480] | vec![640, 480] | ||||
| }; | }; | ||||
| let buffer: &Float64Array = data.as_any().downcast_ref().unwrap(); | |||||
| let buffer: &UInt16Array = data.as_any().downcast_ref().unwrap(); | |||||
| depth_frame = Some(buffer.clone()); | depth_frame = Some(buffer.clone()); | ||||
| } | } | ||||
| "masks" => { | "masks" => { | ||||
| @@ -137,6 +134,8 @@ pub fn lib_main() -> Result<()> { | |||||
| continue; | continue; | ||||
| }; | }; | ||||
| let mut z_2 = 0.0; | |||||
| let mut z_1 = 0.0; | |||||
| let outputs: Vec<Vec<f32>> = masks | let outputs: Vec<Vec<f32>> = masks | ||||
| .chunks(height as usize * width as usize) | .chunks(height as usize * width as usize) | ||||
| .filter_map(|data| { | .filter_map(|data| { | ||||
| @@ -150,23 +149,36 @@ pub fn lib_main() -> Result<()> { | |||||
| let v = i as f32 / width as f32; // Calculate y-coordinate (v) | let v = i as f32 / width as f32; // Calculate y-coordinate (v) | ||||
| if let Some(z) = z { | if let Some(z) = z { | ||||
| let z = z as f32; | |||||
| let z = (z as f32) / 1000.; | |||||
| // Skip points that have empty depth or is too far away | // Skip points that have empty depth or is too far away | ||||
| if z == 0. || z > 20.0 { | if z == 0. || z > 20.0 { | ||||
| return; | return; | ||||
| } | } | ||||
| if data[i] { | |||||
| let y = (u - resolution[0] as f32) * z | |||||
| / focal_length[0] as f32; | |||||
| let x = (v - resolution[1] as f32) * z | |||||
| / focal_length[1] as f32; | |||||
| let new_x = sin_theta * z + cos_theta * x; | |||||
| let new_y = -y; | |||||
| let new_z = cos_theta * z - sin_theta * x; | |||||
| if z_2 == 0. && z_1 == 0. { | |||||
| z_1 = z; | |||||
| } else if z_1 == 0. { | |||||
| z_2 = z_1; | |||||
| z_1 = z; | |||||
| } else if (z - z_2).abs() < 0.1 && (z - z_1).abs() < 0.1 { | |||||
| z_2 = z_1; | |||||
| z_1 = z; | |||||
| points.push((new_x, new_y, new_z)); | |||||
| z_total += new_z; | |||||
| n += 1.; | |||||
| if data[i] { | |||||
| let y = (u - resolution[0] as f32) * z | |||||
| / focal_length[0] as f32; | |||||
| let x = (v - resolution[1] as f32) * z | |||||
| / focal_length[1] as f32; | |||||
| let new_x = sin_theta * z + cos_theta * x; | |||||
| let new_y = -y; | |||||
| let new_z = cos_theta * z - sin_theta * x; | |||||
| points.push((new_x, new_y, new_z)); | |||||
| z_total += new_z; | |||||
| n += 1.; | |||||
| } | |||||
| } else { | |||||
| z_2 = z_1; | |||||
| z_1 = z; | |||||
| } | } | ||||
| } | } | ||||
| }); | }); | ||||
| @@ -215,7 +227,7 @@ pub fn lib_main() -> Result<()> { | |||||
| let v = i as f32 / width as f32; // Calculate y-coordinate (v) | let v = i as f32 / width as f32; // Calculate y-coordinate (v) | ||||
| if let Some(z) = z { | if let Some(z) = z { | ||||
| let z = z as f32; | |||||
| let z = (z as f32) / 1000.; | |||||
| // Skip points that have empty depth or is too far away | // Skip points that have empty depth or is too far away | ||||
| if z == 0. || z > 5.0 { | if z == 0. || z > 5.0 { | ||||
| return; | return; | ||||
| @@ -229,10 +229,12 @@ def main(): | |||||
| past_key_values, | past_key_values, | ||||
| image_id, | image_id, | ||||
| ) | ) | ||||
| metadata = event["metadata"] | |||||
| metadata["image_id"] = image_id if image_id is not None else "all" | |||||
| node.send_output( | node.send_output( | ||||
| "text", | "text", | ||||
| pa.array([response]), | pa.array([response]), | ||||
| {"image_id": image_id if image_id is not None else "all"}, | |||||
| metadata, | |||||
| ) | ) | ||||
| elif event_type == "ERROR": | elif event_type == "ERROR": | ||||
| @@ -133,7 +133,9 @@ def main(): | |||||
| ) | ) | ||||
| if "boxes2d" in event_id: | if "boxes2d" in event_id: | ||||
| if len(event["value"]) == 0: | |||||
| node.send_output("masks", pa.array([])) | |||||
| continue | |||||
| if isinstance(event["value"], pa.StructArray): | if isinstance(event["value"], pa.StructArray): | ||||
| boxes2d = event["value"][0].get("bbox").values.to_numpy() | boxes2d = event["value"][0].get("bbox").values.to_numpy() | ||||
| labels = ( | labels = ( | ||||
| @@ -162,7 +164,59 @@ def main(): | |||||
| ): | ): | ||||
| predictor.set_image(frames[image_id]) | predictor.set_image(frames[image_id]) | ||||
| masks, _scores, last_pred = predictor.predict( | masks, _scores, last_pred = predictor.predict( | ||||
| box=boxes2d, point_labels=labels, multimask_output=False, | |||||
| box=boxes2d, | |||||
| point_labels=labels, | |||||
| multimask_output=False, | |||||
| ) | |||||
| if len(masks.shape) == 4: | |||||
| masks = masks[:, 0, :, :] | |||||
| last_pred = last_pred[:, 0, :, :] | |||||
| else: | |||||
| masks = masks[0, :, :] | |||||
| last_pred = last_pred[0, :, :] | |||||
| masks = masks > 0 | |||||
| metadata["image_id"] = image_id | |||||
| metadata["width"] = frames[image_id].width | |||||
| metadata["height"] = frames[image_id].height | |||||
| ## Mask to 3 channel image | |||||
| match return_type: | |||||
| case pa.Array: | |||||
| node.send_output("masks", pa.array(masks.ravel()), metadata) | |||||
| case pa.StructArray: | |||||
| node.send_output( | |||||
| "masks", | |||||
| pa.array( | |||||
| [ | |||||
| { | |||||
| "masks": masks.ravel(), | |||||
| "labels": event["value"]["labels"], | |||||
| }, | |||||
| ], | |||||
| ), | |||||
| metadata, | |||||
| ) | |||||
| elif "points" in event_id: | |||||
| points = event["value"].to_numpy().reshape((-1, 2)) | |||||
| return_type = pa.Array | |||||
| if len(frames) == 0: | |||||
| continue | |||||
| first_image = next(iter(frames.keys())) | |||||
| image_id = event["metadata"].get("image_id", first_image) | |||||
| with ( | |||||
| torch.inference_mode(), | |||||
| torch.autocast( | |||||
| "cuda", | |||||
| dtype=torch.bfloat16, | |||||
| ), | |||||
| ): | |||||
| predictor.set_image(frames[image_id]) | |||||
| labels = [i for i in range(len(points))] | |||||
| masks, _scores, last_pred = predictor.predict( | |||||
| points, | |||||
| point_labels=labels, | |||||
| multimask_output=False, | |||||
| ) | ) | ||||
| if len(masks.shape) == 4: | if len(masks.shape) == 4: | ||||