From dcb4dd0769b4723b643e3f773bbbea0103bb1af3 Mon Sep 17 00:00:00 2001 From: haixuantao Date: Tue, 18 Feb 2025 16:41:16 +0100 Subject: [PATCH] fix image ratio reduction --- examples/reachy2/demo-dev.yml | 37 ++++++++++--------- examples/reachy2/parse_bbox.py | 5 ++- examples/reachy2/state_machine.py | 11 +++--- .../dora-qwen2-5-vl/dora_qwen2_5_vl/main.py | 4 +- 4 files changed, 32 insertions(+), 25 deletions(-) diff --git a/examples/reachy2/demo-dev.yml b/examples/reachy2/demo-dev.yml index 52d86fe3..c77cf99f 100755 --- a/examples/reachy2/demo-dev.yml +++ b/examples/reachy2/demo-dev.yml @@ -34,7 +34,7 @@ nodes: outputs: - response_base env: - ROBOT_IP: 10.42.0.80 + ROBOT_IP: 172.17.134.85 - id: reachy-left-arm build: pip install -e ../../node-hub/dora-reachy2 @@ -44,7 +44,7 @@ nodes: outputs: - response_l_arm env: - ROBOT_IP: 10.42.0.80 + ROBOT_IP: 172.17.134.85 - id: reachy-right-arm build: pip install -e ../../node-hub/dora-reachy2 @@ -54,7 +54,7 @@ nodes: outputs: - response_r_arm env: - ROBOT_IP: 10.42.0.80 + ROBOT_IP: 172.17.134.85 - id: reachy-camera build: pip install -e ../../node-hub/dora-reachy2 @@ -67,7 +67,7 @@ nodes: - image_depth - depth env: - ROBOT_IP: 10.42.0.80 + ROBOT_IP: 172.17.134.85 - id: reachy-head build: pip install -e ../../node-hub/dora-reachy2 @@ -76,20 +76,20 @@ nodes: boxes2d: parse_bbox/bbox_face look: state_machine/look env: - ROBOT_IP: 10.42.0.80 + ROBOT_IP: 172.17.134.85 - - id: plot - build: pip install -e ../../node-hub/dora-rerun - path: dora-rerun - inputs: - camera_left/image_right: reachy-camera/image_right - camera_torso/image: reachy-camera/image_depth - text_response: dora-qwenvl/text - text_whisper: dora-distil-whisper/text - camera_torso/boxes2d: parse_bbox/bbox - camera_left/boxes2d_face: parse_bbox/bbox_face - env: - RERUN_MEMORY_LIMIT: 5% + #- id: plot + #build: pip install -e ../../node-hub/dora-rerun + #path: dora-rerun + #inputs: + #camera_left/image_right: reachy-camera/image_right + #camera_torso/image: reachy-camera/image_depth + #text_response: dora-qwenvl/text + #text_whisper: dora-distil-whisper/text + #camera_torso/boxes2d: parse_bbox/bbox + #camera_left/boxes2d_face: parse_bbox/bbox_face + #env: + #RERUN_MEMORY_LIMIT: 5% - id: dora-qwenvl build: pip install -e ../../node-hub/dora-qwen2-5-vl @@ -104,6 +104,7 @@ nodes: env: #ADAPTER_PATH: /home/peter/Documents/work/LLaMA-Factory/saves/qwen2.5_vl-felix/lora/sft/checkpoint-558 DEFAULT_QUESTION: grab human. + IMAGE_RESIZE_RATIO: "0.5" # ACTIVATION_WORDS: grab pick give output take catch grabs picks gives output takes catches have #SYSTEM_PROMPT: You're a robot. @@ -115,6 +116,8 @@ nodes: - bbox - bbox_face - action_arm + env: + IMAGE_RESIZE_RATIO: "0.5" - id: box_coordinates build: pip install -e ../../node-hub/dora-boxes2d-to-pose diff --git a/examples/reachy2/parse_bbox.py b/examples/reachy2/parse_bbox.py index bfa634c3..3e8a7f88 100644 --- a/examples/reachy2/parse_bbox.py +++ b/examples/reachy2/parse_bbox.py @@ -1,4 +1,5 @@ import json +import os import numpy as np import pyarrow as pa @@ -6,6 +7,8 @@ from dora import Node node = Node() +IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0")) + def extract_bboxes(json_text): """ @@ -45,7 +48,7 @@ for event in node: bboxes, labels = extract_bboxes(text) if bboxes is not None and len(bboxes) > 0: - bboxes = bboxes # * 2 + bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO) if "human" in labels[0] or "head" in labels[0]: node.send_output("bbox_face", pa.array(bboxes.ravel())) else: diff --git a/examples/reachy2/state_machine.py b/examples/reachy2/state_machine.py index a5e81b72..869aa605 100644 --- a/examples/reachy2/state_machine.py +++ b/examples/reachy2/state_machine.py @@ -1,4 +1,4 @@ -## State Machine +# State Machine import os import time @@ -105,18 +105,17 @@ def wait_for_events(ids: list[str]): while True: - ### === IDLE === node.send_output( "action_r_arm", pa.array(r_default_pose), - metadata={"encoding": "jointstate"}, + metadata={"encoding": "jointstate", "duration": 2}, ) node.send_output( "action_l_arm", pa.array(l_default_pose), - metadata={"encoding": "jointstate"}, + metadata={"encoding": "jointstate", "duration": 2}, ) wait_for_events(ids=["response_r_arm", "response_l_arm"]) @@ -182,7 +181,9 @@ while True: y = values[1] z = values[2] x = x + 0.04 - z = np.clip(z, -0.31, -0.22) + + ## Clip the Maximum and minim values for the height of the arm to avoid collision or weird movement. + z = np.clip(z, -0.32, -0.22) node.send_output("look", pa.array([x, y, z])) trajectory = np.array( [ diff --git a/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py b/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py index 19e007d7..42eab52a 100644 --- a/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py +++ b/node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py @@ -72,8 +72,8 @@ def generate(frames: dict, question, history, past_key_values=None, select_image { "type": "image", "image": image, - "resized_height": image.size[1] / IMAGE_RESIZE_RATIO, - "resized_width": image.size[0] / IMAGE_RESIZE_RATIO, + "resized_height": image.size[1] * IMAGE_RESIZE_RATIO, + "resized_width": image.size[0] * IMAGE_RESIZE_RATIO, } for image in images ]