
fix image ratio reduction

tags/v0.3.10-rc0
haixuantao 11 months ago
commit dcb4dd0769
4 changed files with 32 additions and 25 deletions
  1. examples/reachy2/demo-dev.yml (+20 -17)
  2. examples/reachy2/parse_bbox.py (+4 -1)
  3. examples/reachy2/state_machine.py (+6 -5)
  4. node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py (+2 -2)

examples/reachy2/demo-dev.yml (+20 -17)

@@ -34,7 +34,7 @@ nodes:
     outputs:
       - response_base
     env:
-      ROBOT_IP: 10.42.0.80
+      ROBOT_IP: 172.17.134.85
 
   - id: reachy-left-arm
     build: pip install -e ../../node-hub/dora-reachy2
@@ -44,7 +44,7 @@ nodes:
     outputs:
       - response_l_arm
     env:
-      ROBOT_IP: 10.42.0.80
+      ROBOT_IP: 172.17.134.85
 
   - id: reachy-right-arm
     build: pip install -e ../../node-hub/dora-reachy2
@@ -54,7 +54,7 @@ nodes:
     outputs:
       - response_r_arm
     env:
-      ROBOT_IP: 10.42.0.80
+      ROBOT_IP: 172.17.134.85
 
   - id: reachy-camera
     build: pip install -e ../../node-hub/dora-reachy2
@@ -67,7 +67,7 @@ nodes:
       - image_depth
       - depth
     env:
-      ROBOT_IP: 10.42.0.80
+      ROBOT_IP: 172.17.134.85
 
   - id: reachy-head
     build: pip install -e ../../node-hub/dora-reachy2
@@ -76,20 +76,20 @@ nodes:
       boxes2d: parse_bbox/bbox_face
       look: state_machine/look
     env:
-      ROBOT_IP: 10.42.0.80
+      ROBOT_IP: 172.17.134.85
 
-  - id: plot
-    build: pip install -e ../../node-hub/dora-rerun
-    path: dora-rerun
-    inputs:
-      camera_left/image_right: reachy-camera/image_right
-      camera_torso/image: reachy-camera/image_depth
-      text_response: dora-qwenvl/text
-      text_whisper: dora-distil-whisper/text
-      camera_torso/boxes2d: parse_bbox/bbox
-      camera_left/boxes2d_face: parse_bbox/bbox_face
-    env:
-      RERUN_MEMORY_LIMIT: 5%
+#- id: plot
+#build: pip install -e ../../node-hub/dora-rerun
+#path: dora-rerun
+#inputs:
+#camera_left/image_right: reachy-camera/image_right
+#camera_torso/image: reachy-camera/image_depth
+#text_response: dora-qwenvl/text
+#text_whisper: dora-distil-whisper/text
+#camera_torso/boxes2d: parse_bbox/bbox
+#camera_left/boxes2d_face: parse_bbox/bbox_face
+#env:
+#RERUN_MEMORY_LIMIT: 5%
 
   - id: dora-qwenvl
     build: pip install -e ../../node-hub/dora-qwen2-5-vl
@@ -104,6 +104,7 @@ nodes:
     env:
       #ADAPTER_PATH: /home/peter/Documents/work/LLaMA-Factory/saves/qwen2.5_vl-felix/lora/sft/checkpoint-558
       DEFAULT_QUESTION: grab human.
+      IMAGE_RESIZE_RATIO: "0.5"
       # ACTIVATION_WORDS: grab pick give output take catch grabs picks gives output takes catches have
       #SYSTEM_PROMPT: You're a robot.

@@ -115,6 +116,8 @@ nodes:
       - bbox
       - bbox_face
       - action_arm
+    env:
+      IMAGE_RESIZE_RATIO: "0.5"
 
   - id: box_coordinates
     build: pip install -e ../../node-hub/dora-boxes2d-to-pose
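
Note: the same IMAGE_RESIZE_RATIO: "0.5" is now set on both the dora-qwenvl node and the parse_bbox node. The VLM sees a frame downscaled by that ratio, so the boxes it returns live in the downscaled coordinate system, and parse_bbox has to scale them back up before other nodes use them against the full-resolution image. A minimal sketch of that round trip (the resolution and box values below are made up for illustration):

import numpy as np

IMAGE_RESIZE_RATIO = 0.5  # value configured in demo-dev.yml

# Hypothetical full-resolution camera frame (width, height).
full_res = (1280, 720)
# Resolution actually handed to the VLM after the resize.
vlm_res = (int(full_res[0] * IMAGE_RESIZE_RATIO), int(full_res[1] * IMAGE_RESIZE_RATIO))

# A box predicted by the VLM in the downscaled frame: [x_min, y_min, x_max, y_max].
bbox_small = np.array([100, 80, 220, 300])

# parse_bbox.py rescales with int(1 / IMAGE_RESIZE_RATIO); with 0.5 that is a factor of 2.
bbox_full = bbox_small * int(1 / IMAGE_RESIZE_RATIO)
print(vlm_res, bbox_full)  # (640, 360) [200 160 440 600]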


examples/reachy2/parse_bbox.py (+4 -1)

@@ -1,4 +1,5 @@
 import json
+import os
 
 import numpy as np
 import pyarrow as pa
@@ -6,6 +7,8 @@ from dora import Node
 
 node = Node()
 
+IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0"))
+
 
 def extract_bboxes(json_text):
     """
@@ -45,7 +48,7 @@ for event in node:
 
         bboxes, labels = extract_bboxes(text)
         if bboxes is not None and len(bboxes) > 0:
-            bboxes = bboxes  # * 2
+            bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO)
            if "human" in labels[0] or "head" in labels[0]:
                 node.send_output("bbox_face", pa.array(bboxes.ravel()))
             else:
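
The rescale uses int(1 / IMAGE_RESIZE_RATIO), which is exact for the 0.5 configured in demo-dev.yml (a factor of 2) but truncates for ratios whose reciprocal is not an integer (e.g. 0.3 gives int(3.33) = 3). A sketch of the equivalent float form, shown only for comparison and not part of this commit:

import numpy as np

IMAGE_RESIZE_RATIO = 0.5
bboxes = np.array([[100, 80, 220, 300]])

scaled_int = bboxes * int(1 / IMAGE_RESIZE_RATIO)                  # what parse_bbox.py does
scaled_float = (bboxes / IMAGE_RESIZE_RATIO).round().astype(int)   # float alternative
assert (scaled_int == scaled_float).all()                          # identical when the ratio is 0.5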


examples/reachy2/state_machine.py (+6 -5)

@@ -1,4 +1,4 @@
-## State Machine
+# State Machine
 import os
 import time

@@ -105,18 +105,17 @@ def wait_for_events(ids: list[str]):
 
 
 while True:
-
     ### === IDLE ===
 
     node.send_output(
         "action_r_arm",
         pa.array(r_default_pose),
-        metadata={"encoding": "jointstate"},
+        metadata={"encoding": "jointstate", "duration": 2},
     )
     node.send_output(
         "action_l_arm",
         pa.array(l_default_pose),
-        metadata={"encoding": "jointstate"},
+        metadata={"encoding": "jointstate", "duration": 2},
     )
     wait_for_events(ids=["response_r_arm", "response_l_arm"])

@@ -182,7 +181,9 @@ while True:
             y = values[1]
             z = values[2]
             x = x + 0.04
-            z = np.clip(z, -0.31, -0.22)
+
+            ## Clip the maximum and minimum values of the arm height to avoid collisions or weird movement.
+            z = np.clip(z, -0.32, -0.22)
             node.send_output("look", pa.array([x, y, z]))
             trajectory = np.array(
                 [
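
Two behavioural tweaks here: the added "duration": 2 metadata presumably asks the dora-reachy2 arm nodes to spread the move back to the default pose over about two seconds (not verified against the node's API here), and the lower clip bound for the height target widens from -0.31 to -0.32. Illustration of the clip change with made-up z values:

import numpy as np

for z in (-0.40, -0.315, -0.25, -0.10):
    print(z, "->", np.clip(z, -0.32, -0.22))
# -0.40  -> -0.32   (before this commit it was clipped to -0.31)
# -0.315 -> -0.315  (previously forced up to -0.31)
# -0.25  -> -0.25
# -0.10  -> -0.22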


node-hub/dora-qwen2-5-vl/dora_qwen2_5_vl/main.py (+2 -2)

@@ -72,8 +72,8 @@ def generate(frames: dict, question, history, past_key_values=None, select_image
         {
             "type": "image",
             "image": image,
-            "resized_height": image.size[1] / IMAGE_RESIZE_RATIO,
-            "resized_width": image.size[0] / IMAGE_RESIZE_RATIO,
+            "resized_height": image.size[1] * IMAGE_RESIZE_RATIO,
+            "resized_width": image.size[0] * IMAGE_RESIZE_RATIO,
         }
         for image in images
     ]
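
This last hunk is the fix the commit title refers to: with a ratio below 1, dividing the image size by the ratio enlarged the frame sent to Qwen2.5-VL instead of shrinking it. A quick illustration with made-up numbers:

IMAGE_RESIZE_RATIO = 0.5
width, height = 1280, 720  # hypothetical frame size

old = (height / IMAGE_RESIZE_RATIO, width / IMAGE_RESIZE_RATIO)  # (1440.0, 2560.0): upscaled
new = (height * IMAGE_RESIZE_RATIO, width * IMAGE_RESIZE_RATIO)  # (360.0, 640.0): downscaled as intended
print(old, new)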

