From 39b96bd4b71418f86cc7b9cd4738c3f2b1bcd4cc Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Thu, 1 May 2025 21:13:57 +0200 Subject: [PATCH] working so100 inference code with qwenvl --- examples/so100-remote/parse_bbox.py | 69 ++++ examples/so100-remote/parse_keyboard.py | 127 ++++--- examples/so100-remote/parse_pose.py | 152 ++++++++ examples/so100-remote/parse_whisper.py | 32 ++ examples/so100-remote/qwenvl.yml | 141 ++++++++ examples/so100-remote/so100_inference.urdf | 384 +++++++++++++++++++++ examples/so100-remote/test.yml | 8 +- 7 files changed, 842 insertions(+), 71 deletions(-) create mode 100644 examples/so100-remote/parse_bbox.py create mode 100644 examples/so100-remote/parse_pose.py create mode 100644 examples/so100-remote/parse_whisper.py create mode 100644 examples/so100-remote/qwenvl.yml create mode 100644 examples/so100-remote/so100_inference.urdf diff --git a/examples/so100-remote/parse_bbox.py b/examples/so100-remote/parse_bbox.py new file mode 100644 index 00000000..6a6af454 --- /dev/null +++ b/examples/so100-remote/parse_bbox.py @@ -0,0 +1,69 @@ +"""TODO: Add docstring.""" + +import json +import os + +import numpy as np +import pyarrow as pa +from dora import Node + +node = Node() + +IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0")) + + +def extract_bboxes(json_text): + """Extract bounding boxes from a JSON string with markdown markers and return them as a NumPy array. + + Parameters + ---------- + json_text : str + JSON string containing bounding box data, including ```json markers. + + Returns + ------- + np.ndarray: NumPy array of bounding boxes. + + """ + # Ensure all lines are stripped of whitespace and markers + lines = json_text.strip().splitlines() + + # Filter out lines that are markdown markers + clean_lines = [line for line in lines if not line.strip().startswith("```")] + + # Join the lines back into a single string + clean_text = "\n".join(clean_lines) + # Parse the cleaned JSON text + try: + data = json.loads(clean_text) + + # Extract bounding boxes + bboxes = [item["bbox_2d"] for item in data] + labels = [item["label"] for item in data] + + return np.array(bboxes), np.array(labels) + except Exception as _e: # noqa + pass + return None, None + + +for event in node: + if event["type"] == "INPUT": + if len(event["value"]) == 0: + node.send_output("bbox_track", pa.array([])) + continue + + text = event["value"][0].as_py() + metadata = event["metadata"] + image_id = event["metadata"]["image_id"] + + bboxes, labels = extract_bboxes(text) + if bboxes is not None and len(bboxes) > 0: + bboxes = bboxes * int(1 / IMAGE_RESIZE_RATIO) + metadata["image_id"] = image_id + metadata["encoding"] = "xyxy" + node.send_output( + "bbox", + pa.array(bboxes.ravel()), + metadata, + ) diff --git a/examples/so100-remote/parse_keyboard.py b/examples/so100-remote/parse_keyboard.py index ed085c42..2e0d84cb 100644 --- a/examples/so100-remote/parse_keyboard.py +++ b/examples/so100-remote/parse_keyboard.py @@ -11,94 +11,85 @@ target_y = -0.02 target_x = 0.00 place_x = -0.02 -place_y = -0.1 - -top_z = -0.50 +place_y = 0.2 +place_z = -0.48 +top_z = -0.44 low_z = -0.57 roll = 1.86 pitch = 1.43 -yaw_closed = 0.8 -yaw_opened = -0.5 - -now = time.time() -time.sleep(1.5) - -node.send_output( - "action", - pa.array([target_x, target_y, top_z, roll, pitch, yaw_closed]), - metadata={"encoding": "xyzrpy"}, -) +yaw_open = 0.8 +yaw_close = -0.5 -time.sleep(0.8) -node.send_output( - "action", - pa.array([target_x, target_y, top_z, roll, pitch, yaw_closed]), - metadata={"encoding": 
"xyzrpy"}, -) +def grab(target_x, target_y, low_z, top_z, roll, pitch, yaw_open, yaw_close): + node.send_output( + "action", + pa.array([target_x, target_y, top_z, roll, pitch, yaw_open]), + metadata={"encoding": "xyzrpy"}, + ) -time.sleep(0.5) + time.sleep(0.8) -node.send_output( - "action", - pa.array([target_x, target_y, low_z, roll, pitch, yaw_closed]), - metadata={"encoding": "xyzrpy"}, -) -time.sleep(0.2) + node.send_output( + "action", + pa.array([target_x, target_y, low_z, roll, pitch, yaw_open]), + metadata={"encoding": "xyzrpy"}, + ) + time.sleep(0.2) -node.send_output( - "action", - pa.array([target_x, target_y, low_z, roll, pitch, yaw_opened]), - metadata={"encoding": "xyzrpy"}, -) + node.send_output( + "action", + pa.array([target_x, target_y, low_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) + time.sleep(1.0) -time.sleep(1.0) + node.send_output( + "action", + pa.array([target_x, target_y, top_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) -node.send_output( - "action", - pa.array([target_x, target_y, top_z, roll, pitch, yaw_opened]), - metadata={"encoding": "xyzrpy"}, -) -time.sleep(0.3) +def place(place_x, place_y, place_z, top_z, roll, pitch, yaw_open, yaw_close): + node.send_output( + "action", + pa.array([place_x, place_y, top_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) -node.send_output( - "action", - pa.array([place_x, place_y, top_z, roll, pitch, yaw_opened]), - metadata={"encoding": "xyzrpy"}, -) + time.sleep(1.0) -time.sleep(1.0) + node.send_output( + "action", + pa.array([place_x, place_y, place_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) -node.send_output( - "action", - pa.array([place_x, place_y, low_z, roll, pitch, yaw_opened]), - metadata={"encoding": "xyzrpy"}, -) + time.sleep(1.0) -time.sleep(0.2) + node.send_output( + "action", + pa.array([place_x, place_y, place_z, roll, pitch, yaw_open]), + metadata={"encoding": "xyzrpy"}, + ) + time.sleep(0.5) -node.send_output( - "action", - pa.array([place_x, place_y, low_z, roll, pitch, yaw_closed]), - metadata={"encoding": "xyzrpy"}, -) -time.sleep(1.0) + node.send_output( + "action", + pa.array([place_x, place_y, place_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) -node.send_output( - "action", - pa.array([place_x, place_y, top_z, roll, pitch, yaw_opened]), - metadata={"encoding": "xyzrpy"}, -) + time.sleep(0.5) -time.sleep(1.0) + node.send_output( + "action", + pa.array([place_x, place_y, top_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) -node.send_output( - "action", - pa.array([place_x, place_y, top_z, roll, pitch, yaw_opened]), - metadata={"encoding": "xyzrpy"}, -) diff --git a/examples/so100-remote/parse_pose.py b/examples/so100-remote/parse_pose.py new file mode 100644 index 00000000..5e3d702f --- /dev/null +++ b/examples/so100-remote/parse_pose.py @@ -0,0 +1,152 @@ +"""TODO: Add docstring.""" + +import time +import numpy as np +import pyarrow as pa +from dora import Node + +node = Node() +top_z = -0.48 +low_z = -0.57 + +roll = 1.86 +pitch = 1.43 +yaw_open = 0.8 +yaw_close = -0.5 + + +def grab(target_x, target_y, low_z, top_z, roll, pitch, yaw_open, yaw_close, last_x, last_y): + + node.send_output( + "action", + pa.array([target_x, target_y, top_z, roll, pitch, yaw_open]), + metadata={"encoding": "xyzrpy"}, + ) + + time.sleep(0.6) + + node.send_output( + "action", + pa.array([target_x, target_y, low_z, roll, pitch, yaw_open]), + metadata={"encoding": "xyzrpy"}, + ) + 
time.sleep(0.5) + + + node.send_output( + "action", + pa.array([target_x, target_y, low_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) + + time.sleep(0.5) + + node.send_output( + "action", + pa.array([target_x, target_y, top_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) + + node.send_output( + "action", + pa.array([0.05, 0.04, top_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) + + +def place(place_x, place_y, place_z, top_z, roll, pitch, yaw_open, yaw_close, last_x, last_y): + + + node.send_output( + "action", + pa.array([place_x, place_y, top_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) + + time.sleep(0.6) + + node.send_output( + "action", + pa.array([place_x, place_y, place_z, roll, pitch, yaw_close]), + metadata={"encoding": "xyzrpy"}, + ) + + time.sleep(0.5) + + + node.send_output( + "action", + pa.array([place_x, place_y, top_z, roll, pitch, yaw_open]), + metadata={"encoding": "xyzrpy"}, + ) + + time.sleep(0.7) + + + node.send_output( + "action", + pa.array([0.05, 0.04, top_z, roll, pitch, yaw_open]), + metadata={"encoding": "xyzrpy"}, + ) + + + +node.send_output( + "action", + pa.array([0.05, 0.04, top_z, roll, pitch, yaw_open]), + metadata={"encoding": "xyzrpy"}, +) + +last_x = 0 +last_y = 0 +last_z = 0 + +for event in node: + if event["type"] == "INPUT": + if event["id"] == "pose": + values = event["value"] + values = values.to_numpy() + print(values) + if len(values) == 0: + continue + x = values[0] + y = values[1] + z = values[2] + action = event["metadata"]["action"] + + + # Adjust z with the size of the gripper + z = z + 0.073 + # y = y - 0.01 + x = x - 0.01 + match action: + case "grab": + grab( + x, + y, + z, + top_z, + roll, + pitch, + yaw_open, + yaw_close, + last_x, + last_y + ) + case "release": + y = y - 0.02 + place( + x, + y, + z, + top_z, + roll, + pitch, + yaw_open, + yaw_close, + last_x, + last_y + ) + last_x = -0.05 + last_y = 0.04 + last_z = z diff --git a/examples/so100-remote/parse_whisper.py b/examples/so100-remote/parse_whisper.py new file mode 100644 index 00000000..a667760c --- /dev/null +++ b/examples/so100-remote/parse_whisper.py @@ -0,0 +1,32 @@ +"""TODO: Add docstring.""" + +import json +import os +import time + +import numpy as np +import pyarrow as pa +from dora import Node + +node = Node() + + +last_prompt = "" +for event in node: + if event["type"] == "INPUT": + if event["id"] == "text": + text = event["value"][0].as_py().lower() + + if "grab " in text: + text = f"Given the prompt: {text}. Output the bounding boxes for the given object" + node.send_output( + "text", pa.array([text]), {"image_id": "image", "action": "grab"} + ) + + elif "put " in text: + text = f"Given the prompt: {text}. 
Output the bounding boxes for the place to put the object" + node.send_output( + "text", + pa.array([text]), + {"image_id": "image", "action": "release"}, + ) diff --git a/examples/so100-remote/qwenvl.yml b/examples/so100-remote/qwenvl.yml new file mode 100644 index 00000000..efa0cd97 --- /dev/null +++ b/examples/so100-remote/qwenvl.yml @@ -0,0 +1,141 @@ +nodes: + - id: so100 + path: dora-rustypot + inputs: + tick: dora/timer/millis/33 + pose: + source: pytorch-kinematics/action + queue_size: 100 + outputs: + - pose + env: + PORT: /dev/ttyACM0 + TORQUE: true + IDS: 1 2 3 4 5 6 + + - id: camera + build: pip install -e ../../node-hub/dora-pyrealsense + path: dora-pyrealsense + inputs: + tick: dora/timer/millis/33 + outputs: + - image + - depth + + - id: pytorch-kinematics + build: pip install -e ../../node-hub/dora-pytorch-kinematics + path: dora-pytorch-kinematics + inputs: + pose: so100/pose + action: + source: parse_pose/action + queue_size: 100 + outputs: + - pose + - action + env: + URDF_PATH: so100.urdf + END_EFFECTOR_LINK: "Moving Jaw" + TRANSFORM: -0.18 0.02 -0.65 0.7 0 0 0.7 + + - id: plot + build: pip install -e ../../node-hub/dora-rerun + path: dora-rerun + inputs: + #series_so100: so100/pose + # series_pose: pytorch-kinematics/pose + jointstate_so100: so100/pose + jointstate_so100_inference: pytorch-kinematics/action + camera/image: camera/image + camera/depth: camera/depth + text_whisper: dora-distil-whisper/text + text_vlm: dora-qwenvl/text + camera/boxes2d: parse_bbox/bbox + camera/masks: sam2/masks + env: + so100_urdf: so100.urdf + so100_inference_urdf: so100_inference.urdf + so100_transform: -0.18 0.02 -0.65 0.7 0 0 0.7 + so100_inference_transform: -0.18 0.02 -0.65 0.7 0 0 0.7 + CAMERA_PITCH: -3.1415 + + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + inputs: + tick: dora/timer/millis/2000 + outputs: + - audio + + - id: parse_whisper + path: parse_whisper.py + inputs: + text: dora-distil-whisper/text + outputs: + - text + + - id: dora-qwenvl + build: pip install -e ../../node-hub/dora-qwen2-5-vl + path: dora-qwen2-5-vl + inputs: + image: camera/image + text: parse_whisper/text + outputs: + - text + env: + DEFAULT_QUESTION: Output the bounding box of the suitcase. 
+      IMAGE_RESIZE_RATIO: "1.0"
+
+  - id: parse_bbox
+    path: parse_bbox.py
+    inputs:
+      text: dora-qwenvl/text
+    outputs:
+      - bbox
+    env:
+      IMAGE_RESIZE_RATIO: "1.0"
+
+  - id: sam2
+    build: pip install -e ../../node-hub/dora-sam2
+    path: dora-sam2
+    inputs:
+      image: camera/image
+      boxes2d: parse_bbox/bbox
+    outputs:
+      - masks
+
+  - id: box_coordinates
+    build: pip install -e ../../node-hub/dora-object-to-pose
+    path: dora-object-to-pose
+    inputs:
+      depth: camera/depth
+      masks: sam2/masks
+    outputs:
+      - pose
+    env:
+      CAMERA_PITCH: -3.1415
+
+  - id: parse_pose
+    path: parse_pose.py
+    inputs:
+      pose: box_coordinates/pose
+    outputs:
+      - action
+
+  - id: dora-vad
+    build: pip install -e ../../node-hub/dora-vad
+    path: dora-vad
+    inputs:
+      audio: dora-microphone/audio
+    outputs:
+      - audio
+
+  - id: dora-distil-whisper
+    build: pip install -e ../../node-hub/dora-distil-whisper
+    path: dora-distil-whisper
+    inputs:
+      input: dora-vad/audio
+    outputs:
+      - text
+    env:
+      TARGET_LANGUAGE: english
diff --git a/examples/so100-remote/so100_inference.urdf b/examples/so100-remote/so100_inference.urdf
new file mode 100644
index 00000000..a72a7ebb
--- /dev/null
+++ b/examples/so100-remote/so100_inference.urdf
@@ -0,0 +1,384 @@
+<!-- so100_inference.urdf: 384 lines of URDF XML (robot, link, joint, and mesh definitions).
+     The XML markup was stripped from this copy of the patch and could not be recovered. -->
\ No newline at end of file
diff --git a/examples/so100-remote/test.yml b/examples/so100-remote/test.yml
index a7bf9740..05cfb161 100644
--- a/examples/so100-remote/test.yml
+++ b/examples/so100-remote/test.yml
@@ -43,13 +43,15 @@ nodes:
     build: pip install -e ../../node-hub/dora-rerun
     path: dora-rerun
     inputs:
-      #series_so100: so100/pose
       # series_pose: pytorch-kinematics/pose
-      series_so100: pytorch-kinematics/action
-      jointstate_so100: pytorch-kinematics/action
+      series_so100_inference: pytorch-kinematics/action
+      jointstate_so100: so100/pose
+      jointstate_so100_inference: pytorch-kinematics/action
       camera/image: camera/image
       camera/depth: camera/depth
     env:
       so100_urdf: so100.urdf
+      so100_inference_urdf: so100_inference.urdf
       so100_transform: -0.18 0.02 -0.65 0.7 0 0 0.7
+      so100_inference_transform: -0.18 0.02 -0.65 0.7 0 0 0.7
       CAMERA_PITCH: -3.1415
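
For reference, every arm command in this example is a six-value pose sent with the "xyzrpy" encoding that dora-pytorch-kinematics consumes. A minimal sketch of that convention, reusing the gripper constants from parse_keyboard.py and assuming the script is declared as its own node in the running dataflow:

import time

import pyarrow as pa
from dora import Node

node = Node()

# Hover above the target with the jaw open, descend, then close the jaw.
# Each message is [x, y, z, roll, pitch, yaw]; in this example the yaw slot
# carries what parse_keyboard.py calls yaw_open (0.8) / yaw_close (-0.5).
hover = [0.00, -0.02, -0.44, 1.86, 1.43, 0.8]
reach = [0.00, -0.02, -0.57, 1.86, 1.43, 0.8]
grip = [0.00, -0.02, -0.57, 1.86, 1.43, -0.5]

for pose in (hover, reach, grip):
    node.send_output("action", pa.array(pose), metadata={"encoding": "xyzrpy"})
    time.sleep(0.8)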