You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

parse_whisper.py 5.4 kB

9 months ago
9 months ago
9 months ago
9 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. """Map keyword commands from the "text" and "arrived" inputs to dora node outputs."""
  2. import json
  3. import os
  4. import time
  5. import numpy as np
  6. import pyarrow as pa
  7. from dora import Node
  # Handle to this process's dora node: the script iterates it for events and
  # calls send_output on it to publish results.
  8. node = Node()
  # Ratio read from the IMAGE_RESIZE_RATIO environment variable (default 1.0).
  # NOTE(review): not referenced in the visible part of this file - confirm it is still used.
  9. IMAGE_RESIZE_RATIO = float(os.getenv("IMAGE_RESIZE_RATIO", "1.0"))
  10. def extract_bboxes(json_text):
  11. """Extract bounding boxes from a JSON string with markdown markers and return them as a NumPy array.
  12. Parameters
  13. ----------
  14. json_text : str
  15. JSON string containing bounding box data, including ```json markers.
  16. Returns
  17. -------
  18. np.ndarray: NumPy array of bounding boxes.
  19. """
  20. # Ensure all lines are stripped of whitespace and markers
  21. lines = json_text.strip().splitlines()
  22. # Filter out lines that are markdown markers
  23. clean_lines = [line for line in lines if not line.strip().startswith("```")]
  24. # Join the lines back into a single string
  25. clean_text = "\n".join(clean_lines)
  26. # Parse the cleaned JSON text
  27. try:
  28. data = json.loads(clean_text)
  29. # Extract bounding boxes
  30. bboxes = [item["bbox_2d"] for item in data]
  31. labels = [item["label"] for item in data]
  32. return np.array(bboxes), np.array(labels)
  33. except Exception as _e: # noqa
  34. pass
  35. return None, None
  36. last_prompt = ""
  37. for event in node:
  38. if event["type"] == "INPUT":
  39. if event["id"] == "text":
  40. text = event["value"][0].as_py().lower()
  41. if "stop" in text:
  42. node.send_output("points", pa.array([], type=pa.float64()))
  43. elif "follow" in text:
  44. text = f"Given the prompt: {text}. Output the bounding boxes for the given followed object"
  45. node.send_output("text", pa.array([text]), {"image_id": "image_left"})
  46. elif "grab " in text:
  47. text = f"Given the prompt: {text}. Output the bounding boxes for the given grabbed object"
  48. node.send_output(
  49. "text", pa.array([text]), {"image_id": "image_depth", "action": "grab"}
  50. )
  51. elif "get " in text:
  52. text = f"Given the prompt: {text}. Output the bounding boxes for the object"
  53. node.send_output(
  54. "text", pa.array([text]), {"image_id": "image_left", "action": "grab"}
  55. )
  56. last_prompt = text
  57. elif "put " in text:
  58. text = f"Given the prompt: {text}. Output the bounding boxes for the place to put the object"
  59. node.send_output(
  60. "text",
  61. pa.array([text]),
  62. {"image_id": "image_left", "action": "release"},
  63. )
  64. last_prompt = text
  65. elif "drop " in text:
  66. text = f"Given the prompt: {text}. Output the bounding boxes for the place to drop the object"
  67. node.send_output(
  68. "text",
  69. pa.array([text]),
  70. {"image_id": "image_depth", "action": "release"},
  71. )
  72. elif "release left" in text:
  73. node.send_output("action_release_left", pa.array([1.0]))
  74. elif "release right" in text:
  75. node.send_output("action_release_right", pa.array([1.0]))
  76. elif "turn left" in text:
  77. action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)])
  78. node.send_output("action", action)
  79. time.sleep(0.25)
  80. action = pa.array([0.0, 0, 0, 0, 0, np.deg2rad(160)])
  81. node.send_output("action", action)
  82. node.send_output("points", pa.array([]))
  83. elif "turn right" in text:
  84. action = pa.array([0.0, 0, 0, 0, 0, -np.deg2rad(160)])
  85. node.send_output("action", action)
  86. time.sleep(0.25)
  87. action = pa.array([0.0, 0, 0, 0, 0, -np.deg2rad(160)])
  88. node.send_output("action", action)
  89. node.send_output("points", pa.array([]))
  90. elif "move forward" in text:
  91. action = pa.array([0.5, 0, 0, 0, 0, 0])
  92. node.send_output("action", action)
  93. time.sleep(0.25)
  94. node.send_output("action", action)
  95. node.send_output("points", pa.array([]))
  96. elif "move backward" in text:
  97. action = pa.array([-0.5, 0, 0, 0, 0, 0])
  98. node.send_output("action", action)
  99. time.sleep(0.25)
  100. node.send_output("action", action)
  101. node.send_output("points", pa.array([]))
  102. elif event["id"] == "arrived":
  103. text = last_prompt
  104. print("received arrived message")
  105. node.send_output("points", pa.array([]))
  106. if "get " in text:
  107. text = f"Given the prompt: {text}. Output the bounding boxes for the place to put the object"
  108. node.send_output(
  109. "text",
  110. pa.array([text]),
  111. {"image_id": "image_depth", "action": "grab"},
  112. )
  113. elif "put " in text:
  114. text = f"Given the prompt: {text}. Output the bounding boxes for the place to put the object"
  115. node.send_output(
  116. "text",
  117. pa.array([text]),
  118. {"image_id": "image_depth", "action": "release"},
  119. )