diff --git a/examples/python-ros2-dataflow/README.md b/examples/python-ros2-dataflow/README.md new file mode 100644 index 00000000..b07f18fc --- /dev/null +++ b/examples/python-ros2-dataflow/README.md @@ -0,0 +1,9 @@ +# Quick Python ROS2 example + +To get started: + +```bash +source /opt/ros/humble/setup.bash && ros2 run turtlesim turtlesim_node & +source /opt/ros/humble/setup.bash && ros2 run examples_rclcpp_minimal_service service_main & +cargo run --example python-ros2-dataflow --features="ros2-examples" +``` diff --git a/examples/rust-dataflow/README.md b/examples/rust-dataflow/README.md new file mode 100644 index 00000000..71eaf706 --- /dev/null +++ b/examples/rust-dataflow/README.md @@ -0,0 +1,7 @@ +# Quick Rust example + +To get started: + +```bash +cargo run --example rust-dataflow +``` diff --git a/examples/speech-to-text/README.md b/examples/speech-to-text/README.md index 1853d8a4..6111a79c 100644 --- a/examples/speech-to-text/README.md +++ b/examples/speech-to-text/README.md @@ -1,12 +1,31 @@ -# Dora echo example +# Dora Speech to Text example Make sure to have, dora, pip and cargo installed. ```bash -dora up -dora build dataflow.yml -dora start dataflow.yml +dora build https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-text/whisper.yml +dora run https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-text/whisper.yml + +# Wait for the whisper model to download which can take a bit of time. 
+``` + +## Graph Visualization + +```mermaid + +flowchart TB + dora-microphone + dora-vad + dora-distil-whisper + dora-rerun[/dora-rerun\] +subgraph ___dora___ [dora] + subgraph ___timer_timer___ [timer] + dora/timer/secs/2[\secs/2/] + end +end + dora/timer/secs/2 -- tick --> dora-microphone + dora-microphone -- audio --> dora-vad + dora-vad -- audio as input --> dora-distil-whisper + dora-distil-whisper -- text as original_text --> dora-rerun -# In another terminal -terminal-print ``` diff --git a/examples/speech-to-text/dataflow.yml b/examples/speech-to-text/whisper-dev.yml similarity index 93% rename from examples/speech-to-text/dataflow.yml rename to examples/speech-to-text/whisper-dev.yml index bad75e7e..1742bd9f 100644 --- a/examples/speech-to-text/dataflow.yml +++ b/examples/speech-to-text/whisper-dev.yml @@ -2,6 +2,8 @@ nodes: - id: dora-microphone build: pip install -e ../../node-hub/dora-microphone path: dora-microphone + inputs: + tick: dora/timer/millis/2000 outputs: - audio diff --git a/examples/speech-to-text/whisper.yml b/examples/speech-to-text/whisper.yml new file mode 100644 index 00000000..ce919154 --- /dev/null +++ b/examples/speech-to-text/whisper.yml @@ -0,0 +1,33 @@ +nodes: + - id: dora-microphone + description: Microphone + build: pip install dora-microphone + path: dora-microphone + inputs: + tick: dora/timer/millis/2000 + outputs: + - audio + + - id: dora-vad + build: pip install dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-whisper + build: pip install dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english + + - id: dora-rerun + build: pip install dora-rerun + path: dora-rerun + inputs: + original_text: dora-whisper/text diff --git a/examples/vlm/README.md b/examples/vlm/README.md index ab6eec8c..25db91be 100644 --- a/examples/vlm/README.md +++ b/examples/vlm/README.md @@ -1 +1,11 @@ # Quick example on 
using a VLM with dora-rs + +Make sure to have dora, pip and cargo installed. + +```bash +dora build https://raw.githubusercontent.com/dora-rs/dora/main/examples/vlm/qwenvl.yml + +dora run https://raw.githubusercontent.com/dora-rs/dora/main/examples/vlm/qwenvl.yml + +# Wait for the qwenvl and whisper models to download, which can take a bit of time. +``` diff --git a/examples/vlm/dataflow.yml b/examples/vlm/dataflow.yml deleted file mode 100644 index 51908b37..00000000 --- a/examples/vlm/dataflow.yml +++ /dev/null @@ -1,37 +0,0 @@ -nodes: - - id: camera - build: pip install -e ../../node-hub/opencv-video-capture - path: opencv-video-capture - inputs: - tick: dora/timer/millis/50 - outputs: - - image - env: - CAPTURE_PATH: 0 - IMAGE_WIDTH: 640 - IMAGE_HEIGHT: 480 - - - id: dora-qwenvl - build: pip install -e ../../node-hub/dora-qwenvl - path: dora-qwenvl - inputs: - image: - source: camera/image - queue_size: 1 - tick: dora/timer/millis/400 - outputs: - - text - - tick - env: - DEFAULT_QUESTION: Describe the image in a very short sentence. 
- # For China - # USE_MODELSCOPE_HUB: true - - - id: plot - build: pip install -e ../../node-hub/opencv-plot - path: opencv-plot - inputs: - image: - source: camera/image - queue_size: 1 - text: dora-qwenvl/tick diff --git a/examples/vlm/dataflow_rerun.yml b/examples/vlm/qwenvl-dev.yml similarity index 54% rename from examples/vlm/dataflow_rerun.yml rename to examples/vlm/qwenvl-dev.yml index 6c933a2f..2c0846bb 100644 --- a/examples/vlm/dataflow_rerun.yml +++ b/examples/vlm/qwenvl-dev.yml @@ -1,4 +1,30 @@ nodes: + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + inputs: + tick: dora/timer/millis/2000 + outputs: + - audio + + - id: dora-vad + build: pip install -e ../../node-hub/dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install -e ../../node-hub/dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english + - id: camera build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture @@ -18,10 +44,9 @@ nodes: image: source: camera/image queue_size: 1 - tick: dora/timer/millis/400 + text: dora-distil-whisper/text outputs: - text - - tick env: DEFAULT_QUESTION: Describe the image in a very short sentence. 
# USE_MODELSCOPE_HUB: true @@ -33,7 +58,8 @@ nodes: image: source: camera/image queue_size: 1 - text: dora-qwenvl/tick + text_qwenvl: dora-qwenvl/text + text_whisper: dora-distil-whisper/text env: IMAGE_WIDTH: 640 IMAGE_HEIGHT: 480 diff --git a/examples/vlm/qwenvl.yml b/examples/vlm/qwenvl.yml new file mode 100755 index 00000000..796b9b1a --- /dev/null +++ b/examples/vlm/qwenvl.yml @@ -0,0 +1,67 @@ +nodes: + - id: dora-microphone + build: pip install dora-microphone + path: dora-microphone + inputs: + tick: dora/timer/millis/2000 + outputs: + - audio + + - id: dora-vad + build: pip install dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english + + - id: camera + build: pip install opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/50 + outputs: + - image + env: + CAPTURE_PATH: 0 + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 + + - id: dora-qwenvl + build: pip install dora-qwenvl + path: dora-qwenvl + inputs: + image: + source: camera/image + queue_size: 1 + text: dora-distil-whisper/text + outputs: + - text + env: + DEFAULT_QUESTION: Describe the image in a very short sentence. 
+ # USE_MODELSCOPE_HUB: true + + - id: plot + build: pip install dora-rerun + path: dora-rerun + inputs: + image: + source: camera/image + queue_size: 1 + text_qwenvl: dora-qwenvl/text + text_whisper: dora-distil-whisper/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 + README: | + # Visualization of QwenVL2 diff --git a/node-hub/dora-distil-whisper/README.md b/node-hub/dora-distil-whisper/README.md index f1707fc9..0c1854a4 100644 --- a/node-hub/dora-distil-whisper/README.md +++ b/node-hub/dora-distil-whisper/README.md @@ -1,3 +1,30 @@ -# Dora Node for transforming speech to text (English only) +# Dora Whisper Node for transforming speech to text -Check example at [examples/speech-to-text](examples/speech-to-text) +## YAML Specification + +This node is supposed to be used as follows: + +```yaml +- id: dora-distil-whisper + build: pip install dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english +``` + +## Examples + +- Speech to Text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt +- Vision Language Model + - github: https://github.com/dora-rs/dora/blob/main/examples/vlm + - website: https://dora-rs.ai/docs/examples/vlm + +## License + +Dora-whisper's code and model weights are released under the MIT License diff --git a/node-hub/dora-microphone/README.md b/node-hub/dora-microphone/README.md index 465a6243..021d9db6 100644 --- a/node-hub/dora-microphone/README.md +++ b/node-hub/dora-microphone/README.md @@ -1,5 +1,48 @@ -# Dora Node for recording data from microphone +# Collect data from microphone This node will send data as soon as the microphone volume is higher than a threshold. -Check example at [examples/speech-to-text](examples/speech-to-text) +This is using python Sounddevice. 
+ +It detects beginning and ending of voice activity within a stream of audio and returns the parts that contain activity. + +There's a maximum amount of voice duration, to avoid having no input for too long. + +## Input/Output Specification + +- inputs: + - tick: This is used to detect when the dataflow is finished. +- outputs: + - audio: 16kHz sampled audio sent by chunk + +## YAML Specification + +```yaml +- id: dora-microphone + description: Microphone + build: pip install dora-microphone + path: dora-microphone + inputs: + tick: dora/timer/millis/2000 + outputs: + - audio +``` + +## Reference documentation + +- dora-microphone + - github: https://github.com/dora-rs/dora/blob/main/node-hub/dora-microphone + - website: http://dora-rs.ai/docs/nodes/microphone +- sounddevice + - website: https://python-sounddevice.readthedocs.io/en/0.5.1/ + - github: https://github.com/spatialaudio/python-sounddevice/tree/master + +## Examples + +- Speech to Text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt + +## License + +The code and model weights are released under the MIT License. 
diff --git a/node-hub/dora-microphone/dora_microphone/main.py b/node-hub/dora-microphone/dora_microphone/main.py index cb65b6ac..aa8e03c6 100644 --- a/node-hub/dora-microphone/dora_microphone/main.py +++ b/node-hub/dora-microphone/dora_microphone/main.py @@ -16,13 +16,19 @@ def main(): start_recording_time = tm.time() node = Node() + always_none = node.next(timeout=0.001) is None + finished = False + # pylint: disable=unused-argument def callback(indata, frames, time, status): - nonlocal buffer, node, start_recording_time + nonlocal buffer, node, start_recording_time, finished if tm.time() - start_recording_time > MAX_DURATION: audio_data = np.array(buffer).ravel().astype(np.float32) / 32768.0 node.send_output("audio", pa.array(audio_data)) + if not always_none: + event = node.next(timeout=0.001) + finished = event is None buffer = [] start_recording_time = tm.time() else: @@ -32,5 +38,5 @@ def main(): with sd.InputStream( callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE ): - while True: - sd.sleep(int(100 * 1000)) + while not finished: + sd.sleep(int(1000)) diff --git a/node-hub/dora-qwenvl/README.md b/node-hub/dora-qwenvl/README.md index 88f4e564..535ff3c3 100644 --- a/node-hub/dora-qwenvl/README.md +++ b/node-hub/dora-qwenvl/README.md @@ -1,3 +1,32 @@ # Dora QwenVL2 node Experimental node for using a VLM within dora. + +## YAML Specification + +This node is supposed to be used as follows: + +```yaml +- id: dora-qwenvl + build: pip install dora-qwenvl + path: dora-qwenvl + inputs: + image: + source: camera/image + queue_size: 1 + text: dora-distil-whisper/text + outputs: + - text + env: + DEFAULT_QUESTION: Describe the image in a very short sentence. 
+``` + +## Additional documentation + +- Qwenvl: https://github.com/QwenLM/Qwen-VL + +## Examples + +- Vision Language Model + - Github: https://github.com/dora-rs/dora/blob/main/examples/vlm + - Website: https://dora-rs.ai/docs/examples/vlm diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index 31f11502..475d1f06 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -85,7 +85,12 @@ def generate(frames: dict, question): return_tensors="pt", ) - device = "cuda:0" if torch.cuda.is_available() else "cpu" + if torch.backends.mps.is_available(): + device = torch.device("mps") + elif torch.cuda.is_available(): + device = torch.device("cuda", 0) + else: + device = torch.device("cpu") inputs = inputs.to(device) # Inference: Generation of the output @@ -181,7 +186,7 @@ def main(): ) elif event_type == "ERROR": - raise RuntimeError(event["error"]) + print("Event Error:" + event["error"]) if __name__ == "__main__": diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml index a662ded8..346c5f9f 100644 --- a/node-hub/dora-qwenvl/pyproject.toml +++ b/node-hub/dora-qwenvl/pyproject.toml @@ -15,7 +15,7 @@ python = "^3.7" dora-rs = "^0.3.6" numpy = "< 2.0.0" torch = "^2.2.0" -torchvision = "^0.19" +torchvision = "^0.20" transformers = "^4.45" qwen-vl-utils = "^0.0.2" accelerate = "^0.33" diff --git a/node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer b/node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer index b2889e65..198374ea 160000 --- a/node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer +++ b/node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer @@ -1 +1 @@ -Subproject commit b2889e65cfe62571ced3ce88f00e7d80b41fee69 +Subproject commit 198374ea8c4a2ec2ddae86c35448d21aa9756f37 diff --git a/node-hub/dora-rerun/README.md b/node-hub/dora-rerun/README.md index 5a6ee21f..0082cad9 100644 --- 
a/node-hub/dora-rerun/README.md +++ b/node-hub/dora-rerun/README.md @@ -7,25 +7,27 @@ This nodes is still experimental and format for passing Images, Bounding boxes, ## Getting Started ```bash -cargo install --force rerun-cli@0.15.1 - -## To install this package -git clone git@github.com:dora-rs/dora.git -cargo install --git https://github.com/dora-rs/dora dora-rerun +pip install dora-rerun ``` ## Adding to existing graph: ```yaml -- id: rerun - custom: - source: dora-rerun - inputs: - image: webcam/image - text: webcam/text - boxes2d: object_detection/bbox - envs: - RERUN_MEMORY_LIMIT: 25% +- id: plot + build: pip install dora-rerun + path: dora-rerun + inputs: + image: + source: camera/image + queue_size: 1 + text_qwenvl: dora-qwenvl/text + text_whisper: dora-distil-whisper/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 + README: | + # Visualization + RERUN_MEMORY_LIMIT: 25% ``` ## Input definition @@ -67,3 +69,25 @@ Make sure to name the dataflow as follows: ## Configurations - RERUN_MEMORY_LIMIT: Rerun memory limit + +## Reference documentation + +- dora-rerun + - github: https://github.com/dora-rs/dora/blob/main/node-hub/dora-rerun + - website: http://dora-rs.ai/docs/nodes/rerun +- rerun + - github: https://github.com/rerun-io/rerun + - website: https://rerun.io + +## Examples + +- speech to text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt +- vision language model + - github: https://github.com/dora-rs/dora/blob/main/examples/vlm + - website: https://dora-rs.ai/docs/examples/vlm + +## License + +The code and model weights are released under the MIT License. diff --git a/node-hub/dora-vad/README.md b/node-hub/dora-vad/README.md index b5e7ad8a..ba41cb10 100644 --- a/node-hub/dora-vad/README.md +++ b/node-hub/dora-vad/README.md @@ -1,3 +1,45 @@ # Speech Activity Detection(VAD) This is using Silero VAD. 
+ +It detects beginning and ending of voice activity within a stream of audio and returns the parts that contain activity. + +There's a maximum amount of voice duration, to avoid having no input for too long. + +## Input/Output Specification + +- inputs: + - audio: 8kHz or 16kHz sample rate. +- outputs: + - audio: Same as input but truncated + +## YAML Specification + +```yaml +- id: dora-vad + description: Voice activity detection. See: Silero VAD + build: pip install dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio +``` + +## Reference documentation + +- dora-vad + - github: https://github.com/dora-rs/dora/blob/main/node-hub/dora-vad + - website: http://dora-rs.ai/docs/nodes/sidero +- Silero + - github: https://github.com/snakers4/silero-vad + +## Examples + +- Speech to Text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt + +## License + +The code and model weights are released under the MIT License. diff --git a/node-hub/dora_rdt_1b/__init__.py b/node-hub/dora_rdt_1b/__init__.py deleted file mode 100644 index ed4e2191..00000000 --- a/node-hub/dora_rdt_1b/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -import os -import sys -from pathlib import Path - -# Define the path to the README file relative to the package directory -readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md") - -# Read the content of the README file -try: - with open(readme_path, "r", encoding="utf-8") as f: - __doc__ = f.read() -except FileNotFoundError: - __doc__ = "README file not found." - - -# Set up the import hook - -submodule_path = Path(__file__).resolve().parent / "RoboticsDiffusionTransformer" -sys.path.insert(0, str(submodule_path))