From 763313cc7181deb15f65434d469e037004200d54 Mon Sep 17 00:00:00 2001 From: haixuantao Date: Fri, 3 Jan 2025 13:04:31 +0100 Subject: [PATCH 1/9] Fix readme to use run instead of up/start --- examples/speech-to-text/README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/speech-to-text/README.md b/examples/speech-to-text/README.md index 1853d8a4..1330553b 100644 --- a/examples/speech-to-text/README.md +++ b/examples/speech-to-text/README.md @@ -3,10 +3,8 @@ Make sure to have, dora, pip and cargo installed. ```bash -dora up dora build dataflow.yml -dora start dataflow.yml +dora run dataflow.yml -# In another terminal -terminal-print +# Wait for the whisper model to download which can takes a bit of time. ``` From 6691024b421fb72af8758a766ac708f4e4350953 Mon Sep 17 00:00:00 2001 From: haixuantao Date: Fri, 3 Jan 2025 13:07:11 +0100 Subject: [PATCH 2/9] Make microphone able to stop if there is a tick input --- examples/speech-to-text/dataflow.yml | 2 ++ node-hub/dora-microphone/dora_microphone/main.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/examples/speech-to-text/dataflow.yml b/examples/speech-to-text/dataflow.yml index bad75e7e..1742bd9f 100644 --- a/examples/speech-to-text/dataflow.yml +++ b/examples/speech-to-text/dataflow.yml @@ -2,6 +2,8 @@ nodes: - id: dora-microphone build: pip install -e ../../node-hub/dora-microphone path: dora-microphone + inputs: + tick: dora/timer/millis/2000 outputs: - audio diff --git a/node-hub/dora-microphone/dora_microphone/main.py b/node-hub/dora-microphone/dora_microphone/main.py index cb65b6ac..d709dc7b 100644 --- a/node-hub/dora-microphone/dora_microphone/main.py +++ b/node-hub/dora-microphone/dora_microphone/main.py @@ -16,6 +16,8 @@ def main(): start_recording_time = tm.time() node = Node() + always_none = node.next(timeout=0.001) is None + # pylint: disable=unused-argument def callback(indata, frames, time, status): nonlocal buffer, node, 
start_recording_time @@ -32,5 +34,10 @@ def main(): with sd.InputStream( callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE ): - while True: - sd.sleep(int(100 * 1000)) + event_stream_is_none = False + while not event_stream_is_none: + if not always_none: + event = node.next() + event_stream_is_none = event is None + else: + sd.sleep(int(1000)) From 90a71b8e1deefe1f07812fffb42bae27b06faca8 Mon Sep 17 00:00:00 2001 From: haixuantao Date: Fri, 3 Jan 2025 13:31:35 +0100 Subject: [PATCH 3/9] Make VLM macos compatible --- examples/speech-to-text/README.md | 2 +- examples/vlm/README.md | 9 +++++++++ node-hub/dora-qwenvl/dora_qwenvl/main.py | 7 ++++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/examples/speech-to-text/README.md b/examples/speech-to-text/README.md index 1330553b..ffe7ca53 100644 --- a/examples/speech-to-text/README.md +++ b/examples/speech-to-text/README.md @@ -1,4 +1,4 @@ -# Dora echo example +# Dora Speech to Text example Make sure to have, dora, pip and cargo installed. diff --git a/examples/vlm/README.md b/examples/vlm/README.md index ab6eec8c..471586de 100644 --- a/examples/vlm/README.md +++ b/examples/vlm/README.md @@ -1 +1,10 @@ # Quick example on using a VLM with dora-rs + +Make sure to have, dora, pip and cargo installed. + +```bash +dora build dataflow.yml +dora run dataflow.yml + +# Wait for the qwenvl model to download which can takes a bit of time. 
+``` diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index 31f11502..4407ce27 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -85,7 +85,12 @@ def generate(frames: dict, question): return_tensors="pt", ) - device = "cuda:0" if torch.cuda.is_available() else "cpu" + if torch.backends.mps.is_available(): + device = torch.device("mps") + elif torch.cuda.is_available(): + device = torch.device("cuda", 0) + else: + device = torch.device("cpu") inputs = inputs.to(device) # Inference: Generation of the output From fc8bc8a4fe99ee939c0f4b9e45911bce13e7d469 Mon Sep 17 00:00:00 2001 From: haixuantao Date: Fri, 3 Jan 2025 14:00:32 +0100 Subject: [PATCH 4/9] Improve vlm by adding speech to text within example --- examples/vlm/README.md | 7 +++- examples/vlm/dataflow.yml | 42 ++++++++++++++++--- .../{dataflow_rerun.yml => vision_only.yml} | 0 .../dora-microphone/dora_microphone/main.py | 15 ++++--- node-hub/dora-qwenvl/pyproject.toml | 2 +- 5 files changed, 50 insertions(+), 16 deletions(-) rename examples/vlm/{dataflow_rerun.yml => vision_only.yml} (100%) diff --git a/examples/vlm/README.md b/examples/vlm/README.md index 471586de..92b22669 100644 --- a/examples/vlm/README.md +++ b/examples/vlm/README.md @@ -3,8 +3,13 @@ Make sure to have, dora, pip and cargo installed. ```bash +dora build vision_only.yml +dora run vision_only.yml + +# Wait for the qwenvl model to download which can takes a bit of time. + dora build dataflow.yml dora run dataflow.yml -# Wait for the qwenvl model to download which can takes a bit of time. +# Wait for the qwenvl, whisper model to download which can takes a bit of time. 
``` diff --git a/examples/vlm/dataflow.yml b/examples/vlm/dataflow.yml index 51908b37..2c0846bb 100644 --- a/examples/vlm/dataflow.yml +++ b/examples/vlm/dataflow.yml @@ -1,4 +1,30 @@ nodes: + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + inputs: + tick: dora/timer/millis/2000 + outputs: + - audio + + - id: dora-vad + build: pip install -e ../../node-hub/dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install -e ../../node-hub/dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english + - id: camera build: pip install -e ../../node-hub/opencv-video-capture path: opencv-video-capture @@ -18,20 +44,24 @@ nodes: image: source: camera/image queue_size: 1 - tick: dora/timer/millis/400 + text: dora-distil-whisper/text outputs: - text - - tick env: DEFAULT_QUESTION: Describe the image in a very short sentence. 
- # For China # USE_MODELSCOPE_HUB: true - id: plot - build: pip install -e ../../node-hub/opencv-plot - path: opencv-plot + build: cargo build -p dora-rerun --release + path: dora-rerun inputs: image: source: camera/image queue_size: 1 - text: dora-qwenvl/tick + text_qwenvl: dora-qwenvl/text + text_whisper: dora-distil-whisper/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 + README: | + # Visualization of QwenVL2 diff --git a/examples/vlm/dataflow_rerun.yml b/examples/vlm/vision_only.yml similarity index 100% rename from examples/vlm/dataflow_rerun.yml rename to examples/vlm/vision_only.yml diff --git a/node-hub/dora-microphone/dora_microphone/main.py b/node-hub/dora-microphone/dora_microphone/main.py index d709dc7b..aa8e03c6 100644 --- a/node-hub/dora-microphone/dora_microphone/main.py +++ b/node-hub/dora-microphone/dora_microphone/main.py @@ -17,14 +17,18 @@ def main(): node = Node() always_none = node.next(timeout=0.001) is None + finished = False # pylint: disable=unused-argument def callback(indata, frames, time, status): - nonlocal buffer, node, start_recording_time + nonlocal buffer, node, start_recording_time, finished if tm.time() - start_recording_time > MAX_DURATION: audio_data = np.array(buffer).ravel().astype(np.float32) / 32768.0 node.send_output("audio", pa.array(audio_data)) + if not always_none: + event = node.next(timeout=0.001) + finished = event is None buffer = [] start_recording_time = tm.time() else: @@ -34,10 +38,5 @@ def main(): with sd.InputStream( callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE ): - event_stream_is_none = False - while not event_stream_is_none: - if not always_none: - event = node.next() - event_stream_is_none = event is None - else: - sd.sleep(int(1000)) + while not finished: + sd.sleep(int(1000)) diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml index a662ded8..346c5f9f 100644 --- a/node-hub/dora-qwenvl/pyproject.toml +++ 
b/node-hub/dora-qwenvl/pyproject.toml @@ -15,7 +15,7 @@ python = "^3.7" dora-rs = "^0.3.6" numpy = "< 2.0.0" torch = "^2.2.0" -torchvision = "^0.19" +torchvision = "^0.20" transformers = "^4.45" qwen-vl-utils = "^0.0.2" accelerate = "^0.33" From 0fe6e896353ec5551189d3925de282cb2575c58d Mon Sep 17 00:00:00 2001 From: haixuantao Date: Fri, 3 Jan 2025 22:50:37 +0100 Subject: [PATCH 5/9] Make dora error not panic but a warning --- node-hub/dora-qwenvl/dora_qwenvl/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index 4407ce27..475d1f06 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -186,7 +186,7 @@ def main(): ) elif event_type == "ERROR": - raise RuntimeError(event["error"]) + print("Event Error:" + event["error"]) if __name__ == "__main__": From d3ac0a3d33fc38fa260a309fd99492fb9e7092ed Mon Sep 17 00:00:00 2001 From: haixuantao Date: Mon, 6 Jan 2025 14:28:21 +0100 Subject: [PATCH 6/9] Improve `README` and create 2 dataflow, one for dev and one for full remote version --- examples/speech-to-text/README.md | 25 +- .../{dataflow.yml => whisper-dev.yml} | 0 examples/speech-to-text/whisper.yml | 33 + examples/vlm/{dataflow.yml => qwenvl-dev.yml} | 0 examples/vlm/qwenvl.yml | 1968 +++++++++++++++++ examples/vlm/vision_only.yml | 41 - 6 files changed, 2024 insertions(+), 43 deletions(-) rename examples/speech-to-text/{dataflow.yml => whisper-dev.yml} (100%) create mode 100644 examples/speech-to-text/whisper.yml rename examples/vlm/{dataflow.yml => qwenvl-dev.yml} (100%) create mode 100755 examples/vlm/qwenvl.yml delete mode 100644 examples/vlm/vision_only.yml diff --git a/examples/speech-to-text/README.md b/examples/speech-to-text/README.md index ffe7ca53..c8ef9109 100644 --- a/examples/speech-to-text/README.md +++ b/examples/speech-to-text/README.md @@ -3,8 +3,29 @@ Make sure to have, dora, pip and cargo 
installed. ```bash -dora build dataflow.yml -dora run dataflow.yml +dora build whisper.yml +dora run whisper.yml # Wait for the whisper model to download which can takes a bit of time. ``` + +## Graph Visualization + +```mermaid + +flowchart TB + dora-microphone + dora-vad + dora-distil-whisper + dora-rerun[/dora-rerun\] +subgraph ___dora___ [dora] + subgraph ___timer_timer___ [timer] + dora/timer/secs/2[\secs/2/] + end +end + dora/timer/secs/2 -- tick --> dora-microphone + dora-microphone -- audio --> dora-vad + dora-vad -- audio as input --> dora-distil-whisper + dora-distil-whisper -- text as original_text --> dora-rerun + +``` diff --git a/examples/speech-to-text/dataflow.yml b/examples/speech-to-text/whisper-dev.yml similarity index 100% rename from examples/speech-to-text/dataflow.yml rename to examples/speech-to-text/whisper-dev.yml diff --git a/examples/speech-to-text/whisper.yml b/examples/speech-to-text/whisper.yml new file mode 100644 index 00000000..479d495f --- /dev/null +++ b/examples/speech-to-text/whisper.yml @@ -0,0 +1,33 @@ +nodes: + - id: dora-microphone + description: Microphone + build: pip install dora-microphone + path: dora-microphone + inputs: + tick: dora/timer/millis/2000 + outputs: + - audio + + - id: dora-vad + build: pip install dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-whisper + build: pip install dora-whisper + path: dora-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english + + - id: dora-rerun + build: pip install dora-rerun + path: dora-rerun + inputs: + original_text: dora-whisper/text diff --git a/examples/vlm/dataflow.yml b/examples/vlm/qwenvl-dev.yml similarity index 100% rename from examples/vlm/dataflow.yml rename to examples/vlm/qwenvl-dev.yml diff --git a/examples/vlm/qwenvl.yml b/examples/vlm/qwenvl.yml new file mode 100755 index 00000000..0889ef54 --- /dev/null +++ b/examples/vlm/qwenvl.yml @@ -0,0 +1,1968 @@ + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + File not found · GitHub + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ Skip to content + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + +
+ + + + + + + + + +
+
+
+ + + + + + + + + + + + +
+ +
+ +
+ +
+ + + + / + + dora + + + Public +
+ + +
+ +
+ + +
+
+ +
+
+ + + + +
+ + + + + + +
+ + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+ +
+
+ +
+ +
+

Footer

+ + + + +
+
+ + + + + © 2025 GitHub, Inc. + +
+ + +
+
+ + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + diff --git a/examples/vlm/vision_only.yml b/examples/vlm/vision_only.yml deleted file mode 100644 index 6c933a2f..00000000 --- a/examples/vlm/vision_only.yml +++ /dev/null @@ -1,41 +0,0 @@ -nodes: - - id: camera - build: pip install -e ../../node-hub/opencv-video-capture - path: opencv-video-capture - inputs: - tick: dora/timer/millis/50 - outputs: - - image - env: - CAPTURE_PATH: 0 - IMAGE_WIDTH: 640 - IMAGE_HEIGHT: 480 - - - id: dora-qwenvl - build: pip install -e ../../node-hub/dora-qwenvl - path: dora-qwenvl - inputs: - image: - source: camera/image - queue_size: 1 - tick: dora/timer/millis/400 - outputs: - - text - - tick - env: - DEFAULT_QUESTION: Describe the image in a very short sentence. - # USE_MODELSCOPE_HUB: true - - - id: plot - build: cargo build -p dora-rerun --release - path: dora-rerun - inputs: - image: - source: camera/image - queue_size: 1 - text: dora-qwenvl/tick - env: - IMAGE_WIDTH: 640 - IMAGE_HEIGHT: 480 - README: | - # Visualization of QwenVL2 From 036c9fa93cb0760ff859a77d12659d4feed6dc63 Mon Sep 17 00:00:00 2001 From: haixuantao Date: Mon, 6 Jan 2025 17:03:27 +0100 Subject: [PATCH 7/9] Improve README on examples --- examples/speech-to-text/README.md | 4 +- examples/speech-to-text/whisper.yml | 4 +- examples/vlm/README.md | 8 +- examples/vlm/qwenvl.yml | 2035 +---------------- node-hub/dora-distil-whisper/README.md | 31 +- node-hub/dora-qwenvl/README.md | 29 + .../dora_rdt_1b/RoboticsDiffusionTransformer | 2 +- 7 files changed, 132 insertions(+), 1981 deletions(-) diff --git a/examples/speech-to-text/README.md b/examples/speech-to-text/README.md index c8ef9109..6111a79c 100644 --- a/examples/speech-to-text/README.md +++ b/examples/speech-to-text/README.md @@ -3,8 +3,8 @@ Make sure to have, dora, pip and cargo installed. 
```bash -dora build whisper.yml -dora run whisper.yml +dora build https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-text/whisper.yml +dora run https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-text/whisper.yml # Wait for the whisper model to download which can takes a bit of time. ``` diff --git a/examples/speech-to-text/whisper.yml b/examples/speech-to-text/whisper.yml index 479d495f..ce919154 100644 --- a/examples/speech-to-text/whisper.yml +++ b/examples/speech-to-text/whisper.yml @@ -17,8 +17,8 @@ nodes: - audio - id: dora-whisper - build: pip install dora-whisper - path: dora-whisper + build: pip install dora-distil-whisper + path: dora-distil-whisper inputs: input: dora-vad/audio outputs: diff --git a/examples/vlm/README.md b/examples/vlm/README.md index 92b22669..25db91be 100644 --- a/examples/vlm/README.md +++ b/examples/vlm/README.md @@ -3,13 +3,9 @@ Make sure to have, dora, pip and cargo installed. ```bash -dora build vision_only.yml -dora run vision_only.yml +dora build https://raw.githubusercontent.com/dora-rs/dora/main/examples/vlm/qwenvl.yml -# Wait for the qwenvl model to download which can takes a bit of time. - -dora build dataflow.yml -dora run dataflow.yml +dora run https://raw.githubusercontent.com/dora-rs/dora/main/examples/vlm/qwenvl.yml # Wait for the qwenvl, whisper model to download which can takes a bit of time. 
``` diff --git a/examples/vlm/qwenvl.yml b/examples/vlm/qwenvl.yml index 0889ef54..796b9b1a 100755 --- a/examples/vlm/qwenvl.yml +++ b/examples/vlm/qwenvl.yml @@ -1,1968 +1,67 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - File not found · GitHub - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
- Skip to content - - - - - - - - - - - - -
-
- - - - - - - - - - - - - - -
- -
- - - - - - - - -
- - - - - -
- - - - - - - - - -
-
-
- - - - - - - - - - - - -
- -
- -
- -
- - - - / - - dora - - - Public -
- - -
- -
- - -
-
- -
-
- - - - -
- - - - - - -
- - - - - - - - - - - - - - - - - - -
-
- - - - -
- -
- -
-
- -
- -
-

Footer

- - - - -
-
- - - - - © 2025 GitHub, Inc. - -
- - -
-
- - - - - - - - - - - - - - - - - - - -
- -
-
- - - +nodes: + - id: dora-microphone + build: pip install dora-microphone + path: dora-microphone + inputs: + tick: dora/timer/millis/2000 + outputs: + - audio + + - id: dora-vad + build: pip install dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english + + - id: camera + build: pip install opencv-video-capture + path: opencv-video-capture + inputs: + tick: dora/timer/millis/50 + outputs: + - image + env: + CAPTURE_PATH: 0 + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 + + - id: dora-qwenvl + build: pip install dora-qwenvl + path: dora-qwenvl + inputs: + image: + source: camera/image + queue_size: 1 + text: dora-distil-whisper/text + outputs: + - text + env: + DEFAULT_QUESTION: Describe the image in a very short sentence. + # USE_MODELSCOPE_HUB: true + + - id: plot + build: pip install dora-rerun + path: dora-rerun + inputs: + image: + source: camera/image + queue_size: 1 + text_qwenvl: dora-qwenvl/text + text_whisper: dora-distil-whisper/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 + README: | + # Visualization of QwenVL2 diff --git a/node-hub/dora-distil-whisper/README.md b/node-hub/dora-distil-whisper/README.md index f1707fc9..0c1854a4 100644 --- a/node-hub/dora-distil-whisper/README.md +++ b/node-hub/dora-distil-whisper/README.md @@ -1,3 +1,30 @@ -# Dora Node for transforming speech to text (English only) +# Dora Whisper Node for transforming speech to text -Check example at [examples/speech-to-text](examples/speech-to-text) +## YAML Specification + +This node is supposed to be used as follows: + +```yaml +- id: dora-distil-whisper + build: pip install dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english +``` + +## Examples + +- Speech to Text + - github: 
https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt +- Vision Language Model + - github: https://github.com/dora-rs/dora/blob/main/examples/vlm + - website: https://dora-rs.ai/docs/examples/vlm + +## License + +Dora-whisper's code and model weights are released under the MIT License diff --git a/node-hub/dora-qwenvl/README.md b/node-hub/dora-qwenvl/README.md index 88f4e564..535ff3c3 100644 --- a/node-hub/dora-qwenvl/README.md +++ b/node-hub/dora-qwenvl/README.md @@ -1,3 +1,32 @@ # Dora QwenVL2 node Experimental node for using a VLM within dora. + +## YAML Specification + +This node is supposed to be used as follows: + +```yaml +- id: dora-qwenvl + build: pip install dora-qwenvl + path: dora-qwenvl + inputs: + image: + source: camera/image + queue_size: 1 + text: dora-distil-whisper/text + outputs: + - text + env: + DEFAULT_QUESTION: Describe the image in a very short sentence. +``` + +## Additional documentation + +- Qwenvl: https://github.com/QwenLM/Qwen-VL + +## Examples + +- Vision Language Model + - Github: https://github.com/dora-rs/dora/blob/main/examples/vlm + - Website: https://dora-rs.ai/docs/examples/vlm diff --git a/node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer b/node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer index b2889e65..198374ea 160000 --- a/node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer +++ b/node-hub/dora-rdt-1b/dora_rdt_1b/RoboticsDiffusionTransformer @@ -1 +1 @@ -Subproject commit b2889e65cfe62571ced3ce88f00e7d80b41fee69 +Subproject commit 198374ea8c4a2ec2ddae86c35448d21aa9756f37 From 5e03843f1e7b9fc39714a1c5e3985b5eedca4ddd Mon Sep 17 00:00:00 2001 From: haixuantao Date: Thu, 9 Jan 2025 18:11:28 +0100 Subject: [PATCH 8/9] Improve node documentation --- node-hub/dora-microphone/README.md | 47 +++++++++++++++++++++++++-- node-hub/dora-rerun/README.md | 52 ++++++++++++++++++++++-------- node-hub/dora-vad/README.md | 42 
++++++++++++++++++++++++ node-hub/dora_rdt_1b/__init__.py | 19 ----------- 4 files changed, 125 insertions(+), 35 deletions(-) delete mode 100644 node-hub/dora_rdt_1b/__init__.py diff --git a/node-hub/dora-microphone/README.md b/node-hub/dora-microphone/README.md index 465a6243..021d9db6 100644 --- a/node-hub/dora-microphone/README.md +++ b/node-hub/dora-microphone/README.md @@ -1,5 +1,48 @@ -# Dora Node for recording data from microphone +# Collect data from microphone This node will send data as soon as the microphone volume is higher than a threshold. -Check example at [examples/speech-to-text](examples/speech-to-text) +This is using python Sounddevice. + +It detects beginning and ending of voice activity within a stream of audio and returns the parts that contain activity. + +There's a maximum amount of voice duration, to avoid having no input for too long. + +## Input/Output Specification + +- inputs: + - tick: This is used to detect when the dataflow is finished. +- outputs: + - audio: 16kHz sampled audio sent by chunk + +## YAML Specification + +```yaml +- id: dora-microphone + description: Microphone + build: pip install dora-microphone + path: dora-microphone + inputs: + tick: dora/timer/millis/2000 + outputs: + - audio +``` + +## Reference documentation + +- dora-microphone + - github: https://github.com/dora-rs/dora/blob/main/node-hub/dora-microphone + - website: http://dora-rs.ai/docs/nodes/microphone +- sounddevice + - website: https://python-sounddevice.readthedocs.io/en/0.5.1/ + - github: https://github.com/spatialaudio/python-sounddevice/tree/master + +## Examples + +- Speech to Text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt + +## License + +The code and model weights are released under the MIT License. 
diff --git a/node-hub/dora-rerun/README.md b/node-hub/dora-rerun/README.md index 5a6ee21f..0082cad9 100644 --- a/node-hub/dora-rerun/README.md +++ b/node-hub/dora-rerun/README.md @@ -7,25 +7,27 @@ This nodes is still experimental and format for passing Images, Bounding boxes, ## Getting Started ```bash -cargo install --force rerun-cli@0.15.1 - -## To install this package -git clone git@github.com:dora-rs/dora.git -cargo install --git https://github.com/dora-rs/dora dora-rerun +pip install dora-rerun ``` ## Adding to existing graph: ```yaml -- id: rerun - custom: - source: dora-rerun - inputs: - image: webcam/image - text: webcam/text - boxes2d: object_detection/bbox - envs: - RERUN_MEMORY_LIMIT: 25% +- id: plot + build: pip install dora-rerun + path: dora-rerun + inputs: + image: + source: camera/image + queue_size: 1 + text_qwenvl: dora-qwenvl/text + text_whisper: dora-distil-whisper/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 + README: | + # Visualization + RERUN_MEMORY_LIMIT: 25% ``` ## Input definition @@ -67,3 +69,25 @@ Make sure to name the dataflow as follows: ## Configurations - RERUN_MEMORY_LIMIT: Rerun memory limit + +## Reference documentation + +- dora-rerun + - github: https://github.com/dora-rs/dora/blob/main/node-hub/dora-rerun + - website: http://dora-rs.ai/docs/nodes/rerun +- rerun + - github: https://github.com/rerun-io/rerun + - website: https://rerun.io + +## Examples + +- speech to text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt +- vision language model + - github: https://github.com/dora-rs/dora/blob/main/examples/vlm + - website: https://dora-rs.ai/docs/examples/vlm + +## License + +The code and model weights are released under the MIT License. 
diff --git a/node-hub/dora-vad/README.md b/node-hub/dora-vad/README.md index b5e7ad8a..ba41cb10 100644 --- a/node-hub/dora-vad/README.md +++ b/node-hub/dora-vad/README.md @@ -1,3 +1,45 @@ # Speech Activity Detection(VAD) This is using Silero VAD. + +It detects beginning and ending of voice activity within a stream of audio and returns the parts that contain activity. + +There's a maximum amount of voice duration, to avoid having no input for too long. + +## Input/Output Specification + +- inputs: + - audio: 8kHz or 16kHz sample rate. +- outputs: + - audio: Same as input but truncated + +## YAML Specification + +```yaml +- id: dora-vad + description: Voice activity detection. See Silero VAD + build: pip install dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio +``` + +## Reference documentation + +- dora-vad + - github: https://github.com/dora-rs/dora/blob/main/node-hub/dora-vad + - website: http://dora-rs.ai/docs/nodes/sidero +- Silero + - github: https://github.com/snakers4/silero-vad + +## Examples + +- Speech to Text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt + +## License + +The code and model weights are released under the MIT License. diff --git a/node-hub/dora_rdt_1b/__init__.py b/node-hub/dora_rdt_1b/__init__.py deleted file mode 100644 index ed4e2191..00000000 --- a/node-hub/dora_rdt_1b/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -import os -import sys -from pathlib import Path - -# Define the path to the README file relative to the package directory -readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md") - -# Read the content of the README file -try: - with open(readme_path, "r", encoding="utf-8") as f: - __doc__ = f.read() -except FileNotFoundError: - __doc__ = "README file not found." 
- - -# Set up the import hook - -submodule_path = Path(__file__).resolve().parent / "RoboticsDiffusionTransformer" -sys.path.insert(0, str(submodule_path)) From f286009a7a2367e6a1408b26b128dfd82b92c3cc Mon Sep 17 00:00:00 2001 From: haixuantao Date: Sat, 11 Jan 2025 11:39:57 +0100 Subject: [PATCH 9/9] Add 2 missing `README.md` --- examples/python-ros2-dataflow/README.md | 9 +++++++++ examples/rust-dataflow/README.md | 7 +++++++ 2 files changed, 16 insertions(+) create mode 100644 examples/python-ros2-dataflow/README.md create mode 100644 examples/rust-dataflow/README.md diff --git a/examples/python-ros2-dataflow/README.md b/examples/python-ros2-dataflow/README.md new file mode 100644 index 00000000..b07f18fc --- /dev/null +++ b/examples/python-ros2-dataflow/README.md @@ -0,0 +1,9 @@ +# Quick Python ROS2 example + +To get started: + +```bash +source /opt/ros/humble/setup.bash && ros2 run turtlesim turtlesim_node & +source /opt/ros/humble/setup.bash && ros2 run examples_rclcpp_minimal_service service_main & +cargo run --example python-ros2-dataflow --features="ros2-examples" +``` diff --git a/examples/rust-dataflow/README.md b/examples/rust-dataflow/README.md new file mode 100644 index 00000000..71eaf706 --- /dev/null +++ b/examples/rust-dataflow/README.md @@ -0,0 +1,7 @@ +# Quick Rust example + +To get started: + +```bash +cargo run --example rust-dataflow +```