From 5e03843f1e7b9fc39714a1c5e3985b5eedca4ddd Mon Sep 17 00:00:00 2001 From: haixuantao Date: Thu, 9 Jan 2025 18:11:28 +0100 Subject: [PATCH] Improve node documentation --- node-hub/dora-microphone/README.md | 47 +++++++++++++++++++++++++-- node-hub/dora-rerun/README.md | 52 ++++++++++++++++++++++-------- node-hub/dora-vad/README.md | 42 ++++++++++++++++++++++++ node-hub/dora_rdt_1b/__init__.py | 19 ----------- 4 files changed, 125 insertions(+), 35 deletions(-) delete mode 100644 node-hub/dora_rdt_1b/__init__.py diff --git a/node-hub/dora-microphone/README.md b/node-hub/dora-microphone/README.md index 465a6243..021d9db6 100644 --- a/node-hub/dora-microphone/README.md +++ b/node-hub/dora-microphone/README.md @@ -1,5 +1,48 @@ -# Dora Node for recording data from microphone +# Collect data from microphone This node will send data as soon as the microphone volume is higher than a threshold. -Check example at [examples/speech-to-text](examples/speech-to-text) +This uses the Python sounddevice library. + +It detects the beginning and end of voice activity within a stream of audio and returns the parts that contain activity. + +There is a maximum voice duration, to avoid having no input for too long. + +## Input/Output Specification + +- inputs: + - tick: This is used to detect when the dataflow is finished. +- outputs: + - audio: audio sampled at 16kHz, sent in chunks + +## YAML Specification + +```yaml +- id: dora-vad + description: Voice activity detection. 
See: Silero + build: pip install dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio +``` + +## Reference documentation + +- dora-microphone + - github: https://github.com/dora-rs/dora/blob/main/node-hub/dora-microphone + - website: http://dora-rs.ai/docs/nodes/microphone +- sounddevice + - website: https://python-sounddevice.readthedocs.io/en/0.5.1/ + - github: https://github.com/spatialaudio/python-sounddevice/tree/master + +## Examples + +- Speech to Text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt + +## License + +The code and model weights are released under the MIT License. diff --git a/node-hub/dora-rerun/README.md b/node-hub/dora-rerun/README.md index 5a6ee21f..0082cad9 100644 --- a/node-hub/dora-rerun/README.md +++ b/node-hub/dora-rerun/README.md @@ -7,25 +7,27 @@ This nodes is still experimental and format for passing Images, Bounding boxes, ## Getting Started ```bash -cargo install --force rerun-cli@0.15.1 - -## To install this package -git clone git@github.com:dora-rs/dora.git -cargo install --git https://github.com/dora-rs/dora dora-rerun +pip install dora-rerun ``` ## Adding to existing graph: ```yaml -- id: rerun - custom: - source: dora-rerun - inputs: - image: webcam/image - text: webcam/text - boxes2d: object_detection/bbox - envs: - RERUN_MEMORY_LIMIT: 25% +- id: plot + build: pip install dora-rerun + path: dora-rerun + inputs: + image: + source: camera/image + queue_size: 1 + text_qwenvl: dora-qwenvl/text + text_whisper: dora-distil-whisper/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 + README: | + # Visualization + RERUN_MEMORY_LIMIT: 25% ``` ## Input definition @@ -67,3 +69,25 @@ Make sure to name the dataflow as follows: ## Configurations - RERUN_MEMORY_LIMIT: Rerun memory limit + +## Reference documentation + +- dora-rerun + - github: https://github.com/dora-rs/dora/blob/main/node-hub/dora-rerun + - website: 
http://dora-rs.ai/docs/nodes/rerun +- rerun + - github: https://github.com/rerun-io/rerun + - website: https://rerun.io + +## Examples + +- speech to text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt +- vision language model + - github: https://github.com/dora-rs/dora/blob/main/examples/vlm + - website: https://dora-rs.ai/docs/examples/vlm + +## License + +The code and model weights are released under the MIT License. diff --git a/node-hub/dora-vad/README.md b/node-hub/dora-vad/README.md index b5e7ad8a..ba41cb10 100644 --- a/node-hub/dora-vad/README.md +++ b/node-hub/dora-vad/README.md @@ -1,3 +1,45 @@ # Speech Activity Detection(VAD) This is using Silero VAD. + +It detects the beginning and end of voice activity within a stream of audio and returns the parts that contain activity. + +There is a maximum voice duration, to avoid having no input for too long. + +## Input/Output Specification + +- inputs: + - audio: 8kHz or 16kHz sample rate. +- outputs: + - audio: Same as input but truncated + +## YAML Specification + +```yaml +- id: dora-vad + description: Voice activity detection. See: Silero + build: pip install dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio +``` + +## Reference documentation + +- dora-vad + - github: https://github.com/dora-rs/dora/blob/main/node-hub/dora-vad + - website: http://dora-rs.ai/docs/nodes/sidero +- Silero + - github: https://github.com/snakers4/silero-vad + +## Examples + +- Speech to Text + - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text + - website: https://dora-rs.ai/docs/examples/stt + +## License + +The code and model weights are released under the MIT License. 
diff --git a/node-hub/dora_rdt_1b/__init__.py b/node-hub/dora_rdt_1b/__init__.py deleted file mode 100644 index ed4e2191..00000000 --- a/node-hub/dora_rdt_1b/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -import os -import sys -from pathlib import Path - -# Define the path to the README file relative to the package directory -readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md") - -# Read the content of the README file -try: - with open(readme_path, "r", encoding="utf-8") as f: - __doc__ = f.read() -except FileNotFoundError: - __doc__ = "README file not found." - - -# Set up the import hook - -submodule_path = Path(__file__).resolve().parent / "RoboticsDiffusionTransformer" -sys.path.insert(0, str(submodule_path))