From 7debc07b4e2bd74675b97bc8588eb7f905c96a18 Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Fri, 16 Aug 2024 13:09:08 +0200 Subject: [PATCH] adding keyboard listener, microphone, and whisper node --- node-hub/dora-distil-whisper/README.md | 3 + .../dora_distil_whisper/__init__.py | 11 ++++ .../dora_distil_whisper/main.py | 42 +++++++++++++ .../dora-distil-whisper/graphs/dataflow.yml | 20 ++++++ node-hub/dora-distil-whisper/pyproject.toml | 29 +++++++++ .../tests/test_arrow_sender.py | 2 + node-hub/dora-microphone/README.md | 3 + .../dora_microphone/__init__.py | 11 ++++ .../dora-microphone/dora_microphone/main.py | 61 +++++++++++++++++++ .../dora_microphone_save/__init__.py | 11 ++++ .../dora_microphone_save/main.py | 61 +++++++++++++++++++ node-hub/dora-microphone/pyproject.toml | 29 +++++++++ .../tests/test_arrow_sender.py | 2 + node-hub/keyboard-listener/README.md | 3 + .../keyboard_listener/__init__.py | 11 ++++ .../keyboard_listener/main.py | 19 ++++++ node-hub/keyboard-listener/pyproject.toml | 29 +++++++++ 17 files changed, 347 insertions(+) create mode 100644 node-hub/dora-distil-whisper/README.md create mode 100644 node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py create mode 100644 node-hub/dora-distil-whisper/dora_distil_whisper/main.py create mode 100644 node-hub/dora-distil-whisper/graphs/dataflow.yml create mode 100644 node-hub/dora-distil-whisper/pyproject.toml create mode 100644 node-hub/dora-distil-whisper/tests/test_arrow_sender.py create mode 100644 node-hub/dora-microphone/README.md create mode 100644 node-hub/dora-microphone/dora_microphone/__init__.py create mode 100644 node-hub/dora-microphone/dora_microphone/main.py create mode 100644 node-hub/dora-microphone/dora_microphone_save/__init__.py create mode 100644 node-hub/dora-microphone/dora_microphone_save/main.py create mode 100644 node-hub/dora-microphone/pyproject.toml create mode 100644 node-hub/dora-microphone/tests/test_arrow_sender.py create mode 100644 
node-hub/keyboard-listener/README.md create mode 100644 node-hub/keyboard-listener/keyboard_listener/__init__.py create mode 100644 node-hub/keyboard-listener/keyboard_listener/main.py create mode 100644 node-hub/keyboard-listener/pyproject.toml diff --git a/node-hub/dora-distil-whisper/README.md b/node-hub/dora-distil-whisper/README.md new file mode 100644 index 00000000..125e029f --- /dev/null +++ b/node-hub/dora-distil-whisper/README.md @@ -0,0 +1,3 @@ +# Dora Node for sending arrow data. + +This node sends DATA that is specified within the environment variable or from the `--data` argument. diff --git a/node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py b/node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py new file mode 100644 index 00000000..ac3cbef9 --- /dev/null +++ b/node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py @@ -0,0 +1,11 @@ +import os + +# Define the path to the README file relative to the package directory +readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md") + +# Read the content of the README file +try: + with open(readme_path, "r", encoding="utf-8") as f: + __doc__ = f.read() +except FileNotFoundError: + __doc__ = "README file not found." 
diff --git a/node-hub/dora-distil-whisper/dora_distil_whisper/main.py b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py
new file mode 100644
index 00000000..d938ddc8
--- /dev/null
+++ b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py
@@ -0,0 +1,57 @@
+"""Dora node that transcribes incoming audio with Distil-Whisper."""
+
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+from dora import Node
+import pyarrow as pa
+import os
+
+# Ask the Hugging Face libraries to run offline (local cache only).
+# NOTE(review): this is set after transformers is imported; confirm the
+# flag is still honoured at this point for the installed version.
+os.environ["TRANSFORMERS_OFFLINE"] = "1"
+
+# Language hint handed to the model at generation time. Overridable through
+# the TARGET_LANGUAGE environment variable; the default keeps the value that
+# used to be hard-coded.
+TARGET_LANGUAGE = os.getenv("TARGET_LANGUAGE", "chinese")
+
+# Run on the first CUDA device in half precision when available, else on CPU.
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+model_id = "distil-whisper/distil-large-v3"
+
+# local_files_only=True: the weights must already be in the local cache.
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id,
+    torch_dtype=torch_dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True,
+    local_files_only=True,
+)
+model.to(device)
+
+processor = AutoProcessor.from_pretrained(model_id)
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    torch_dtype=torch_dtype,
+    device=device,
+    generate_kwargs={"language": TARGET_LANGUAGE},
+)
+
+
+def main():
+    """Transcribe each input event and publish the result on `text`."""
+    node = Node()
+    for event in node:
+        if event["type"] == "INPUT":
+            # The event value is converted to a NumPy array and fed to the
+            # speech-recognition pipeline as-is.
+            audio = event["value"].to_numpy()
+            result = pipe(audio)
+            node.send_output("text", pa.array([result["text"]]))
diff --git a/node-hub/dora-distil-whisper/graphs/dataflow.yml b/node-hub/dora-distil-whisper/graphs/dataflow.yml
new file mode 100644
index 00000000..5c93226f
--- /dev/null
+++ b/node-hub/dora-distil-whisper/graphs/dataflow.yml
@@ -0,0 +1,20 @@
+nodes:
+  - id: dora-microphone
+    build: pip install -e ../../dora-microphone
+    path: dora-microphone
+    outputs:
+      - audio
+
+  - id: dora-distil-whisper
+    build: pip install -e ../.
+ path: dora-distil-whisper + inputs: + audio: dora-microphone/audio + outputs: + - text + + - id: terminal-print + build: cargo build -p terminal-print + path: dynamic + inputs: + text: dora-distil-whisper/text diff --git a/node-hub/dora-distil-whisper/pyproject.toml b/node-hub/dora-distil-whisper/pyproject.toml new file mode 100644 index 00000000..e232f3aa --- /dev/null +++ b/node-hub/dora-distil-whisper/pyproject.toml @@ -0,0 +1,29 @@ +[tool.poetry] +name = "dora-distil-whisper" +version = "0.3.5" +authors = [ + "Haixuan Xavier Tao ", + "Enzo Le Van ", +] +description = "Dora dora-distil-whisper" +license = "MIT License" +homepage = "https://github.com/dora-rs/dora.git" +documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-distil-whisper/README.md" +readme = "README.md" +packages = [{ include = "dora_distil_whisper" }] + +[tool.poetry.dependencies] +dora-rs = "0.3.5" +numpy = "< 2.0.0" +pyarrow = ">= 5.0.0" +transformers = ">= 4.0.0" + +[tool.poetry.scripts] +dora-distil-whisper = "dora_distil_whisper.main:main" + +[build-system] +requires = ["poetry-core>=1.8.0"] +build-backend = "poetry.core.masonry.api" + +[project] +readme = "README.md" diff --git a/node-hub/dora-distil-whisper/tests/test_arrow_sender.py b/node-hub/dora-distil-whisper/tests/test_arrow_sender.py new file mode 100644 index 00000000..201975fc --- /dev/null +++ b/node-hub/dora-distil-whisper/tests/test_arrow_sender.py @@ -0,0 +1,2 @@ +def test_placeholder(): + pass diff --git a/node-hub/dora-microphone/README.md b/node-hub/dora-microphone/README.md new file mode 100644 index 00000000..125e029f --- /dev/null +++ b/node-hub/dora-microphone/README.md @@ -0,0 +1,3 @@ +# Dora Node for sending arrow data. + +This node sends DATA that is specified within the environment variable or from the `--data` argument. 
diff --git a/node-hub/dora-microphone/dora_microphone/__init__.py b/node-hub/dora-microphone/dora_microphone/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-microphone/dora_microphone/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-microphone/dora_microphone/main.py b/node-hub/dora-microphone/dora_microphone/main.py
new file mode 100644
index 00000000..dc05554d
--- /dev/null
+++ b/node-hub/dora-microphone/dora_microphone/main.py
@@ -0,0 +1,81 @@
+"""Dora node that records the microphone and emits one utterance at a time."""
+
+import sounddevice as sd
+import numpy as np
+import pyarrow as pa
+import time as tm
+from enum import Enum
+
+from dora import Node
+
+
+class RecordingState(Enum):
+    """Enum for recording states."""
+
+    PENDING = 0  # no speech yet, nothing is being kept
+    RUNNING = 1  # speech detected, chunks are being buffered
+    SILENCE = 2  # speech paused, waiting for it to resume or time out
+
+
+def detect_speech(audio_data, threshold):
+    """Check if the amplitude of the audio signal exceeds the threshold.
+
+    Both signs are compared explicitly instead of using ``np.abs``: for int16
+    input the absolute value of -32768 overflows and stays negative, so a
+    fully clipped negative sample would otherwise never count as speech.
+    """
+    return np.any((audio_data > threshold) | (audio_data < -threshold))
+
+
+def main():
+    """Capture 16 kHz mono int16 audio and send each utterance on `audio`.
+
+    An utterance starts with the first chunk above the amplitude threshold
+    and is sent, as float32 samples divided by 32768, once the signal has
+    stayed below the threshold for more than ``silence_duration`` seconds.
+    """
+    # Parameters
+    threshold = 500  # Threshold for detecting speech (adjust this as needed)
+    SAMPLE_RATE = 16000
+    silence_duration = 0.5  # Duration of silence before stopping the recording
+
+    # Initialize buffer and recording flag
+    buffer = []  # int16 chunks of the utterance being recorded
+    state = RecordingState.PENDING
+    silence_start_time = tm.time()
+    node = Node()
+
+    def callback(indata, frames, time, status):
+        """Advance the recording state machine with one captured chunk."""
+        nonlocal buffer, state, silence_start_time
+
+        # Copy the channel so the chunk no longer refers to the stream's
+        # buffer once the callback has returned.
+        chunk = indata[:, 0].copy()
+        if detect_speech(chunk, threshold):
+            if state == RecordingState.PENDING:
+                buffer = []
+            # Also leave SILENCE when speech resumes after a short pause;
+            # otherwise the old silence timer could end the utterance
+            # prematurely at the next quiet chunk.
+            state = RecordingState.RUNNING
+            buffer.append(chunk)
+        elif state == RecordingState.RUNNING:
+            silence_start_time = tm.time()  # Reset silence timer
+            buffer.append(chunk)
+            state = RecordingState.SILENCE
+        elif state == RecordingState.SILENCE:
+            if tm.time() - silence_start_time > silence_duration:
+                audio_data = np.concatenate(buffer).astype(np.float32) / 32768.0
+                node.send_output("audio", pa.array(audio_data))
+                state = RecordingState.PENDING
+            else:
+                buffer.append(chunk)
+
+    # Start recording
+    with sd.InputStream(
+        callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE
+    ):
+        # The callback does all the work; just keep the main thread alive.
+        while True:
+            sd.sleep(int(100 * 1000))
diff --git a/node-hub/dora-microphone/dora_microphone_save/__init__.py b/node-hub/dora-microphone/dora_microphone_save/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-microphone/dora_microphone_save/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-microphone/dora_microphone_save/main.py b/node-hub/dora-microphone/dora_microphone_save/main.py
new file mode 100644
index 00000000..ad3e9741
--- /dev/null
+++ b/node-hub/dora-microphone/dora_microphone_save/main.py
@@ -0,0 +1,81 @@
+"""Dora node that records the microphone and emits one utterance at a time."""
+
+import sounddevice as sd
+import numpy as np
+import pyarrow as pa
+import time as tm
+from enum import Enum
+
+from dora import Node
+
+
+class RecordingState(Enum):
+    """Enum for recording states."""
+
+    PENDING = 0  # no speech yet, nothing is being kept
+    RUNNING = 1  # speech detected, chunks are being buffered
+    SILENCE = 2  # speech paused, waiting for it to resume or time out
+
+
+def detect_speech(audio_data, threshold):
+    """Check if the amplitude of the audio signal exceeds the threshold.
+
+    Both signs are compared explicitly instead of using ``np.abs``: for int16
+    input the absolute value of -32768 overflows and stays negative, so a
+    fully clipped negative sample would otherwise never count as speech.
+    """
+    return np.any((audio_data > threshold) | (audio_data < -threshold))
+
+
+def main():
+    """Capture 16 kHz mono int16 audio and send each utterance on `audio`.
+
+    An utterance starts with the first chunk above the amplitude threshold
+    and is sent, as float32 samples divided by 32768, once the signal has
+    stayed below the threshold for more than ``silence_duration`` seconds.
+    """
+    # Parameters
+    threshold = 500  # Threshold for detecting speech (adjust this as needed)
+    SAMPLE_RATE = 16000
+    silence_duration = 4  # Duration of silence before stopping the recording
+
+    # Initialize buffer and recording flag
+    buffer = []  # int16 chunks of the utterance being recorded
+    state = RecordingState.PENDING
+    silence_start_time = tm.time()
+    node = Node()
+
+    def callback(indata, frames, time, status):
+        """Advance the recording state machine with one captured chunk."""
+        nonlocal buffer, state, silence_start_time
+
+        # Copy the channel so the chunk no longer refers to the stream's
+        # buffer once the callback has returned.
+        chunk = indata[:, 0].copy()
+        if detect_speech(chunk, threshold):
+            if state == RecordingState.PENDING:
+                buffer = []
+            # Also leave SILENCE when speech resumes after a short pause;
+            # otherwise the old silence timer could end the utterance
+            # prematurely at the next quiet chunk.
+            state = RecordingState.RUNNING
+            buffer.append(chunk)
+        elif state == RecordingState.RUNNING:
+            silence_start_time = tm.time()  # Reset silence timer
+            buffer.append(chunk)
+            state = RecordingState.SILENCE
+        elif state == RecordingState.SILENCE:
+            if tm.time() - silence_start_time > silence_duration:
+                audio_data = np.concatenate(buffer).astype(np.float32) / 32768.0
+                node.send_output("audio", pa.array(audio_data))
+                state = RecordingState.PENDING
+            else:
+                buffer.append(chunk)
+
+    # Start recording
+    with sd.InputStream(
+        callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE
+    ):
+        # The callback does all the work; just keep the main thread alive.
+        while True:
+            sd.sleep(int(100 * 1000))
diff --git
a/node-hub/dora-microphone/pyproject.toml b/node-hub/dora-microphone/pyproject.toml new file mode 100644 index 00000000..45aecb30 --- /dev/null +++ b/node-hub/dora-microphone/pyproject.toml @@ -0,0 +1,29 @@ +[tool.poetry] +name = "dora-microphone" +version = "0.3.5" +authors = [ + "Haixuan Xavier Tao ", + "Enzo Le Van ", +] +description = "Dora dora-microphone" +license = "MIT License" +homepage = "https://github.com/dora-rs/dora.git" +documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-microphone/README.md" +readme = "README.md" +packages = [{ include = "dora_microphone" }] + +[tool.poetry.dependencies] +dora-rs = "0.3.5" +numpy = "< 2.0.0" +pyarrow = ">= 5.0.0" +sounddevice = "^0.4.6" + +[tool.poetry.scripts] +dora-microphone = "dora_microphone.main:main" + +[build-system] +requires = ["poetry-core>=1.8.0"] +build-backend = "poetry.core.masonry.api" + +[project] +readme = "README.md" diff --git a/node-hub/dora-microphone/tests/test_arrow_sender.py b/node-hub/dora-microphone/tests/test_arrow_sender.py new file mode 100644 index 00000000..201975fc --- /dev/null +++ b/node-hub/dora-microphone/tests/test_arrow_sender.py @@ -0,0 +1,2 @@ +def test_placeholder(): + pass diff --git a/node-hub/keyboard-listener/README.md b/node-hub/keyboard-listener/README.md new file mode 100644 index 00000000..125e029f --- /dev/null +++ b/node-hub/keyboard-listener/README.md @@ -0,0 +1,3 @@ +# Dora Node for sending arrow data. + +This node sends DATA that is specified within the environment variable or from the `--data` argument. 
diff --git a/node-hub/keyboard-listener/keyboard_listener/__init__.py b/node-hub/keyboard-listener/keyboard_listener/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/keyboard-listener/keyboard_listener/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/keyboard-listener/keyboard_listener/main.py b/node-hub/keyboard-listener/keyboard_listener/main.py
new file mode 100644
index 00000000..6ba2b49b
--- /dev/null
+++ b/node-hub/keyboard-listener/keyboard_listener/main.py
@@ -0,0 +1,31 @@
+"""Dora node that publishes typed keyboard characters on the `char` output."""
+
+from pynput import keyboard
+from pynput.keyboard import Key, Events
+import pyarrow as pa
+from dora import Node
+
+
+def main():
+    """Send every pressed character key as a one-element array on `char`.
+
+    This is the entry point that the `keyboard-listener` script declared in
+    pyproject.toml (`keyboard_listener.main:main`) resolves to.
+    """
+    node = Node()
+
+    with keyboard.Events() as events:
+        while True:
+            # Poll with a one-second timeout; nothing is sent on timeout.
+            event = events.get(1.0)
+            if not isinstance(event, Events.Press):
+                continue
+            # Keys without a character (no `char` attribute, or `None`)
+            # are ignored.
+            char = getattr(event.key, "char", None)
+            if char is not None:
+                node.send_output("char", pa.array([char]))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/keyboard-listener/pyproject.toml b/node-hub/keyboard-listener/pyproject.toml
new file mode 100644
index 00000000..0f7a2a18
--- /dev/null
+++ b/node-hub/keyboard-listener/pyproject.toml
@@ -0,0 +1,29 @@
+[tool.poetry]
+name = "keyboard-listener"
+version = "0.3.5"
+authors = [
+    "Haixuan Xavier Tao ",
+    "Enzo Le Van ",
+]
+description = "Dora keyboard-listener"
+license = "MIT License"
+homepage = "https://github.com/dora-rs/dora.git"
+documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/keyboard-listener/README.md"
+readme = "README.md"
+packages = [{ include = "keyboard_listener" }] + +[tool.poetry.dependencies] +dora-rs = "0.3.5" +numpy = "< 2.0.0" +pyarrow = ">= 5.0.0" +pynput = "^1.7.6" + +[tool.poetry.scripts] +keyboard-listener = "keyboard_listener.main:main" + +[build-system] +requires = ["poetry-core>=1.8.0"] +build-backend = "poetry.core.masonry.api" + +[project] +readme = "README.md"