adding keyboard listener, microphone, and whisper node

1 year ago · 7debc07b4e
--- a/node-hub/dora-distil-whisper/README.md
+++ b/node-hub/dora-distil-whisper/README.md
@@ -0,0 +1,3 @@
 # Dora Node for sending arrow data.

 This node sends DATA that is specified within the environment variable or from the `--data` argument.
--- a/node-hub/dora-distil-whisper/dora_distil_whisper/init.py
+++ b/node-hub/dora-distil-whisper/dora_distil_whisper/init.py
@@ -0,0 +1,11 @@
 import os

 # README.md is looked up one directory above this package.
 readme_path = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "README.md",
 )

 # Use the README text as the package docstring; fall back to a fixed
 # message when the file does not exist.
 try:
    f = open(readme_path, "r", encoding="utf-8")
 except FileNotFoundError:
    __doc__ = "README file not found."
 else:
    with f:
        __doc__ = f.read()
--- a/node-hub/dora-distil-whisper/dora_distil_whisper/main.py
+++ b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py
@@ -0,0 +1,42 @@
 import torch
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 from dora import Node
 import pyarrow as pa
 import os

 # Ask transformers to stay offline.
 # NOTE(review): this is assigned after `transformers` has already been
 # imported above; if the library reads the flag at import time it has no
 # effect here - confirm. `local_files_only=True` is therefore passed
 # explicitly to every `from_pretrained` call below.
 os.environ["TRANSFORMERS_OFFLINE"] = "1"

 # Use the first CUDA device with half precision when available, otherwise
 # run on CPU in full precision.
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

 model_id = "distil-whisper/distil-large-v3"

 # Transcription language; TARGET_LANGUAGE overrides the previous hard-coded
 # value, which stays the default.
 target_language = os.getenv("TARGET_LANGUAGE", "chinese")

 model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    local_files_only=True,
 )
 model.to(device)

 # Load the processor from local files as well, consistent with the model.
 processor = AutoProcessor.from_pretrained(model_id, local_files_only=True)

 # Speech-recognition pipeline used by main() for every audio input.
 pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    torch_dtype=torch_dtype,
    device=device,
    generate_kwargs={"language": target_language},
 )


 def main():
    """Transcribe every audio input and publish the recognised text.

    Iterates over the events of a dora ``Node``. For each event whose type
    is ``"INPUT"``, the value is converted to a NumPy array, passed to the
    module-level ``pipe``, and the ``"text"`` field of the result is sent
    on the ``text`` output as a one-element Arrow array. Other events are
    ignored.
    """
    node = Node()
    for event in node:
        if event["type"] != "INPUT":
            continue
        samples = event["value"].to_numpy()
        transcription = pipe(samples)
        node.send_output("text", pa.array([transcription["text"]]))
--- a/node-hub/dora-distil-whisper/graphs/dataflow.yml
+++ b/node-hub/dora-distil-whisper/graphs/dataflow.yml
@@ -0,0 +1,20 @@
 # Dataflow: microphone audio -> distil-whisper transcription -> terminal print.
 nodes:
  # Publishes recorded audio on the `audio` output.
  - id: dora-microphone
    build: pip install -e ../../dora-microphone
    path: dora-microphone
    outputs:
      - audio

  # Consumes the microphone's `audio` output and publishes `text`.
  - id: dora-distil-whisper
    build: pip install -e ../.
    path: dora-distil-whisper
    inputs:
      audio: dora-microphone/audio
    outputs:
      - text

  # Receives the `text` output of the whisper node.
  # NOTE(review): `path: dynamic` presumably means this node is started
  # separately rather than spawned by the dataflow - confirm.
  - id: terminal-print
    build: cargo build -p terminal-print
    path: dynamic
    inputs:
      text: dora-distil-whisper/text
--- a/node-hub/dora-distil-whisper/pyproject.toml
+++ b/node-hub/dora-distil-whisper/pyproject.toml
@@ -0,0 +1,29 @@
 [tool.poetry]
 name = "dora-distil-whisper"
 version = "0.3.5"
 authors = [
    "Haixuan Xavier Tao <tao.xavier@outlook.com>",
    "Enzo Le Van <dev@enzo-le-van.fr>",
 ]
 description = "Dora dora-distil-whisper"
 license = "MIT License"
 homepage = "https://github.com/dora-rs/dora.git"
 documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-distil-whisper/README.md"
 readme = "README.md"
 packages = [{ include = "dora_distil_whisper" }]

 [tool.poetry.dependencies]
 dora-rs = "0.3.5"
 numpy = "< 2.0.0"
 pyarrow = ">= 5.0.0"
 # dora_distil_whisper/main.py imports torch directly, so declare it here.
 # NOTE(review): the lower bound is a guess, and main.py's
 # `low_cpu_mem_usage=True` may additionally require `accelerate` - confirm.
 torch = ">= 2.0.0"
 transformers = ">= 4.0.0"

 [tool.poetry.scripts]
 dora-distil-whisper = "dora_distil_whisper.main:main"

 [build-system]
 requires = ["poetry-core>=1.8.0"]
 build-backend = "poetry.core.masonry.api"

 [project]
 readme = "README.md"
--- a/node-hub/dora-distil-whisper/tests/test_arrow_sender.py
+++ b/node-hub/dora-distil-whisper/tests/test_arrow_sender.py
@@ -0,0 +1,2 @@
 def test_placeholder():
    """Placeholder test; performs no checks."""
--- a/node-hub/dora-microphone/README.md
+++ b/node-hub/dora-microphone/README.md
@@ -0,0 +1,3 @@
 # Dora Node for sending arrow data.

 This node sends DATA that is specified within the environment variable or from the `--data` argument.
--- a/node-hub/dora-microphone/dora_microphone/init.py
+++ b/node-hub/dora-microphone/dora_microphone/init.py
@@ -0,0 +1,11 @@
 import os

 # README.md is looked up one directory above this package.
 readme_path = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "README.md",
 )

 # Use the README text as the package docstring; fall back to a fixed
 # message when the file does not exist.
 try:
    f = open(readme_path, "r", encoding="utf-8")
 except FileNotFoundError:
    __doc__ = "README file not found."
 else:
    with f:
        __doc__ = f.read()
--- a/node-hub/dora-microphone/dora_microphone/main.py
+++ b/node-hub/dora-microphone/dora_microphone/main.py
@@ -0,0 +1,61 @@
 import sounddevice as sd
 import numpy as np
 import pyarrow as pa
 import time as tm
 from enum import Enum

 from dora import Node


 class RecordingState(Enum):
    """Phases of the recording state machine driven by the stream callback."""

    # Waiting for speech; the initial state and the state after a send.
    PENDING = 0
    # Speech was detected and samples are being buffered.
    RUNNING = 1
    # A non-speech chunk followed speech; the buffer is sent once the
    # silence has lasted longer than the configured duration.
    SILENCE = 2


 def detect_speech(audio_data, threshold):
    """Return whether any sample's magnitude exceeds ``threshold``.

    Args:
        audio_data: Array-like of audio samples.
        threshold: Amplitude that the absolute value of a sample must
            exceed for the chunk to count as speech.

    Returns:
        A NumPy boolean that is true when at least one sample satisfies
        ``abs(sample) > threshold`` and false otherwise, including for
        empty input.
    """
    samples = np.asarray(audio_data)
    # Widen signed integers before taking the absolute value: in int16,
    # abs(-32768) wraps back to -32768 and would never exceed the threshold.
    if np.issubdtype(samples.dtype, np.signedinteger):
        samples = samples.astype(np.int64)
    return np.any(np.abs(samples) > threshold)


 def main():
    """Record microphone audio and publish each detected utterance.

    Opens a single-channel int16 ``sounddevice`` input stream at 16000 Hz.
    The stream callback buffers samples from the first chunk that contains
    speech until the signal has stayed below the threshold for longer than
    ``silence_duration`` seconds, then sends the buffer on the ``audio``
    output as float32 samples scaled by 1 / 32768. This function never
    returns on its own.
    """
    # Parameters
    threshold = 500  # Threshold for detecting speech (adjust this as needed)
    SAMPLE_RATE = 16000
    silence_duration = 0.5  # Duration of silence before stopping the recording

    # Initialize buffer and recording flag
    buffer = []
    state = RecordingState.PENDING
    silence_start_time = tm.time()
    node = Node()

    def callback(indata, frames, time, status):
        """Advance the recording state machine with one chunk of input."""
        nonlocal buffer, state, silence_start_time

        samples = indata[:, 0]
        if detect_speech(samples, threshold):
            if state == RecordingState.PENDING:
                # A new utterance starts: drop the previous recording.
                buffer = []
            # Going back to RUNNING from SILENCE makes the next quiet chunk
            # restart the silence timer, so a short pause in the middle of
            # an utterance does not end the recording prematurely.
            state = RecordingState.RUNNING
            buffer.extend(samples)
        elif state == RecordingState.RUNNING:
            silence_start_time = tm.time()  # Start the silence timer
            buffer.extend(samples)
            state = RecordingState.SILENCE
        elif state == RecordingState.SILENCE:
            if tm.time() - silence_start_time > silence_duration:
                # Enough silence: publish the utterance as scaled float32.
                audio_data = np.array(buffer).ravel().astype(np.float32) / 32768.0
                node.send_output("audio", pa.array(audio_data))
                state = RecordingState.PENDING
            else:
                buffer.extend(samples)

    # Start recording; all work happens in the callback, so the loop below
    # only keeps the stream open.
    with sd.InputStream(
        callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE
    ):
        while True:
            sd.sleep(int(100 * 1000))
--- a/node-hub/dora-microphone/dora_microphone_save/init.py
+++ b/node-hub/dora-microphone/dora_microphone_save/init.py
@@ -0,0 +1,11 @@
 import os

 # README.md is looked up one directory above this package.
 readme_path = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "README.md",
 )

 # Use the README text as the package docstring; fall back to a fixed
 # message when the file does not exist.
 try:
    f = open(readme_path, "r", encoding="utf-8")
 except FileNotFoundError:
    __doc__ = "README file not found."
 else:
    with f:
        __doc__ = f.read()
--- a/node-hub/dora-microphone/dora_microphone_save/main.py
+++ b/node-hub/dora-microphone/dora_microphone_save/main.py
@@ -0,0 +1,61 @@
 import sounddevice as sd
 import numpy as np
 import pyarrow as pa
 import time as tm
 from enum import Enum

 from dora import Node


 class RecordingState(Enum):
    """Phases of the recording state machine driven by the stream callback."""

    # Waiting for speech; the initial state and the state after a send.
    PENDING = 0
    # Speech was detected and samples are being buffered.
    RUNNING = 1
    # A non-speech chunk followed speech; the buffer is sent once the
    # silence has lasted longer than the configured duration.
    SILENCE = 2


 def detect_speech(audio_data, threshold):
    """Return whether any sample's magnitude exceeds ``threshold``.

    Args:
        audio_data: Array-like of audio samples.
        threshold: Amplitude that the absolute value of a sample must
            exceed for the chunk to count as speech.

    Returns:
        A NumPy boolean that is true when at least one sample satisfies
        ``abs(sample) > threshold`` and false otherwise, including for
        empty input.
    """
    samples = np.asarray(audio_data)
    # Widen signed integers before taking the absolute value: in int16,
    # abs(-32768) wraps back to -32768 and would never exceed the threshold.
    if np.issubdtype(samples.dtype, np.signedinteger):
        samples = samples.astype(np.int64)
    return np.any(np.abs(samples) > threshold)


 def main():
    """Record microphone audio and publish each detected utterance.

    Opens a single-channel int16 ``sounddevice`` input stream at 16000 Hz.
    The stream callback buffers samples from the first chunk that contains
    speech until the signal has stayed below the threshold for longer than
    ``silence_duration`` seconds, then sends the buffer on the ``audio``
    output as float32 samples scaled by 1 / 32768. This function never
    returns on its own.
    """
    # Parameters
    threshold = 500  # Threshold for detecting speech (adjust this as needed)
    SAMPLE_RATE = 16000
    silence_duration = 4  # Duration of silence before stopping the recording

    # Initialize buffer and recording flag
    buffer = []
    state = RecordingState.PENDING
    silence_start_time = tm.time()
    node = Node()

    def callback(indata, frames, time, status):
        """Advance the recording state machine with one chunk of input."""
        nonlocal buffer, state, silence_start_time

        samples = indata[:, 0]
        if detect_speech(samples, threshold):
            if state == RecordingState.PENDING:
                # A new utterance starts: drop the previous recording.
                buffer = []
            # Going back to RUNNING from SILENCE makes the next quiet chunk
            # restart the silence timer, so a short pause in the middle of
            # an utterance does not end the recording prematurely.
            state = RecordingState.RUNNING
            buffer.extend(samples)
        elif state == RecordingState.RUNNING:
            silence_start_time = tm.time()  # Start the silence timer
            buffer.extend(samples)
            state = RecordingState.SILENCE
        elif state == RecordingState.SILENCE:
            if tm.time() - silence_start_time > silence_duration:
                # Enough silence: publish the utterance as scaled float32.
                audio_data = np.array(buffer).ravel().astype(np.float32) / 32768.0
                node.send_output("audio", pa.array(audio_data))
                state = RecordingState.PENDING
            else:
                buffer.extend(samples)

    # Start recording; all work happens in the callback, so the loop below
    # only keeps the stream open.
    with sd.InputStream(
        callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE
    ):
        while True:
            sd.sleep(int(100 * 1000))
--- a/node-hub/dora-microphone/pyproject.toml
+++ b/node-hub/dora-microphone/pyproject.toml
@@ -0,0 +1,29 @@
 [tool.poetry]
 name = "dora-microphone"
 version = "0.3.5"
 authors = [
    "Haixuan Xavier Tao <tao.xavier@outlook.com>",
    "Enzo Le Van <dev@enzo-le-van.fr>",
 ]
 description = "Dora dora-microphone"
 license = "MIT License"
 homepage = "https://github.com/dora-rs/dora.git"
 documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-microphone/README.md"
 readme = "README.md"
 packages = [{ include = "dora_microphone" }]

 [tool.poetry.dependencies]
 dora-rs = "0.3.5"
 numpy = "< 2.0.0"
 pyarrow = ">= 5.0.0"
 sounddevice = "^0.4.6"

 [tool.poetry.scripts]
 dora-microphone = "dora_microphone.main:main"

 [build-system]
 requires = ["poetry-core>=1.8.0"]
 build-backend = "poetry.core.masonry.api"

 [project]
 readme = "README.md"
--- a/node-hub/dora-microphone/tests/test_arrow_sender.py
+++ b/node-hub/dora-microphone/tests/test_arrow_sender.py
@@ -0,0 +1,2 @@
 def test_placeholder():
    """Placeholder test; performs no checks."""
--- a/node-hub/keyboard-listener/README.md
+++ b/node-hub/keyboard-listener/README.md
@@ -0,0 +1,3 @@
 # Dora Node for sending arrow data.

 This node sends DATA that is specified within the environment variable or from the `--data` argument.
--- a/node-hub/keyboard-listener/keyboard_listener/init.py
+++ b/node-hub/keyboard-listener/keyboard_listener/init.py
@@ -0,0 +1,11 @@
 import os

 # README.md is looked up one directory above this package.
 readme_path = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "README.md",
 )

 # Use the README text as the package docstring; fall back to a fixed
 # message when the file does not exist.
 try:
    f = open(readme_path, "r", encoding="utf-8")
 except FileNotFoundError:
    __doc__ = "README file not found."
 else:
    with f:
        __doc__ = f.read()
--- a/node-hub/keyboard-listener/keyboard_listener/main.py
+++ b/node-hub/keyboard-listener/keyboard_listener/main.py
@@ -0,0 +1,19 @@
 from pynput import keyboard
 from pynput.keyboard import Key, Events
 import pyarrow as pa
 from dora import Node


 def main():
    """Publish every key press that carries a character on the ``char`` output.

    The ``keyboard-listener`` console script declared in pyproject.toml
    points at ``keyboard_listener.main:main``, so the listening loop lives
    in this function instead of running as an import side effect.

    Keyboard events are polled with ``events.get(1.0)``. Only press events
    whose key has a non-``None`` ``char`` attribute are forwarded, each as
    a one-element Arrow array. This function never returns on its own.
    """
    node = Node()

    with keyboard.Events() as events:
        while True:
            event = events.get(1.0)
            # `get` may yield None; isinstance() rejects that as well as
            # any non-press event.
            if not isinstance(event, Events.Press):
                continue
            # Keys without a `char` attribute, or whose `char` is None,
            # are skipped.
            char = getattr(event.key, "char", None)
            if char is not None:
                node.send_output("char", pa.array([char]))


 if __name__ == "__main__":
    main()
--- a/node-hub/keyboard-listener/pyproject.toml
+++ b/node-hub/keyboard-listener/pyproject.toml
@@ -0,0 +1,29 @@
 [tool.poetry]
 name = "keyboard-listener"
 version = "0.3.5"
 authors = [
    "Haixuan Xavier Tao <tao.xavier@outlook.com>",
    "Enzo Le Van <dev@enzo-le-van.fr>",
 ]
 description = "Dora keyboard-listener"
 license = "MIT License"
 homepage = "https://github.com/dora-rs/dora.git"
 documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/keyboard-listener/README.md"
 readme = "README.md"
 packages = [{ include = "keyboard_listener" }]

 [tool.poetry.dependencies]
 dora-rs = "0.3.5"
 numpy = "< 2.0.0"
 pyarrow = ">= 5.0.0"
 pynput = "^1.7.6"

 [tool.poetry.scripts]
 keyboard-listener = "keyboard_listener.main:main"

 [build-system]
 requires = ["poetry-core>=1.8.0"]
 build-backend = "poetry.core.masonry.api"

 [project]
 readme = "README.md"