dora-rs
/
dora

 
			
							"""Module for speech recognition using Whisper.

This module provides functionality for capturing audio input and converting
it to text using the Whisper speech recognition model.
"""

import numpy as np
import pyarrow as pa
import sounddevice as sd
import whisper
from dora import Node
from pynput import keyboard
from pynput.keyboard import Events, Key

# Load the Whisper "base" checkpoint once at import time so every call to
# get_text() reuses the same model instance.
model = whisper.load_model("base")

# Sampling rate in Hz used when recording from the microphone in get_text().
# NOTE(review): presumably chosen to match the rate Whisper expects — confirm.
SAMPLE_RATE = 16000

# dora node handle: iterated for incoming events and used to send outputs.
node = Node()


def get_text(duration) -> dict:
    """Record audio from the input device and transcribe it with Whisper.

    Args:
        duration: Length of the recording in seconds.

    Returns:
        dict: The result object returned by ``model.transcribe``. The
            transcribed string is read from its ``"text"`` key by the callers
            in this module.

    """
    ## Microphone
    # Blocking, single-channel capture of 16-bit samples; the call returns
    # only after all requested frames have been recorded.
    audio_data = sd.rec(
        int(SAMPLE_RATE * duration),
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype=np.int16,
        blocking=True,
    )

    # Flatten to 1-D and scale the int16 samples to float32 in [-1.0, 1.0).
    audio = audio_data.ravel().astype(np.float32) / 32768.0

    ## Speech to text
    # Bring the clip to the fixed length Whisper works on before decoding.
    audio = whisper.pad_or_trim(audio)
    return model.transcribe(audio, language="en")


## Poll the keyboard on every dora input and transcribe on a hotkey press
# Right Alt  -> record 5 s and publish the transcript on "text_llm".
# Right Ctrl -> record 3 s and publish the transcript on "text_policy".
with keyboard.Events() as key_events:
    for dora_event in node:
        if dora_event["type"] != "INPUT":
            continue

        # Wait at most 0.1 s for a keyboard event; None means nothing happened.
        key_event = key_events.get(0.1)
        if key_event is None or not isinstance(key_event, Events.Press):
            continue

        # Pick the recording length and the output id for the pressed hotkey.
        if key_event.key == Key.alt_r:
            seconds, output_id = 5, "text_llm"
        elif key_event.key == Key.ctrl_r:
            seconds, output_id = 3, "text_policy"
        else:
            continue

        transcription = get_text(seconds)
        node.send_output(
            output_id,
            pa.array([transcription["text"]]),
            dora_event["metadata"],
        )