You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

whisper_node.py 1.6 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. import pyarrow as pa
  2. import whisper
  3. from pynput import keyboard
  4. from pynput.keyboard import Key, Events
  5. from dora import Node
  6. import torch
  7. import numpy as np
  8. import pyarrow as pa
  9. import sounddevice as sd
  10. import gc # garbage collect library
  11. model = whisper.load_model("base")
  12. SAMPLE_RATE = 16000
  13. node = Node()
  14. def get_text(duration) -> str:
  15. ## Microphone
  16. audio_data = sd.rec(
  17. int(SAMPLE_RATE * duration),
  18. samplerate=SAMPLE_RATE,
  19. channels=1,
  20. dtype=np.int16,
  21. blocking=True,
  22. )
  23. audio = audio_data.ravel().astype(np.float32) / 32768.0
  24. ## Speech to text
  25. audio = whisper.pad_or_trim(audio)
  26. return model.transcribe(audio, language="en")
  27. ## Check for keyboard event
  28. with keyboard.Events() as events:
  29. for dora_event in node:
  30. if dora_event["type"] == "INPUT":
  31. event = events.get(0.1)
  32. if (
  33. event is not None
  34. and (event.key == Key.alt_r or event.key == Key.ctrl_r)
  35. and isinstance(event, Events.Press)
  36. ):
  37. if event.key == Key.alt_r:
  38. result = get_text(5)
  39. node.send_output(
  40. "text_llm", pa.array([result["text"]]), dora_event["metadata"]
  41. )
  42. elif event.key == Key.ctrl_r:
  43. result = get_text(3)
  44. node.send_output(
  45. "text_policy",
  46. pa.array([result["text"]]),
  47. dora_event["metadata"],
  48. )