Browse Source

adding keyboard listener, microphone, and whisper node

tags/v0.3.6
haixuanTao 1 year ago
parent
commit
7debc07b4e
17 changed files with 347 additions and 0 deletions
  1. +3
    -0
      node-hub/dora-distil-whisper/README.md
  2. +11
    -0
      node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py
  3. +42
    -0
      node-hub/dora-distil-whisper/dora_distil_whisper/main.py
  4. +20
    -0
      node-hub/dora-distil-whisper/graphs/dataflow.yml
  5. +29
    -0
      node-hub/dora-distil-whisper/pyproject.toml
  6. +2
    -0
      node-hub/dora-distil-whisper/tests/test_arrow_sender.py
  7. +3
    -0
      node-hub/dora-microphone/README.md
  8. +11
    -0
      node-hub/dora-microphone/dora_microphone/__init__.py
  9. +61
    -0
      node-hub/dora-microphone/dora_microphone/main.py
  10. +11
    -0
      node-hub/dora-microphone/dora_microphone_save/__init__.py
  11. +61
    -0
      node-hub/dora-microphone/dora_microphone_save/main.py
  12. +29
    -0
      node-hub/dora-microphone/pyproject.toml
  13. +2
    -0
      node-hub/dora-microphone/tests/test_arrow_sender.py
  14. +3
    -0
      node-hub/keyboard-listener/README.md
  15. +11
    -0
      node-hub/keyboard-listener/keyboard_listener/__init__.py
  16. +19
    -0
      node-hub/keyboard-listener/keyboard_listener/main.py
  17. +29
    -0
      node-hub/keyboard-listener/pyproject.toml

+ 3
- 0
node-hub/dora-distil-whisper/README.md View File

@@ -0,0 +1,3 @@
# Dora Node for sending arrow data.

This node sends the DATA specified in the environment variable or via the `--data` argument.

+ 11
- 0
node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py View File

@@ -0,0 +1,11 @@
import os

# README.md is expected in the parent directory of this package.
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Expose the README text as the package docstring; fall back to a fixed
# message when the file does not exist.
try:
    f = open(readme_path, "r", encoding="utf-8")
except FileNotFoundError:
    __doc__ = "README file not found."
else:
    with f:
        __doc__ = f.read()

+ 42
- 0
node-hub/dora-distil-whisper/dora_distil_whisper/main.py View File

@@ -0,0 +1,42 @@
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from dora import Node
import pyarrow as pa
import os

# NOTE(review): this variable is set after `transformers` has already been
# imported at the top of the file -- confirm the library still honours it at
# this point. The model load below also passes local_files_only=True; the
# processor load does not.
os.environ["TRANSFORMERS_OFFLINE"] = "1"

model_id = "distil-whisper/distil-large-v3"

# Half precision on the first CUDA device when one is available, otherwise
# float32 on the CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Load the speech-to-text model from local files only, then move it to the
# selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    local_files_only=True,
    use_safetensors=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch_dtype,
)
model.to(device)

# The processor supplies the tokenizer and the feature extractor used below.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline shared by `main`; generation is configured for
# Chinese and capped at 128 new tokens.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    device=device,
    torch_dtype=torch_dtype,
    max_new_tokens=128,
    generate_kwargs={"language": "chinese"},
)


def main():
    """Transcribe the audio of every INPUT event and publish the text.

    Each INPUT event's value is converted to a NumPy array, run through the
    module-level `pipe`, and the resulting text is sent on the `text` output
    as a one-element Arrow array. Events of any other type are ignored.
    """
    node = Node()
    for event in node:
        if event["type"] != "INPUT":
            continue
        samples = event["value"].to_numpy()
        transcription = pipe(samples)["text"]
        node.send_output("text", pa.array([transcription]))

+ 20
- 0
node-hub/dora-distil-whisper/graphs/dataflow.yml View File

@@ -0,0 +1,20 @@
# Speech-to-text example: microphone audio -> distil-whisper -> terminal print.
nodes:
  # Publishes recorded audio on its `audio` output.
  - id: dora-microphone
    build: pip install -e ../../dora-microphone
    path: dora-microphone
    outputs:
      - audio

  # Consumes the microphone's `audio` output and publishes `text`.
  - id: dora-distil-whisper
    build: pip install -e ../.
    path: dora-distil-whisper
    inputs:
      audio: dora-microphone/audio
    outputs:
      - text

  # Receives the `text` output of dora-distil-whisper.
  # NOTE(review): `path: dynamic` presumably means this node is started
  # separately and attaches to the running dataflow -- confirm.
  - id: terminal-print
    build: cargo build -p terminal-print
    path: dynamic
    inputs:
      text: dora-distil-whisper/text

+ 29
- 0
node-hub/dora-distil-whisper/pyproject.toml View File

@@ -0,0 +1,29 @@
# Poetry packaging metadata for the dora-distil-whisper node.
[tool.poetry]
name = "dora-distil-whisper"
version = "0.3.5"
authors = [
    "Haixuan Xavier Tao <tao.xavier@outlook.com>",
    "Enzo Le Van <dev@enzo-le-van.fr>",
]
description = "Dora dora-distil-whisper"
license = "MIT License"
homepage = "https://github.com/dora-rs/dora.git"
documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-distil-whisper/README.md"
readme = "README.md"
packages = [{ include = "dora_distil_whisper" }]

# NOTE(review): dora_distil_whisper/main.py imports `torch`, which is not
# declared here -- confirm whether it should be listed.
[tool.poetry.dependencies]
dora-rs = "0.3.5"
numpy = "< 2.0.0"
pyarrow = ">= 5.0.0"
transformers = ">= 4.0.0"

# Console script; requires dora_distil_whisper/main.py to expose a callable
# named `main`.
[tool.poetry.scripts]
dora-distil-whisper = "dora_distil_whisper.main:main"

[build-system]
requires = ["poetry-core>=1.8.0"]
build-backend = "poetry.core.masonry.api"

# Repeats the readme setting from [tool.poetry] above.
[project]
readme = "README.md"

+ 2
- 0
node-hub/dora-distil-whisper/tests/test_arrow_sender.py View File

@@ -0,0 +1,2 @@
def test_placeholder():
    """Placeholder test that asserts nothing."""
    return None

+ 3
- 0
node-hub/dora-microphone/README.md View File

@@ -0,0 +1,3 @@
# Dora Node for sending arrow data.

This node sends the DATA specified in the environment variable or via the `--data` argument.

+ 11
- 0
node-hub/dora-microphone/dora_microphone/__init__.py View File

@@ -0,0 +1,11 @@
import os

# README.md is expected in the parent directory of this package.
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Expose the README text as the package docstring; fall back to a fixed
# message when the file does not exist.
try:
    f = open(readme_path, "r", encoding="utf-8")
except FileNotFoundError:
    __doc__ = "README file not found."
else:
    with f:
        __doc__ = f.read()

+ 61
- 0
node-hub/dora-microphone/dora_microphone/main.py View File

@@ -0,0 +1,61 @@
import sounddevice as sd
import numpy as np
import pyarrow as pa
import time as tm
from enum import Enum

from dora import Node


class RecordingState(Enum):
    """States of the voice-activated recorder driven by the audio callback in `main`."""

    # Idle: no utterance is currently being captured.
    PENDING = 0
    # Speech was detected; audio chunks are being accumulated.
    RUNNING = 1
    # A quiet chunk followed speech; waiting to see whether the utterance is over.
    SILENCE = 2


def detect_speech(audio_data, threshold):
    """Return whether any sample's absolute amplitude exceeds `threshold`.

    Args:
        audio_data: Array-like of audio samples (the callers pass int16 data).
        threshold: Amplitude that a sample must strictly exceed to count as
            speech.

    Returns:
        A NumPy boolean: True if at least one sample satisfies
        ``abs(sample) > threshold``, False otherwise (including for empty
        input).
    """
    samples = np.asarray(audio_data)
    # Widen signed integers before taking the absolute value: in int16,
    # abs(-32768) wraps around to -32768, so a full-scale negative sample
    # would otherwise never exceed the threshold.
    if np.issubdtype(samples.dtype, np.signedinteger):
        samples = samples.astype(np.int64)
    return np.any(np.abs(samples) > threshold)


def main():
    """Record utterances from the microphone and publish them as audio.

    A mono int16 input stream is opened at 16 kHz. Every incoming chunk is
    classified with `detect_speech`. Recording starts on the first loud chunk
    and continues through short pauses. Once the input has stayed quiet for
    longer than `silence_duration` seconds, the accumulated samples are
    scaled to float32 (divided by 32768) and sent on the `audio` output.

    The function blocks forever while the stream callback does the work.
    """
    # Parameters
    threshold = 500  # Amplitude a sample must exceed to count as speech
    SAMPLE_RATE = 16000
    silence_duration = 0.5  # Seconds of silence that end an utterance

    # Chunks of the utterance being recorded, plus the state machine's data
    chunks = []
    state = RecordingState.PENDING
    silence_start_time = tm.time()
    node = Node()

    def callback(indata, frames, time, status):
        """Advance the recording state machine with one chunk of audio."""
        nonlocal chunks, state, silence_start_time

        # Surface stream problems reported by the audio backend instead of
        # silently dropping them.
        if status:
            print(status)

        # Keep a private copy so the stored chunk does not alias the buffer
        # handed to the callback.
        samples = indata[:, 0].copy()

        if detect_speech(samples, threshold):
            if state == RecordingState.PENDING:
                chunks = []
            # Speech started or resumed. Leaving SILENCE here matters: the
            # next quiet chunk must start a fresh silence timer rather than
            # be measured against a pause that has already ended.
            state = RecordingState.RUNNING
            chunks.append(samples)
        elif state == RecordingState.RUNNING:
            silence_start_time = tm.time()  # Start the silence timer
            chunks.append(samples)
            state = RecordingState.SILENCE
        elif state == RecordingState.SILENCE:
            if tm.time() - silence_start_time > silence_duration:
                audio_data = np.concatenate(chunks).astype(np.float32) / 32768.0
                node.send_output("audio", pa.array(audio_data))
                state = RecordingState.PENDING
            else:
                chunks.append(samples)

    # Start recording; the callback runs for as long as the stream is open
    with sd.InputStream(
        callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE
    ):
        while True:
            sd.sleep(int(100 * 1000))

+ 11
- 0
node-hub/dora-microphone/dora_microphone_save/__init__.py View File

@@ -0,0 +1,11 @@
import os

# README.md is expected in the parent directory of this package.
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Expose the README text as the package docstring; fall back to a fixed
# message when the file does not exist.
try:
    f = open(readme_path, "r", encoding="utf-8")
except FileNotFoundError:
    __doc__ = "README file not found."
else:
    with f:
        __doc__ = f.read()

+ 61
- 0
node-hub/dora-microphone/dora_microphone_save/main.py View File

@@ -0,0 +1,61 @@
import sounddevice as sd
import numpy as np
import pyarrow as pa
import time as tm
from enum import Enum

from dora import Node


class RecordingState(Enum):
    """States of the voice-activated recorder driven by the audio callback in `main`."""

    # Idle: no utterance is currently being captured.
    PENDING = 0
    # Speech was detected; audio chunks are being accumulated.
    RUNNING = 1
    # A quiet chunk followed speech; waiting to see whether the utterance is over.
    SILENCE = 2


def detect_speech(audio_data, threshold):
    """Return whether any sample's absolute amplitude exceeds `threshold`.

    Args:
        audio_data: Array-like of audio samples (the callers pass int16 data).
        threshold: Amplitude that a sample must strictly exceed to count as
            speech.

    Returns:
        A NumPy boolean: True if at least one sample satisfies
        ``abs(sample) > threshold``, False otherwise (including for empty
        input).
    """
    samples = np.asarray(audio_data)
    # Widen signed integers before taking the absolute value: in int16,
    # abs(-32768) wraps around to -32768, so a full-scale negative sample
    # would otherwise never exceed the threshold.
    if np.issubdtype(samples.dtype, np.signedinteger):
        samples = samples.astype(np.int64)
    return np.any(np.abs(samples) > threshold)


def main():
    """Record utterances from the microphone and publish them as audio.

    A mono int16 input stream is opened at 16 kHz. Every incoming chunk is
    classified with `detect_speech`. Recording starts on the first loud chunk
    and continues through pauses. Once the input has stayed quiet for longer
    than `silence_duration` seconds, the accumulated samples are scaled to
    float32 (divided by 32768) and sent on the `audio` output.

    The function blocks forever while the stream callback does the work.
    """
    # Parameters
    threshold = 500  # Amplitude a sample must exceed to count as speech
    SAMPLE_RATE = 16000
    silence_duration = 4  # Seconds of silence that end an utterance

    # Chunks of the utterance being recorded, plus the state machine's data
    chunks = []
    state = RecordingState.PENDING
    silence_start_time = tm.time()
    node = Node()

    def callback(indata, frames, time, status):
        """Advance the recording state machine with one chunk of audio."""
        nonlocal chunks, state, silence_start_time

        # Surface stream problems reported by the audio backend instead of
        # silently dropping them.
        if status:
            print(status)

        # Keep a private copy so the stored chunk does not alias the buffer
        # handed to the callback.
        samples = indata[:, 0].copy()

        if detect_speech(samples, threshold):
            if state == RecordingState.PENDING:
                chunks = []
            # Speech started or resumed. Leaving SILENCE here matters: the
            # next quiet chunk must start a fresh silence timer rather than
            # be measured against a pause that has already ended.
            state = RecordingState.RUNNING
            chunks.append(samples)
        elif state == RecordingState.RUNNING:
            silence_start_time = tm.time()  # Start the silence timer
            chunks.append(samples)
            state = RecordingState.SILENCE
        elif state == RecordingState.SILENCE:
            if tm.time() - silence_start_time > silence_duration:
                audio_data = np.concatenate(chunks).astype(np.float32) / 32768.0
                node.send_output("audio", pa.array(audio_data))
                state = RecordingState.PENDING
            else:
                chunks.append(samples)

    # Start recording; the callback runs for as long as the stream is open
    with sd.InputStream(
        callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE
    ):
        while True:
            sd.sleep(int(100 * 1000))

+ 29
- 0
node-hub/dora-microphone/pyproject.toml View File

@@ -0,0 +1,29 @@
# Poetry packaging metadata for the dora-microphone node.
# NOTE(review): only `dora_microphone` is listed under `packages`; the sibling
# `dora_microphone_save` package in the same directory is not -- confirm this
# is intentional.
[tool.poetry]
name = "dora-microphone"
version = "0.3.5"
authors = [
    "Haixuan Xavier Tao <tao.xavier@outlook.com>",
    "Enzo Le Van <dev@enzo-le-van.fr>",
]
description = "Dora dora-microphone"
license = "MIT License"
homepage = "https://github.com/dora-rs/dora.git"
documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-microphone/README.md"
readme = "README.md"
packages = [{ include = "dora_microphone" }]

[tool.poetry.dependencies]
dora-rs = "0.3.5"
numpy = "< 2.0.0"
pyarrow = ">= 5.0.0"
sounddevice = "^0.4.6"

# Console script; requires dora_microphone/main.py to expose a callable named
# `main`.
[tool.poetry.scripts]
dora-microphone = "dora_microphone.main:main"

[build-system]
requires = ["poetry-core>=1.8.0"]
build-backend = "poetry.core.masonry.api"

# Repeats the readme setting from [tool.poetry] above.
[project]
readme = "README.md"

+ 2
- 0
node-hub/dora-microphone/tests/test_arrow_sender.py View File

@@ -0,0 +1,2 @@
def test_placeholder():
    """Placeholder test that asserts nothing."""
    return None

+ 3
- 0
node-hub/keyboard-listener/README.md View File

@@ -0,0 +1,3 @@
# Dora Node for sending arrow data.

This node sends the DATA specified in the environment variable or via the `--data` argument.

+ 11
- 0
node-hub/keyboard-listener/keyboard_listener/__init__.py View File

@@ -0,0 +1,11 @@
import os

# README.md is expected in the parent directory of this package.
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Expose the README text as the package docstring; fall back to a fixed
# message when the file does not exist.
try:
    f = open(readme_path, "r", encoding="utf-8")
except FileNotFoundError:
    __doc__ = "README file not found."
else:
    with f:
        __doc__ = f.read()

+ 19
- 0
node-hub/keyboard-listener/keyboard_listener/main.py View File

@@ -0,0 +1,19 @@
from pynput import keyboard
from pynput.keyboard import Key, Events
import pyarrow as pa
from dora import Node


def main():
    """Publish every printable key press on the `char` output.

    Keyboard events are polled with a one-second timeout. For each press
    event whose key carries a non-None `char`, that character is sent as a
    one-element Arrow array. Timeouts, release events and keys without a
    character are ignored. The loop never returns.

    This callable is the target of the `keyboard_listener.main:main` console
    script declared in pyproject.toml.
    """
    node = Node()

    with keyboard.Events() as events:
        while True:
            # `get` yields None when no event arrives within the timeout.
            event = events.get(1.0)
            if not isinstance(event, Events.Press):
                continue
            # Not every key object has a `char` attribute, and it may be None.
            char = getattr(event.key, "char", None)
            if char is not None:
                node.send_output("char", pa.array([char]))


if __name__ == "__main__":
    main()

+ 29
- 0
node-hub/keyboard-listener/pyproject.toml View File

@@ -0,0 +1,29 @@
# Poetry packaging metadata for the keyboard-listener node.
[tool.poetry]
name = "keyboard-listener"
version = "0.3.5"
authors = [
    "Haixuan Xavier Tao <tao.xavier@outlook.com>",
    "Enzo Le Van <dev@enzo-le-van.fr>",
]
description = "Dora keyboard-listener"
license = "MIT License"
homepage = "https://github.com/dora-rs/dora.git"
documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/keyboard-listener/README.md"
readme = "README.md"
packages = [{ include = "keyboard_listener" }]

[tool.poetry.dependencies]
dora-rs = "0.3.5"
numpy = "< 2.0.0"
pyarrow = ">= 5.0.0"
pynput = "^1.7.6"

# Console script; requires keyboard_listener/main.py to expose a callable
# named `main`.
[tool.poetry.scripts]
keyboard-listener = "keyboard_listener.main:main"

[build-system]
requires = ["poetry-core>=1.8.0"]
build-backend = "poetry.core.masonry.api"

# Repeats the readme setting from [tool.poetry] above.
[project]
readme = "README.md"

Loading…
Cancel
Save