From 7debc07b4e2bd74675b97bc8588eb7f905c96a18 Mon Sep 17 00:00:00 2001
From: haixuanTao <tao.xavier@outlook.com>
Date: Fri, 16 Aug 2024 13:09:08 +0200
Subject: [PATCH] adding keyboard listener, microphone, and whisper node

---
 node-hub/dora-distil-whisper/README.md        |  3 +
 .../dora_distil_whisper/__init__.py           | 11 ++++
 .../dora_distil_whisper/main.py               | 42 +++++++++++++
 .../dora-distil-whisper/graphs/dataflow.yml   | 20 ++++++
 node-hub/dora-distil-whisper/pyproject.toml   | 29 +++++++++
 .../tests/test_arrow_sender.py                |  2 +
 node-hub/dora-microphone/README.md            |  3 +
 .../dora_microphone/__init__.py               | 11 ++++
 .../dora-microphone/dora_microphone/main.py   | 61 +++++++++++++++++++
 .../dora_microphone_save/__init__.py          | 11 ++++
 .../dora_microphone_save/main.py              | 61 +++++++++++++++++++
 node-hub/dora-microphone/pyproject.toml       | 29 +++++++++
 .../tests/test_arrow_sender.py                |  2 +
 node-hub/keyboard-listener/README.md          |  3 +
 .../keyboard_listener/__init__.py             | 11 ++++
 .../keyboard_listener/main.py                 | 19 ++++++
 node-hub/keyboard-listener/pyproject.toml     | 29 +++++++++
 17 files changed, 347 insertions(+)
 create mode 100644 node-hub/dora-distil-whisper/README.md
 create mode 100644 node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py
 create mode 100644 node-hub/dora-distil-whisper/dora_distil_whisper/main.py
 create mode 100644 node-hub/dora-distil-whisper/graphs/dataflow.yml
 create mode 100644 node-hub/dora-distil-whisper/pyproject.toml
 create mode 100644 node-hub/dora-distil-whisper/tests/test_arrow_sender.py
 create mode 100644 node-hub/dora-microphone/README.md
 create mode 100644 node-hub/dora-microphone/dora_microphone/__init__.py
 create mode 100644 node-hub/dora-microphone/dora_microphone/main.py
 create mode 100644 node-hub/dora-microphone/dora_microphone_save/__init__.py
 create mode 100644 node-hub/dora-microphone/dora_microphone_save/main.py
 create mode 100644 node-hub/dora-microphone/pyproject.toml
 create mode 100644 node-hub/dora-microphone/tests/test_arrow_sender.py
 create mode 100644 node-hub/keyboard-listener/README.md
 create mode 100644 node-hub/keyboard-listener/keyboard_listener/__init__.py
 create mode 100644 node-hub/keyboard-listener/keyboard_listener/main.py
 create mode 100644 node-hub/keyboard-listener/pyproject.toml

diff --git a/node-hub/dora-distil-whisper/README.md b/node-hub/dora-distil-whisper/README.md
new file mode 100644
index 00000000..125e029f
--- /dev/null
+++ b/node-hub/dora-distil-whisper/README.md
@@ -0,0 +1,3 @@
+# Dora Node for sending arrow data.
+
+This node sends DATA that is specified within the environment variable or from the `--data` argument.
diff --git a/node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py b/node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-distil-whisper/dora_distil_whisper/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# README.md is looked up one directory above this package directory.
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Use the README text as the package docstring; fall back to a fixed message if absent.
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-distil-whisper/dora_distil_whisper/main.py b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py
new file mode 100644
index 00000000..d938ddc8
--- /dev/null
+++ b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py
@@ -0,0 +1,42 @@
+import os
+
+# Offline mode is read when `transformers` is imported, so set the flag first.
+os.environ["TRANSFORMERS_OFFLINE"] = "1"
+
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+from dora import Node
+import pyarrow as pa
+
+device = "cuda:0" if torch.cuda.is_available() else "cpu"  # first GPU if present
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+model_id = "distil-whisper/distil-large-v3"
+
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id,
+    torch_dtype=torch_dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True,
+    local_files_only=True,  # never download: the weights must already be cached
+)
+model.to(device)
+processor = AutoProcessor.from_pretrained(model_id)
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    torch_dtype=torch_dtype,
+    device=device,
+    generate_kwargs={"language": "chinese"},  # transcription language is fixed
+)
+
+
+def main():
+    """Transcribe each incoming input event and publish the text on the "text" output."""
+    node = Node()
+    # Only INPUT events are transcribed; every other event type is ignored.
+    for event in (e for e in node if e["type"] == "INPUT"):
+        transcript = pipe(event["value"].to_numpy())["text"]
+        node.send_output("text", pa.array([transcript]))
diff --git a/node-hub/dora-distil-whisper/graphs/dataflow.yml b/node-hub/dora-distil-whisper/graphs/dataflow.yml
new file mode 100644
index 00000000..5c93226f
--- /dev/null
+++ b/node-hub/dora-distil-whisper/graphs/dataflow.yml
@@ -0,0 +1,20 @@
+nodes:
+  - id: dora-microphone
+    build: pip install -e ../../dora-microphone
+    path: dora-microphone
+    outputs:
+      - audio
+
+  - id: dora-distil-whisper
+    build: pip install -e ../.
+    path: dora-distil-whisper
+    inputs:
+      audio: dora-microphone/audio
+    outputs:
+      - text
+
+  - id: terminal-print
+    build: cargo build -p terminal-print
+    path: dynamic
+    inputs:
+      text: dora-distil-whisper/text
diff --git a/node-hub/dora-distil-whisper/pyproject.toml b/node-hub/dora-distil-whisper/pyproject.toml
new file mode 100644
index 00000000..e232f3aa
--- /dev/null
+++ b/node-hub/dora-distil-whisper/pyproject.toml
@@ -0,0 +1,33 @@
+[tool.poetry]
+name = "dora-distil-whisper"
+version = "0.3.5"
+authors = [
+    "Haixuan Xavier Tao <tao.xavier@outlook.com>",
+    "Enzo Le Van <dev@enzo-le-van.fr>",
+]
+description = "Dora dora-distil-whisper"
+license = "MIT License"
+homepage = "https://github.com/dora-rs/dora.git"
+documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-distil-whisper/README.md"
+readme = "README.md"
+packages = [{ include = "dora_distil_whisper" }]
+
+[tool.poetry.dependencies]
+dora-rs = "0.3.5"
+numpy = "< 2.0.0"
+pyarrow = ">= 5.0.0"
+transformers = ">= 4.0.0"
+# main.py imports torch directly and loads the model with low_cpu_mem_usage=True,
+# which transformers only supports when accelerate is installed.
+torch = ">= 2.0.0"
+accelerate = ">= 0.26.0"
+
+[tool.poetry.scripts]
+dora-distil-whisper = "dora_distil_whisper.main:main"
+
+[build-system]
+requires = ["poetry-core>=1.8.0"]
+build-backend = "poetry.core.masonry.api"
+
+[project]
+readme = "README.md"
diff --git a/node-hub/dora-distil-whisper/tests/test_arrow_sender.py b/node-hub/dora-distil-whisper/tests/test_arrow_sender.py
new file mode 100644
index 00000000..201975fc
--- /dev/null
+++ b/node-hub/dora-distil-whisper/tests/test_arrow_sender.py
@@ -0,0 +1,2 @@
+def test_placeholder():
+    """Placeholder so the test suite is not empty; nothing is checked yet."""
diff --git a/node-hub/dora-microphone/README.md b/node-hub/dora-microphone/README.md
new file mode 100644
index 00000000..125e029f
--- /dev/null
+++ b/node-hub/dora-microphone/README.md
@@ -0,0 +1,3 @@
+# Dora Node for sending arrow data.
+
+This node sends DATA that is specified within the environment variable or from the `--data` argument.
diff --git a/node-hub/dora-microphone/dora_microphone/__init__.py b/node-hub/dora-microphone/dora_microphone/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-microphone/dora_microphone/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# README.md is looked up one directory above this package directory.
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Use the README text as the package docstring; fall back to a fixed message if absent.
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-microphone/dora_microphone/main.py b/node-hub/dora-microphone/dora_microphone/main.py
new file mode 100644
index 00000000..dc05554d
--- /dev/null
+++ b/node-hub/dora-microphone/dora_microphone/main.py
@@ -0,0 +1,61 @@
+import sounddevice as sd
+import numpy as np
+import pyarrow as pa
+import time as tm
+from enum import Enum
+
+from dora import Node
+
+
+class RecordingState(Enum):
+    """States of the voice-activity state machine driven by the audio callback."""
+
+    PENDING = 0  # idle: waiting for a chunk that contains speech
+    RUNNING = 1  # speech detected: chunks are being appended to the buffer
+    SILENCE = 2  # quiet chunk seen after speech: waiting for the silence timeout
+
+
+def detect_speech(audio_data, threshold):
+    """Return whether any sample magnitude exceeds threshold (widened so -32768 cannot wrap)."""
+    return np.any(np.abs(np.asarray(audio_data, dtype=np.float64)) > threshold)
+
+
+def main():
+    """Stream microphone audio and send each detected utterance on the "audio" output."""
+    threshold = 500  # int16 amplitude above which a chunk counts as speech
+    SAMPLE_RATE = 16000  # passed to the input stream as its sample rate
+    silence_duration = 0.5  # seconds of silence that end an utterance
+    # Samples of the utterance in progress and the current state of the state machine.
+    buffer = []
+    state = RecordingState.PENDING
+    silence_start_time = tm.time()
+    node = Node()
+
+    def callback(indata, frames, time, status):
+        """Handle one chunk from the input stream (first channel only)."""
+        nonlocal buffer, state, silence_start_time
+        is_speaking = detect_speech(indata[:, 0], threshold)
+        if is_speaking:
+            if state == RecordingState.PENDING:
+                buffer = []
+            # Speech after a pause resumes RUNNING so the silence timer restarts.
+            state = RecordingState.RUNNING
+            buffer.extend(indata[:, 0])
+        elif state == RecordingState.RUNNING:
+            silence_start_time = tm.time()  # first quiet chunk starts the timer
+            buffer.extend(indata[:, 0])
+            state = RecordingState.SILENCE
+        elif state == RecordingState.SILENCE:
+            if tm.time() - silence_start_time > silence_duration:
+                audio_data = np.array(buffer).ravel().astype(np.float32) / 32768.0
+                node.send_output("audio", pa.array(audio_data))
+                state = RecordingState.PENDING
+            else:
+                buffer.extend(indata[:, 0])
+
+    # All processing happens in `callback`; this loop only keeps the stream open.
+    with sd.InputStream(
+        callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE
+    ):
+        while True:
+            sd.sleep(int(100 * 1000))
diff --git a/node-hub/dora-microphone/dora_microphone_save/__init__.py b/node-hub/dora-microphone/dora_microphone_save/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-microphone/dora_microphone_save/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# README.md is looked up one directory above this package directory.
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Use the README text as the package docstring; fall back to a fixed message if absent.
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-microphone/dora_microphone_save/main.py b/node-hub/dora-microphone/dora_microphone_save/main.py
new file mode 100644
index 00000000..ad3e9741
--- /dev/null
+++ b/node-hub/dora-microphone/dora_microphone_save/main.py
@@ -0,0 +1,61 @@
+import sounddevice as sd
+import numpy as np
+import pyarrow as pa
+import time as tm
+from enum import Enum
+
+from dora import Node
+
+
+class RecordingState(Enum):
+    """States of the voice-activity state machine driven by the audio callback."""
+
+    PENDING = 0  # idle: waiting for a chunk that contains speech
+    RUNNING = 1  # speech detected: chunks are being appended to the buffer
+    SILENCE = 2  # quiet chunk seen after speech: waiting for the silence timeout
+
+
+def detect_speech(audio_data, threshold):
+    """Return whether any sample magnitude exceeds threshold (widened so -32768 cannot wrap)."""
+    return np.any(np.abs(np.asarray(audio_data, dtype=np.float64)) > threshold)
+
+
+def main():
+    """Stream microphone audio and send each detected utterance on the "audio" output."""
+    threshold = 500  # int16 amplitude above which a chunk counts as speech
+    SAMPLE_RATE = 16000  # passed to the input stream as its sample rate
+    silence_duration = 4  # seconds of silence that end an utterance
+    # Samples of the utterance in progress and the current state of the state machine.
+    buffer = []
+    state = RecordingState.PENDING
+    silence_start_time = tm.time()
+    node = Node()
+
+    def callback(indata, frames, time, status):
+        """Handle one chunk from the input stream (first channel only)."""
+        nonlocal buffer, state, silence_start_time
+        is_speaking = detect_speech(indata[:, 0], threshold)
+        if is_speaking:
+            if state == RecordingState.PENDING:
+                buffer = []
+            # Speech after a pause resumes RUNNING so the silence timer restarts.
+            state = RecordingState.RUNNING
+            buffer.extend(indata[:, 0])
+        elif state == RecordingState.RUNNING:
+            silence_start_time = tm.time()  # first quiet chunk starts the timer
+            buffer.extend(indata[:, 0])
+            state = RecordingState.SILENCE
+        elif state == RecordingState.SILENCE:
+            if tm.time() - silence_start_time > silence_duration:
+                audio_data = np.array(buffer).ravel().astype(np.float32) / 32768.0
+                node.send_output("audio", pa.array(audio_data))
+                state = RecordingState.PENDING
+            else:
+                buffer.extend(indata[:, 0])
+
+    # All processing happens in `callback`; this loop only keeps the stream open.
+    with sd.InputStream(
+        callback=callback, dtype=np.int16, channels=1, samplerate=SAMPLE_RATE
+    ):
+        while True:
+            sd.sleep(int(100 * 1000))
diff --git a/node-hub/dora-microphone/pyproject.toml b/node-hub/dora-microphone/pyproject.toml
new file mode 100644
index 00000000..45aecb30
--- /dev/null
+++ b/node-hub/dora-microphone/pyproject.toml
@@ -0,0 +1,29 @@
+[tool.poetry]
+name = "dora-microphone"
+version = "0.3.5"
+authors = [
+    "Haixuan Xavier Tao <tao.xavier@outlook.com>",
+    "Enzo Le Van <dev@enzo-le-van.fr>",
+]
+description = "Dora dora-microphone"
+license = "MIT License"
+homepage = "https://github.com/dora-rs/dora.git"
+documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-microphone/README.md"
+readme = "README.md"
+packages = [{ include = "dora_microphone" }]
+
+[tool.poetry.dependencies]
+dora-rs = "0.3.5"
+numpy = "< 2.0.0"
+pyarrow = ">= 5.0.0"
+sounddevice = "^0.4.6"
+
+[tool.poetry.scripts]
+dora-microphone = "dora_microphone.main:main"
+
+[build-system]
+requires = ["poetry-core>=1.8.0"]
+build-backend = "poetry.core.masonry.api"
+
+[project]
+readme = "README.md"
diff --git a/node-hub/dora-microphone/tests/test_arrow_sender.py b/node-hub/dora-microphone/tests/test_arrow_sender.py
new file mode 100644
index 00000000..201975fc
--- /dev/null
+++ b/node-hub/dora-microphone/tests/test_arrow_sender.py
@@ -0,0 +1,2 @@
+def test_placeholder():
+    """Placeholder so the test suite is not empty; nothing is checked yet."""
diff --git a/node-hub/keyboard-listener/README.md b/node-hub/keyboard-listener/README.md
new file mode 100644
index 00000000..125e029f
--- /dev/null
+++ b/node-hub/keyboard-listener/README.md
@@ -0,0 +1,3 @@
+# Dora Node for sending arrow data.
+
+This node sends DATA that is specified within the environment variable or from the `--data` argument.
diff --git a/node-hub/keyboard-listener/keyboard_listener/__init__.py b/node-hub/keyboard-listener/keyboard_listener/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/keyboard-listener/keyboard_listener/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# README.md is looked up one directory above this package directory.
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Use the README text as the package docstring; fall back to a fixed message if absent.
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/keyboard-listener/keyboard_listener/main.py b/node-hub/keyboard-listener/keyboard_listener/main.py
new file mode 100644
index 00000000..6ba2b49b
--- /dev/null
+++ b/node-hub/keyboard-listener/keyboard_listener/main.py
@@ -0,0 +1,21 @@
+from pynput import keyboard
+from pynput.keyboard import Key, Events
+import pyarrow as pa
+from dora import Node
+
+
+def main():
+    """Send every key press that has a character on the "char" output, forever."""
+    node = Node()
+    with keyboard.Events() as events:
+        while True:
+            event = events.get(1.0)  # may be None (presumably on the 1 s timeout)
+            if isinstance(event, Events.Press):
+                # Keys without a `char` attribute, or with char None, are skipped.
+                char = getattr(event.key, "char", None)
+                if char is not None:
+                    node.send_output("char", pa.array([char]))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/keyboard-listener/pyproject.toml b/node-hub/keyboard-listener/pyproject.toml
new file mode 100644
index 00000000..0f7a2a18
--- /dev/null
+++ b/node-hub/keyboard-listener/pyproject.toml
@@ -0,0 +1,29 @@
+[tool.poetry]
+name = "keyboard-listener"
+version = "0.3.5"
+authors = [
+    "Haixuan Xavier Tao <tao.xavier@outlook.com>",
+    "Enzo Le Van <dev@enzo-le-van.fr>",
+]
+description = "Dora keyboard-listener"
+license = "MIT License"
+homepage = "https://github.com/dora-rs/dora.git"
+documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/keyboard-listener/README.md"
+readme = "README.md"
+packages = [{ include = "keyboard_listener" }]
+
+[tool.poetry.dependencies]
+dora-rs = "0.3.5"
+numpy = "< 2.0.0"
+pyarrow = ">= 5.0.0"
+pynput = "^1.7.6"
+
+[tool.poetry.scripts]
+keyboard-listener = "keyboard_listener.main:main"
+
+[build-system]
+requires = ["poetry-core>=1.8.0"]
+build-backend = "poetry.core.masonry.api"
+
+[project]
+readme = "README.md"