From 7aa7a5eeca5cf47917239757685ab7f19b14478f Mon Sep 17 00:00:00 2001
From: ShashwatPatil
Date: Tue, 11 Mar 2025 23:28:22 +0530
Subject: [PATCH] added the dora-llama-cpp-python node

---
 node-hub/dora-llama-cpp-python/README.md      | 138 ++++++++++++++++++
 .../dora_llama_cpp_python/__init__.py         |  11 ++
 .../dora_llama_cpp_python/__main__.py         |   5 +
 .../dora_llama_cpp_python/main.py             |  93 ++++++++++++
 node-hub/dora-llama-cpp-python/pyproject.toml |  26 ++++
 node-hub/dora-llama-cpp-python/test.yml       |  61 ++++++++
 .../tests/test_dora_llama_cpp_python.py       |   9 ++
 7 files changed, 343 insertions(+)
 create mode 100644 node-hub/dora-llama-cpp-python/README.md
 create mode 100644 node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__init__.py
 create mode 100644 node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__main__.py
 create mode 100644 node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
 create mode 100644 node-hub/dora-llama-cpp-python/pyproject.toml
 create mode 100644 node-hub/dora-llama-cpp-python/test.yml
 create mode 100644 node-hub/dora-llama-cpp-python/tests/test_dora_llama_cpp_python.py

diff --git a/node-hub/dora-llama-cpp-python/README.md b/node-hub/dora-llama-cpp-python/README.md
new file mode 100644
index 00000000..0798c738
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/README.md
@@ -0,0 +1,138 @@
+# dora-llama-cpp-python
+
+A Dora node that provides access to LLaMA-based models using either llama.cpp or Hugging Face backends for text generation.
+
+## Features
+
+- Supports both llama.cpp (CPU) and Hugging Face (CPU/GPU) backends
+- Easy integration with speech-to-text and text-to-speech pipelines
+- Configurable system prompts and activation words
+- Chat history support with Hugging Face models
+- Lightweight CPU inference with GGUF models
+
+## Getting started
+
+### Installation
+
+```bash
+uv venv -p 3.11 --seed
+uv pip install -e .
+```
+
+## Usage
+
+The node can be configured in your dataflow YAML file:
+
+```yaml
+- id: dora-llama-cpp-python
+  build: pip install -e path/to/dora-llama-cpp-python
+  path: dora-llama-cpp-python
+  inputs:
+    text: source_node/text # Input text to generate a response for
+  outputs:
+    - text # Generated response text
+  env:
+    MODEL_BACKEND: "llama-cpp" # or "huggingface"
+    SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
+    ACTIVATION_WORDS: "what how who where you" # Space-separated activation words
+```
+
+### Configuration Options
+
+- `MODEL_BACKEND`: Choose between:
+  - `llama-cpp`: Uses GGUF models via llama.cpp (CPU-optimized, default)
+  - `huggingface`: Uses Hugging Face Transformers models
+
+- `SYSTEM_PROMPT`: Customize the AI assistant's personality/behavior
+- `ACTIVATION_WORDS`: Space-separated list of words that trigger a model response
+
+## Examples
+
+### Basic Speech-to-Text-to-Speech Pipeline
+
+This example shows how to create a conversational AI pipeline that:
+1. Captures audio from the microphone
+2. Converts speech to text
+3. Generates AI responses
+4. Converts responses back to speech
+
+```yaml
+nodes:
+  - id: dora-microphone
+    build: pip install dora-microphone
+    path: dora-microphone
+    inputs:
+      tick: dora/timer/millis/2000
+    outputs:
+      - audio
+
+  - id: dora-vad
+    build: pip install dora-vad
+    path: dora-vad
+    inputs:
+      audio: dora-microphone/audio
+    outputs:
+      - audio
+      - timestamp_start
+
+  - id: dora-whisper
+    build: pip install dora-distil-whisper
+    path: dora-distil-whisper
+    inputs:
+      input: dora-vad/audio
+    outputs:
+      - text
+
+  - id: dora-llama-cpp-python
+    build: pip install -e .
+    path: dora-llama-cpp-python
+    inputs:
+      text: dora-whisper/text
+    outputs:
+      - text
+    env:
+      MODEL_BACKEND: llama-cpp
+      SYSTEM_PROMPT: "You're a helpful assistant."
+      ACTIVATION_WORDS: "hey help what how"
+
+  - id: dora-tts
+    build: pip install dora-kokoro-tts
+    path: dora-kokoro-tts
+    inputs:
+      text: dora-llama-cpp-python/text
+    outputs:
+      - audio
+```
+
+### Running the Example
+
+```bash
+dora build example.yml
+dora run example.yml
+```
+
+## Contribution Guide
+
+- Format with [ruff](https://docs.astral.sh/ruff/):
+
+```bash
+uv pip install ruff
+uv run ruff check . --fix
+```
+
+- Lint with ruff:
+
+```bash
+uv run ruff check .
+```
+
+- Test with [pytest](https://github.com/pytest-dev/pytest):
+
+```bash
+uv pip install pytest
+uv run pytest . # Test
+```
+
+## License
+
+dora-llama-cpp-python's code is released under the MIT License.
diff --git a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__init__.py b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__main__.py b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__main__.py
new file mode 100644
index 00000000..bcbfde6d
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__main__.py
@@ -0,0 +1,5 @@
+from .main import main
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
new file mode 100644
index 00000000..e7a12a10
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
@@ -0,0 +1,93 @@
+import os
+import pyarrow as pa
+from dora import Node
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# System prompt
+SYSTEM_PROMPT = os.getenv(
+    "SYSTEM_PROMPT",
+    "You're a very succinct AI assistant with short answers.",
+)
+
+# Model selection based on ENV variable
+MODEL_BACKEND = os.getenv("MODEL_BACKEND", "llama-cpp")  # Default to CPU-based Llama
+
+
+def get_model_llama_cpp():
+    """Load a GGUF model using llama-cpp-python (CPU by default)."""
+    from llama_cpp import Llama
+
+    llm = Llama.from_pretrained(
+        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF", filename="*fp16.gguf", verbose=False
+    )
+    return llm
+
+
+def get_model_huggingface():
+    """Load a Hugging Face transformers model."""
+    model_name = "Qwen/Qwen2.5-0.5B-Instruct"
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name, torch_dtype="auto", device_map="cpu"
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    return model, tokenizer
+
+
+ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "what how who where you").split()
+
+
+def generate_hf(model, tokenizer, prompt: str, history) -> tuple[str, list]:
+    """Generate text using a Hugging Face model and return the response plus updated history."""
+    history += [{"role": "user", "content": prompt}]
+    text = tokenizer.apply_chat_template(
+        history, tokenize=False, add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to("cpu")
+    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
+    generated_ids = [
+        output_ids[len(input_ids) :]
+        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    history += [{"role": "assistant", "content": response}]
+    return response, history
+
+
+def main():
+    history = []
+
+    # Select model backend
+    if MODEL_BACKEND == "llama-cpp":
+        model = get_model_llama_cpp()
+    else:
+        model, tokenizer = get_model_huggingface()
+
+    node = Node()
+
+    for event in node:
+        if event["type"] == "INPUT":
+            text = event["value"][0].as_py()
+            words = text.lower().split()
+            print(words)
+
+            if any(word in ACTIVATION_WORDS for word in words):
+                print("")
+                if MODEL_BACKEND == "llama-cpp":
+                    response = model(
+                        f"Q: {text} A: ",
+                        max_tokens=24,
+                        stop=["Q:", "\n"],
+                    )["choices"][0]["text"]
+                else:
+                    response, history = generate_hf(model, tokenizer, text, history)
+
+                # log output
+                print(response)
+                node.send_output(
+                    output_id="text", data=pa.array([response]), metadata={}
+                )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-llama-cpp-python/pyproject.toml b/node-hub/dora-llama-cpp-python/pyproject.toml
new file mode 100644
index 00000000..a5b9c6bb
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/pyproject.toml
@@ -0,0 +1,26 @@
+[project]
+name = "dora-llama-cpp-python"
+version = "0.0.0"
+authors = [{ name = "Shashwat Patil", email = "email@email.com" }]
+description = "dora-llama-cpp-python"
+license = { text = "MIT" }
+readme = "README.md"
+requires-python = ">=3.9"
+
+dependencies = [
+    "dora-rs >= 0.3.9",
+    "torch == 2.4.0",
+    "torchvision >= 0.19",
+    "torchaudio >= 2.1.0",
+    "opencv-python >= 4.1.1",
+    "modelscope >= 1.18.1",
+    "accelerate >= 1.3.0",
+    "transformers",
+    "llama-cpp-python",
+]
+
+[dependency-groups]
+dev = ["pytest >=8.1.1", "ruff >=0.9.1"]
+
+[project.scripts]
+dora-llama-cpp-python = "dora_llama_cpp_python.main:main"
diff --git a/node-hub/dora-llama-cpp-python/test.yml b/node-hub/dora-llama-cpp-python/test.yml
new file mode 100644
index 00000000..6aa18a97
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/test.yml
@@ -0,0 +1,61 @@
+nodes:
+  - id: dora-microphone
+    build: pip install -e ../../node-hub/dora-microphone
+    path: dora-microphone
+    inputs:
+      tick: dora/timer/millis/2000
+    outputs:
+      - audio
+
+  - id: dora-vad
+    build: pip install -e ../../node-hub/dora-vad
+    path: dora-vad
+    inputs:
+      audio: dora-microphone/audio
+    outputs:
+      - audio
+      - timestamp_start
+
+  - id: dora-distil-whisper
+    build: pip install -e ../../node-hub/dora-distil-whisper
+    path: dora-distil-whisper
+    inputs:
+      input: dora-vad/audio
+    outputs:
+      - text
+    env:
+      TARGET_LANGUAGE: english
+
+  - id: dora-llama-cpp-python
+    build: pip install -e ../../node-hub/dora-llama-cpp-python
+    path: dora-llama-cpp-python
+    inputs:
+      text: dora-distil-whisper/text
+    outputs:
+      - text
+    env:
+      MODEL_BACKEND: llama-cpp # Can be changed to "huggingface" if needed
+
+  - id: plot
+    build: pip install -e ../../node-hub/dora-rerun
+    path: dora-rerun
+    inputs:
+      text_llama: dora-llama-cpp-python/text
+      text_whisper: dora-distil-whisper/text
+
+  - id: dora-kokoro-tts
+    build: pip install -e ../../node-hub/dora-kokoro-tts
+    path: dora-kokoro-tts
+    inputs:
+      text: dora-llama-cpp-python/text
+    outputs:
+      - audio
+    env:
+      ACTIVATION_WORDS: you
+
+  - id: dora-pyaudio
+    build: pip install -e ../../node-hub/dora-pyaudio
+    path: dora-pyaudio
+    inputs:
+      audio: dora-kokoro-tts/audio
+      timestamp_start: dora-vad/timestamp_start
diff --git a/node-hub/dora-llama-cpp-python/tests/test_dora_llama_cpp_python.py b/node-hub/dora-llama-cpp-python/tests/test_dora_llama_cpp_python.py
new file mode 100644
index 00000000..a4b11758
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/tests/test_dora_llama_cpp_python.py
@@ -0,0 +1,9 @@
+import pytest
+
+
+def test_import_main():
+    from dora_llama_cpp_python.main import main
+
+    # Check that everything imports, and catch the dora RuntimeError since we're not running inside a dora dataflow.
+    with pytest.raises(RuntimeError):
+        main()
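Outside of a dora dataflow, the llama-cpp backend added in `main.py` reduces to a single completion call. Below is a minimal standalone sketch of that call for local verification; it assumes `llama-cpp-python` and `huggingface_hub` are installed and that the GGUF download from the Hugging Face Hub succeeds, and it reuses the same model, prompt format, and stop tokens as `main.py`:

```python
from llama_cpp import Llama

# Same GGUF model and quantization as get_model_llama_cpp() in main.py
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF", filename="*fp16.gguf", verbose=False
)

# Same Q/A prompt format, token budget, and stop sequences as the node's main loop
text = "what is dora?"
result = llm(f"Q: {text} A: ", max_tokens=24, stop=["Q:", "\n"])
print(result["choices"][0]["text"])
```

The `ACTIVATION_WORDS` gating and the `node.send_output` call in `main()` are the only dora-specific pieces layered on top of this.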