@@ -0,0 +1,138 @@
# dora-llama-cpp-python

A Dora node for text generation with local language models, using either a llama.cpp (GGUF) backend or a Hugging Face Transformers backend.

## Features

- Supports both llama.cpp (CPU) and Hugging Face (CPU/GPU) backends
- Easy integration with speech-to-text and text-to-speech pipelines
- Configurable system prompt and activation words
- Chat history support with the Hugging Face backend
- Lightweight CPU inference with GGUF models (see the sketch after this list)
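The llama.cpp path is the node's default backend. The following is a minimal standalone sketch of what it does internally; the model repo, filename, prompt format, and stop tokens mirror `main.py`, and downloading the GGUF file requires `huggingface_hub` to be installed:

```python
from llama_cpp import Llama

# Download a small GGUF model from the Hugging Face Hub and load it on CPU,
# as done in dora_llama_cpp_python/main.py.
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
    filename="*fp16.gguf",
    verbose=False,
)

# Single-turn completion with the same "Q: ... A:" prompt style the node uses.
result = llm("Q: What is Dora? A: ", max_tokens=24, stop=["Q:", "\n"])
print(result["choices"][0]["text"])
```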
## Getting started

### Installation

```bash
uv venv -p 3.11 --seed
uv pip install -e .
```

## Usage

The node can be configured in your dataflow YAML file:

```yaml
- id: dora-llama-cpp-python
  build: pip install -e path/to/dora-llama-cpp-python
  path: dora-llama-cpp-python
  inputs:
    text: source_node/text # Input text to generate a response for
  outputs:
    - text # Generated response text
  env:
    MODEL_BACKEND: "llama-cpp" # or "huggingface"
    SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
    ACTIVATION_WORDS: "what how who where you" # Space-separated activation words
```
### Configuration Options

- `MODEL_BACKEND`: Choose between:
  - `llama-cpp`: Uses GGUF models via llama.cpp (CPU-optimized, default)
  - `huggingface`: Uses Hugging Face Transformers models (with chat history)
- `SYSTEM_PROMPT`: Customize the AI assistant's personality/behavior
- `ACTIVATION_WORDS`: Space-separated list of words that trigger a model response; if none of these words appear in the incoming text, the input is ignored (see the sketch below)
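The activation check is a simple word-membership test. The sketch below mirrors the logic in `main.py`; the example sentence is arbitrary:

```python
import os

# Same parsing as main.py: whitespace-separated words read from the environment.
ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "what how who where you").split()

text = "what is the weather like today"
words = text.lower().split()

# The node only queries the model when at least one activation word is present.
if any(word in ACTIVATION_WORDS for word in words):
    print("activation word found, generating a response")
else:
    print("no activation word, input ignored")
```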
## Examples

### Basic Speech-to-Text-to-Speech Pipeline

This example shows how to create a conversational AI pipeline that:

1. Captures audio from a microphone
2. Converts speech to text
3. Generates AI responses
4. Converts responses back to speech

```yaml
nodes:
  - id: dora-microphone
    build: pip install dora-microphone
    path: dora-microphone
    inputs:
      tick: dora/timer/millis/2000
    outputs:
      - audio

  - id: dora-vad
    build: pip install dora-vad
    path: dora-vad
    inputs:
      audio: dora-microphone/audio
    outputs:
      - audio
      - timestamp_start

  - id: dora-whisper
    build: pip install dora-distil-whisper
    path: dora-distil-whisper
    inputs:
      input: dora-vad/audio
    outputs:
      - text

  - id: dora-llama-cpp-python
    build: pip install -e .
    path: dora-llama-cpp-python
    inputs:
      text: dora-whisper/text
    outputs:
      - text
    env:
      MODEL_BACKEND: llama-cpp
      SYSTEM_PROMPT: "You're a helpful assistant."
      ACTIVATION_WORDS: "hey help what how"

  - id: dora-tts
    build: pip install dora-kokoro-tts
    path: dora-kokoro-tts
    inputs:
      text: dora-llama-cpp-python/text
    outputs:
      - audio
```
### Running the Example

```bash
dora build example.yml
dora run example.yml
```
## Contribution Guide

- Format with [ruff](https://docs.astral.sh/ruff/):

```bash
uv pip install ruff
uv run ruff check . --fix
```

- Lint with ruff:

```bash
uv run ruff check .
```

- Test with [pytest](https://github.com/pytest-dev/pytest):

```bash
uv pip install pytest
uv run pytest . # Test
```

## License

dora-llama-cpp-python's code is released under the MIT License.
@@ -0,0 +1,11 @@
import os

# Define the path to the README file relative to the package directory
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Read the content of the README file
try:
    with open(readme_path, "r", encoding="utf-8") as f:
        __doc__ = f.read()
except FileNotFoundError:
    __doc__ = "README file not found."
@@ -0,0 +1,5 @@
from .main import main

if __name__ == "__main__":
    main()
@@ -0,0 +1,93 @@
import os

import pyarrow as pa
from dora import Node

# System prompt shared by both backends
SYSTEM_PROMPT = os.getenv(
    "SYSTEM_PROMPT",
    "You're a very succinct AI assistant with short answers.",
)

# Backend selection based on environment variable
MODEL_BACKEND = os.getenv("MODEL_BACKEND", "llama-cpp")  # Default to CPU-based llama.cpp

# Words that must appear in the incoming text for the node to answer
ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "what how who where you").split()


def get_model_llama_cpp():
    """Load a GGUF model using llama-cpp-python (CPU by default)."""
    from llama_cpp import Llama

    return Llama.from_pretrained(
        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        filename="*fp16.gguf",
        verbose=False,
    )


def get_model_huggingface():
    """Load a Hugging Face Transformers model and its tokenizer."""
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "Qwen/Qwen2.5-0.5B-Instruct"
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype="auto", device_map="cpu"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


def generate_hf(model, tokenizer, prompt: str, history) -> tuple[str, list]:
    """Generate a response with a Hugging Face model, keeping the chat history."""
    history += [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        history, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to("cpu")
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Strip the prompt tokens so only the newly generated tokens are decoded
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    history += [{"role": "assistant", "content": response}]
    return response, history


def main():
    # Seed the chat history with the system prompt so the Hugging Face backend honors it
    history = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Select model backend
    if MODEL_BACKEND == "llama-cpp":
        model = get_model_llama_cpp()
    else:
        model, tokenizer = get_model_huggingface()

    node = Node()

    for event in node:
        if event["type"] == "INPUT":
            text = event["value"][0].as_py()
            words = text.lower().split()

            # Only respond when at least one activation word is present
            if any(word in ACTIVATION_WORDS for word in words):
                if MODEL_BACKEND == "llama-cpp":
                    # Prepend the system prompt so this backend also honors SYSTEM_PROMPT
                    response = model(
                        f"{SYSTEM_PROMPT} Q: {text} A: ",
                        max_tokens=24,
                        stop=["Q:", "\n"],
                    )["choices"][0]["text"]
                else:
                    response, history = generate_hf(model, tokenizer, text, history)

                # Log and forward the generated response
                print(response)
                node.send_output(
                    output_id="text", data=pa.array([response]), metadata={}
                )


if __name__ == "__main__":
    main()
@@ -0,0 +1,26 @@
[project]
name = "dora-llama-cpp-python"
version = "0.0.0"
authors = [{ name = "Shashwat Patil", email = "email@email.com" }]
description = "Dora node for text generation using llama.cpp (GGUF) or Hugging Face backends"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.9"

dependencies = [
    "dora-rs >= 0.3.9",
    "torch == 2.4.0",
    "torchvision >= 0.19",
    "torchaudio >= 2.1.0",
    "opencv-python >= 4.1.1",
    "modelscope >= 1.18.1",
    "accelerate >= 1.3.0",
    "transformers",
    "llama-cpp-python",
]

[dependency-groups]
dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

[project.scripts]
dora-llama-cpp-python = "dora_llama_cpp_python.main:main"
@@ -0,0 +1,61 @@
nodes:
  - id: dora-microphone
    build: pip install -e ../../node-hub/dora-microphone
    path: dora-microphone
    inputs:
      tick: dora/timer/millis/2000
    outputs:
      - audio

  - id: dora-vad
    build: pip install -e ../../node-hub/dora-vad
    path: dora-vad
    inputs:
      audio: dora-microphone/audio
    outputs:
      - audio
      - timestamp_start

  - id: dora-distil-whisper
    build: pip install -e ../../node-hub/dora-distil-whisper
    path: dora-distil-whisper
    inputs:
      input: dora-vad/audio
    outputs:
      - text
    env:
      TARGET_LANGUAGE: english

  - id: dora-llama-cpp-python
    build: pip install -e ../../node-hub/dora-llama-cpp-python
    path: dora-llama-cpp-python
    inputs:
      text: dora-distil-whisper/text
    outputs:
      - text
    env:
      MODEL_BACKEND: llama-cpp # Can be changed to "huggingface" if needed

  - id: plot
    build: pip install -e ../../node-hub/dora-rerun
    path: dora-rerun
    inputs:
      text_llama: dora-llama-cpp-python/text
      text_whisper: dora-distil-whisper/text

  - id: dora-kokoro-tts
    build: pip install -e ../../node-hub/dora-kokoro-tts
    path: dora-kokoro-tts
    inputs:
      text: dora-llama-cpp-python/text
    outputs:
      - audio
    env:
      ACTIVATION_WORDS: you

  - id: dora-pyaudio
    build: pip install -e ../../node-hub/dora-pyaudio
    path: dora-pyaudio
    inputs:
      audio: dora-kokoro-tts/audio
      timestamp_start: dora-vad/timestamp_start
@@ -0,0 +1,9 @@
import pytest


def test_import_main():
    from dora_llama_cpp_python.main import main

    # Check that everything imports correctly; a RuntimeError is expected
    # because we are not running inside a dora dataflow.
    with pytest.raises(RuntimeError):
        main()
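

# Additional illustrative test (not part of the original suite): it checks the
# default ACTIVATION_WORDS parsing and assumes the variable is unset in the
# test environment.
def test_activation_words_default_parsing():
    # ACTIVATION_WORDS is read from the environment at import time in main.py
    # and split on whitespace into a list of trigger words.
    from dora_llama_cpp_python.main import ACTIVATION_WORDS

    assert isinstance(ACTIVATION_WORDS, list)
    assert "what" in ACTIVATION_WORDS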