From 7aa7a5eeca5cf47917239757685ab7f19b14478f Mon Sep 17 00:00:00 2001
From: ShashwatPatil
Date: Tue, 11 Mar 2025 23:28:22 +0530
Subject: [PATCH] added the dora-llama-cpp-python node

---
 node-hub/dora-llama-cpp-python/README.md      | 138 ++++++++++++++++++
 .../dora_llama_cpp_python/__init__.py         |  11 ++
 .../dora_llama_cpp_python/__main__.py         |   5 +
 .../dora_llama_cpp_python/main.py             |  93 ++++++++++++
 node-hub/dora-llama-cpp-python/pyproject.toml |  26 ++++
 node-hub/dora-llama-cpp-python/test.yml       |  61 ++++++++
 .../tests/test_dora_llama_cpp_python.py       |   9 ++
 7 files changed, 343 insertions(+)
 create mode 100644 node-hub/dora-llama-cpp-python/README.md
 create mode 100644 node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__init__.py
 create mode 100644 node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__main__.py
 create mode 100644 node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
 create mode 100644 node-hub/dora-llama-cpp-python/pyproject.toml
 create mode 100644 node-hub/dora-llama-cpp-python/test.yml
 create mode 100644 node-hub/dora-llama-cpp-python/tests/test_dora_llama_cpp_python.py

diff --git a/node-hub/dora-llama-cpp-python/README.md b/node-hub/dora-llama-cpp-python/README.md
new file mode 100644
index 00000000..0798c738
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/README.md
@@ -0,0 +1,138 @@
+# dora-llama-cpp-python
+
+A Dora node that provides access to LLaMA-based models using either llama.cpp or Hugging Face backends for text generation.
+
+## Features
+
+- Supports both llama.cpp (CPU) and Hugging Face (CPU/GPU) backends
+- Easy integration with speech-to-text and text-to-speech pipelines
+- Configurable system prompts and activation words
+- Chat history support with Hugging Face models
+- Lightweight CPU inference with GGUF models
+
+## Getting started
+
+### Installation
+
+```bash
+uv venv -p 3.11 --seed
+uv pip install -e .
+```
+
+## Usage
+
+The node can be configured in your dataflow YAML file:
+
+```yaml
+- id: dora-llama-cpp-python
+  build: pip install -e path/to/dora-llama-cpp-python
+  path: dora-llama-cpp-python
+  inputs:
+    text: source_node/text # Input text to generate a response for
+  outputs:
+    - text # Generated response text
+  env:
+    MODEL_BACKEND: "llama-cpp" # or "huggingface"
+    SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
+    ACTIVATION_WORDS: "what how who where you" # Space-separated activation words
+```
+
+### Configuration Options
+
+- `MODEL_BACKEND`: Choose between:
+  - `llama-cpp`: Uses GGUF models via llama.cpp (CPU-optimized, default)
+  - `huggingface`: Uses Hugging Face Transformers models
+
+- `SYSTEM_PROMPT`: Customize the AI assistant's personality/behavior
+- `ACTIVATION_WORDS`: Space-separated list of words that trigger a model response
+
+## Examples
+
+### Basic Speech-to-Text-to-Speech Pipeline
+
+This example shows how to create a conversational AI pipeline that:
+1. Captures audio from the microphone
+2. Converts speech to text
+3. Generates AI responses
+4. Converts responses back to speech
+
+```yaml
+nodes:
+  - id: dora-microphone
+    build: pip install dora-microphone
+    path: dora-microphone
+    inputs:
+      tick: dora/timer/millis/2000
+    outputs:
+      - audio
+
+  - id: dora-vad
+    build: pip install dora-vad
+    path: dora-vad
+    inputs:
+      audio: dora-microphone/audio
+    outputs:
+      - audio
+      - timestamp_start
+
+  - id: dora-whisper
+    build: pip install dora-distil-whisper
+    path: dora-distil-whisper
+    inputs:
+      input: dora-vad/audio
+    outputs:
+      - text
+
+  - id: dora-llama-cpp-python
+    build: pip install -e .
+    path: dora-llama-cpp-python
+    inputs:
+      text: dora-whisper/text
+    outputs:
+      - text
+    env:
+      MODEL_BACKEND: llama-cpp
+      SYSTEM_PROMPT: "You're a helpful assistant."
+      ACTIVATION_WORDS: "hey help what how"
+
+  - id: dora-tts
+    build: pip install dora-kokoro-tts
+    path: dora-kokoro-tts
+    inputs:
+      text: dora-llama-cpp-python/text
+    outputs:
+      - audio
+```
+
+### Running the Example
+
+```bash
+dora build example.yml
+dora run example.yml
+```
+
+## Contribution Guide
+
+- Format with [ruff](https://docs.astral.sh/ruff/):
+
+```bash
+uv pip install ruff
+uv run ruff check . --fix
+```
+
+- Lint with ruff:
+
+```bash
+uv run ruff check .
+```
+
+- Test with [pytest](https://github.com/pytest-dev/pytest):
+
+```bash
+uv pip install pytest
+uv run pytest . # Test
+```
+
+## License
+
+dora-llama-cpp-python's code is released under the MIT License.
diff --git a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__init__.py b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__main__.py b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__main__.py
new file mode 100644
index 00000000..bcbfde6d
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/__main__.py
@@ -0,0 +1,5 @@
+from .main import main
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
new file mode 100644
index 00000000..e7a12a10
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
@@ -0,0 +1,93 @@
+import os
+import pyarrow as pa
+from dora import Node
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# System prompt
+SYSTEM_PROMPT = os.getenv(
+    "SYSTEM_PROMPT",
+    "You're a very succinct AI assistant with short answers.",
+)
+
+# Model selection based on ENV variable
+MODEL_BACKEND = os.getenv("MODEL_BACKEND", "llama-cpp")  # Default to CPU-based Llama
+
+
+def get_model_llama_cpp():
+    """Load a GGUF model using llama-cpp-python (CPU by default)."""
+    from llama_cpp import Llama
+
+    llm = Llama.from_pretrained(
+        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF", filename="*fp16.gguf", verbose=False
+    )
+    return llm
+
+
+def get_model_huggingface():
+    """Load a Hugging Face transformers model."""
+    model_name = "Qwen/Qwen2.5-0.5B-Instruct"
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name, torch_dtype="auto", device_map="cpu"
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    return model, tokenizer
+
+
+ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "what how who where you").split()
+
+
+def generate_hf(model, tokenizer, prompt: str, history) -> tuple[str, list]:
+    """Generate text using a Hugging Face model and return the response plus updated history."""
+    history += [{"role": "user", "content": prompt}]
+    text = tokenizer.apply_chat_template(
+        history, tokenize=False, add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to("cpu")
+    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
+    generated_ids = [
+        output_ids[len(input_ids) :]
+        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    history += [{"role": "assistant", "content": response}]
+    return response, history
+
+
+def main():
+    history = []
+
+    # Select model backend
+    if MODEL_BACKEND == "llama-cpp":
+        model = get_model_llama_cpp()
+    else:
+        model, tokenizer = get_model_huggingface()
+
+    node = Node()
+
+    for event in node:
+        if event["type"] == "INPUT":
+            text = event["value"][0].as_py()
+            words = text.lower().split()
+            print(words)
+
+            if any(word in ACTIVATION_WORDS for word in words):
+                print("")
+                if MODEL_BACKEND == "llama-cpp":
+                    response = model(
+                        f"Q: {text} A: ",
+                        max_tokens=24,
+                        stop=["Q:", "\n"],
+                    )["choices"][0]["text"]
+                else:
+                    response, history = generate_hf(model, tokenizer, text, history)
+
+                # log output
+                print(response)
+                node.send_output(
+                    output_id="text", data=pa.array([response]), metadata={}
+                )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-llama-cpp-python/pyproject.toml b/node-hub/dora-llama-cpp-python/pyproject.toml
new file mode 100644
index 00000000..a5b9c6bb
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/pyproject.toml
@@ -0,0 +1,26 @@
+[project]
+name = "dora-llama-cpp-python"
+version = "0.0.0"
+authors = [{ name = "Shashwat Patil", email = "email@email.com" }]
+description = "dora-llama-cpp-python"
+license = { text = "MIT" }
+readme = "README.md"
+requires-python = ">=3.9"
+
+dependencies = [
+    "dora-rs >= 0.3.9",
+    "torch == 2.4.0",
+    "torchvision >= 0.19",
+    "torchaudio >= 2.1.0",
+    "opencv-python >= 4.1.1",
+    "modelscope >= 1.18.1",
+    "accelerate >= 1.3.0",
+    "transformers",
+    "llama-cpp-python",
+]
+
+[dependency-groups]
+dev = ["pytest >=8.1.1", "ruff >=0.9.1"]
+
+[project.scripts]
+dora-llama-cpp-python = "dora_llama_cpp_python.main:main"
diff --git a/node-hub/dora-llama-cpp-python/test.yml b/node-hub/dora-llama-cpp-python/test.yml
new file mode 100644
index 00000000..6aa18a97
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/test.yml
@@ -0,0 +1,61 @@
+nodes:
+  - id: dora-microphone
+    build: pip install -e ../../node-hub/dora-microphone
+    path: dora-microphone
+    inputs:
+      tick: dora/timer/millis/2000
+    outputs:
+      - audio
+
+  - id: dora-vad
+    build: pip install -e ../../node-hub/dora-vad
+    path: dora-vad
+    inputs:
+      audio: dora-microphone/audio
+    outputs:
+      - audio
+      - timestamp_start
+
+  - id: dora-distil-whisper
+    build: pip install -e ../../node-hub/dora-distil-whisper
+    path: dora-distil-whisper
+    inputs:
+      input: dora-vad/audio
+    outputs:
+      - text
+    env:
+      TARGET_LANGUAGE: english
+
+  - id: dora-llama-cpp-python
+    build: pip install -e ../../node-hub/dora-llama-cpp-python
+    path: dora-llama-cpp-python
+    inputs:
+      text: dora-distil-whisper/text
+    outputs:
+      - text
+    env:
+      MODEL_BACKEND: llama-cpp # Can be changed to "huggingface" if needed
+
+  - id: plot
+    build: pip install -e ../../node-hub/dora-rerun
+    path: dora-rerun
+    inputs:
+      text_llama: dora-llama-cpp-python/text
+      text_whisper: dora-distil-whisper/text
+
+  - id: dora-kokoro-tts
+    build: pip install -e ../../node-hub/dora-kokoro-tts
+    path: dora-kokoro-tts
+    inputs:
+      text: dora-llama-cpp-python/text
+    outputs:
+      - audio
+    env:
+      ACTIVATION_WORDS: you
+
+  - id: dora-pyaudio
+    build: pip install -e ../../node-hub/dora-pyaudio
+    path: dora-pyaudio
+    inputs:
+      audio: dora-kokoro-tts/audio
+      timestamp_start: dora-vad/timestamp_start
diff --git a/node-hub/dora-llama-cpp-python/tests/test_dora_llama_cpp_python.py b/node-hub/dora-llama-cpp-python/tests/test_dora_llama_cpp_python.py
new file mode 100644
index 00000000..a4b11758
--- /dev/null
+++ b/node-hub/dora-llama-cpp-python/tests/test_dora_llama_cpp_python.py
@@ -0,0 +1,9 @@
+import pytest
+
+
+def test_import_main():
+    from dora_llama_cpp_python.main import main
+
+    # Check that everything imports, and catch the dora RuntimeError since we're not running inside a dora dataflow.
+    with pytest.raises(RuntimeError):
+        main()
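Outside of a dora dataflow, the llama-cpp backend added in `main.py` reduces to a single completion call. Below is a minimal standalone sketch of that call for local verification; it assumes `llama-cpp-python` and `huggingface_hub` are installed and that the GGUF download from the Hugging Face Hub succeeds, and it reuses the same model, prompt format, and stop tokens as `main.py`:

```python
from llama_cpp import Llama

# Same GGUF model and quantization as get_model_llama_cpp() in main.py
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF", filename="*fp16.gguf", verbose=False
)

# Same Q/A prompt format, token budget, and stop sequences as the node's main loop
text = "what is dora?"
result = llm(f"Q: {text} A: ", max_tokens=24, stop=["Q:", "\n"])
print(result["choices"][0]["text"])
```

The `ACTIVATION_WORDS` gating and the `node.send_output` call in `main()` are the only dora-specific pieces layered on top of this.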