From 5361cb930f3d09fe2226cb63164e2f2a5f81ec7a Mon Sep 17 00:00:00 2001
From: Shashwat Patil <117521627+ShashwatPatil@users.noreply.github.com>
Date: Sun, 16 Mar 2025 15:56:21 +0530
Subject: [PATCH] updated the parameter input

---
 node-hub/dora-llama-cpp-python/README.md      | 25 +++----------------
 .../dora_llama_cpp_python/main.py             | 20 ++++++---------
 node-hub/dora-llama-cpp-python/pyproject.toml |  2 +-
 node-hub/dora-llama-cpp-python/test.yml       |  2 +-
 4 files changed, 13 insertions(+), 36 deletions(-)

diff --git a/node-hub/dora-llama-cpp-python/README.md b/node-hub/dora-llama-cpp-python/README.md
index 2acac9df..0ee13479 100644
--- a/node-hub/dora-llama-cpp-python/README.md
+++ b/node-hub/dora-llama-cpp-python/README.md
@@ -27,26 +27,6 @@ The node can be configured in your dataflow YAML file:
 
 ```yaml
-# Using a local model
-
-- id: dora-llama-cpp-python
-  build: pip install -e path/to/dora-llama-cpp-python
-  path: dora-llama-cpp-python
-  inputs:
-    text: source_node/text # Input text to generate response for
-  outputs:
-    - text # Generated response text
-  env:
-    MODEL_LOCAL_PATH: "./models/my-local-model.gguf"
-    SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-    ACTIVATION_WORDS: "what how who where you"
-    MAX_TOKENS: "512"
-    N_GPU_LAYERS: "35" # Enable GPU acceleration
-    N_THREADS: "4" # CPU threads
-    CONTEXT_SIZE: "4096" # Maximum context window
-
-
-
 # Using a HuggingFace model
 - id: dora-llama-cpp-python
   build: pip install -e path/to/dora-llama-cpp-python
@@ -56,7 +36,7 @@ The node can be configured in your dataflow YAML file:
   outputs:
     - text # Generated response text
   env:
-    MODEL_NAME: "TheBloke/Llama-2-7B-Chat-GGUF"
+    MODEL_NAME_OR_PATH: "TheBloke/Llama-2-7B-Chat-GGUF"
     MODEL_FILE_PATTERN: "*Q4_K_M.gguf"
     SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
     ACTIVATION_WORDS: "what how who where you"
@@ -68,7 +48,8 @@ The node can be configured in your dataflow YAML file:
 
 ### Configuration Options
 
-- `MODEL_PATH`: Path to your GGUF model file (default: "./models/llama-2-7b-chat.Q4_K_M.gguf")
+- `MODEL_NAME_OR_PATH`: Path to local model file or HuggingFace repo id (default: "TheBloke/Llama-2-7B-Chat-GGUF")
+- `MODEL_FILE_PATTERN`: Pattern to match model file when downloading from HF (default: "*Q4_K_M.gguf")
 - `SYSTEM_PROMPT`: Customize the AI assistant's personality/behavior
 - `ACTIVATION_WORDS`: Space-separated list of words that trigger model response
 - `MAX_TOKENS`: Maximum number of tokens to generate (default: 512)
diff --git a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
index 18016f46..05a7bd24 100644
--- a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
+++ b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
@@ -11,8 +11,7 @@ SYSTEM_PROMPT = os.getenv(
     "SYSTEM_PROMPT",
     "You're a very succinct AI assistant with short answers.",
 )
-MODEL_LOCAL_PATH = os.getenv("MODEL_LOCAL_PATH", "") # Local model path takes precedence
-MODEL_NAME = os.getenv("MODEL_NAME", "TheBloke/Llama-2-7B-Chat-GGUF") # HF repo as fallback
+MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "TheBloke/Llama-2-7B-Chat-GGUF")
 MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*Q4_K_M.gguf")
 MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))
 N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
@@ -25,13 +24,10 @@ def get_model():
     from llama_cpp import Llama
 
     try:
-        # Check if local path is provided
-        if MODEL_LOCAL_PATH:
-            model_path = Path(MODEL_LOCAL_PATH)
-            if not model_path.exists():
-                raise FileNotFoundError(f"Local model not found at {MODEL_LOCAL_PATH}")
-
-            logging.info(f"Loading local model from {MODEL_LOCAL_PATH}")
+        # Check if path exists locally
+        model_path = Path(MODEL_NAME_OR_PATH)
+        if model_path.exists():
+            logging.info(f"Loading local model from {MODEL_NAME_OR_PATH}")
             llm = Llama(
                 model_path=str(model_path),
                 n_gpu_layers=N_GPU_LAYERS,
@@ -40,10 +36,10 @@ def get_model():
                 verbose=False
             )
         else:
-            # Load from HuggingFace if no local path
-            logging.info(f"Downloading model {MODEL_NAME} with pattern {MODEL_FILE_PATTERN}")
+            # Load from HuggingFace
+            logging.info(f"Downloading model {MODEL_NAME_OR_PATH} with pattern {MODEL_FILE_PATTERN}")
             llm = Llama.from_pretrained(
-                repo_id=MODEL_NAME,
+                repo_id=MODEL_NAME_OR_PATH,
                 filename=MODEL_FILE_PATTERN,
                 n_gpu_layers=N_GPU_LAYERS,
                 n_ctx=CONTEXT_SIZE,
diff --git a/node-hub/dora-llama-cpp-python/pyproject.toml b/node-hub/dora-llama-cpp-python/pyproject.toml
index a6006fd1..8b832f72 100644
--- a/node-hub/dora-llama-cpp-python/pyproject.toml
+++ b/node-hub/dora-llama-cpp-python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dora-llama-cpp-python"
-version = "1.0.0"
+version = "1.0.1"
 authors = [{ name = "Shashwat Patil", email = "email@email.com" }]
 description = "dora-llama-cpp-python"
 license = { text = "MIT" }
diff --git a/node-hub/dora-llama-cpp-python/test.yml b/node-hub/dora-llama-cpp-python/test.yml
index 893cbfb4..3a5a210f 100644
--- a/node-hub/dora-llama-cpp-python/test.yml
+++ b/node-hub/dora-llama-cpp-python/test.yml
@@ -34,7 +34,7 @@ nodes:
     outputs:
       - text
     env:
-      MODEL_NAME: "TheBloke/Llama-2-7B-Chat-GGUF" # Llama 2.7B model pull from Hugging Face
+      MODEL_NAME_OR_PATH: TheBloke/Llama-2-7B-Chat-GGUF # Llama 2.7B model pull from Hugging Face
       MODEL_FILE_PATTERN: "*Q4_K_M.gguf"
       SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
       ACTIVATION_WORDS: "what how who where you"
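
For reference, below is a minimal, self-contained sketch (not the patched file verbatim) of the model-resolution behavior this change introduces: the single `MODEL_NAME_OR_PATH` variable is treated as a local GGUF path when it exists on disk, and otherwise as a Hugging Face repo id from which a file matching `MODEL_FILE_PATTERN` is downloaded. The defaults for `N_THREADS` and `CONTEXT_SIZE`, and the keyword arguments passed to `Llama.from_pretrained` beyond `n_ctx`, are assumptions taken from the README, since the hunk above is truncated at that point.

```python
# Sketch of the MODEL_NAME_OR_PATH resolution added by this patch.
# Assumed defaults (N_THREADS, CONTEXT_SIZE) mirror the README; extra
# kwargs forwarded to Llama.from_pretrained are an assumption.
import logging
import os
from pathlib import Path

from llama_cpp import Llama

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "TheBloke/Llama-2-7B-Chat-GGUF")
MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*Q4_K_M.gguf")
N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
N_THREADS = int(os.getenv("N_THREADS", "4"))
CONTEXT_SIZE = int(os.getenv("CONTEXT_SIZE", "4096"))


def get_model() -> Llama:
    model_path = Path(MODEL_NAME_OR_PATH)
    if model_path.exists():
        # The value points at a GGUF file on disk: load it directly.
        logging.info("Loading local model from %s", MODEL_NAME_OR_PATH)
        return Llama(
            model_path=str(model_path),
            n_gpu_layers=N_GPU_LAYERS,
            n_ctx=CONTEXT_SIZE,
            n_threads=N_THREADS,
            verbose=False,
        )
    # Not a local path: treat it as a Hugging Face repo id and download
    # the file matching MODEL_FILE_PATTERN.
    logging.info(
        "Downloading model %s with pattern %s", MODEL_NAME_OR_PATH, MODEL_FILE_PATTERN
    )
    return Llama.from_pretrained(
        repo_id=MODEL_NAME_OR_PATH,
        filename=MODEL_FILE_PATTERN,
        n_gpu_layers=N_GPU_LAYERS,
        n_ctx=CONTEXT_SIZE,
        n_threads=N_THREADS,
        verbose=False,
    )
```

In a dataflow YAML this means the same `MODEL_NAME_OR_PATH` key accepts either a local path such as `./models/my-local-model.gguf` or a repo id such as `TheBloke/Llama-2-7B-Chat-GGUF`, replacing the previous `MODEL_LOCAL_PATH`/`MODEL_NAME` pair.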