From 5361cb930f3d09fe2226cb63164e2f2a5f81ec7a Mon Sep 17 00:00:00 2001
From: Shashwat Patil <117521627+ShashwatPatil@users.noreply.github.com>
Date: Sun, 16 Mar 2025 15:56:21 +0530
Subject: [PATCH] updated the parameter input

---
 node-hub/dora-llama-cpp-python/README.md      | 25 +++----------------
 .../dora_llama_cpp_python/main.py             | 20 ++++++---------
 node-hub/dora-llama-cpp-python/pyproject.toml |  2 +-
 node-hub/dora-llama-cpp-python/test.yml       |  2 +-
 4 files changed, 13 insertions(+), 36 deletions(-)

diff --git a/node-hub/dora-llama-cpp-python/README.md b/node-hub/dora-llama-cpp-python/README.md
index 2acac9df..0ee13479 100644
--- a/node-hub/dora-llama-cpp-python/README.md
+++ b/node-hub/dora-llama-cpp-python/README.md
@@ -27,26 +27,6 @@ The node can be configured in your dataflow YAML file:
 
 ```yaml
-# Using a local model
-
-- id: dora-llama-cpp-python
-  build: pip install -e path/to/dora-llama-cpp-python
-  path: dora-llama-cpp-python
-  inputs:
-    text: source_node/text # Input text to generate response for
-  outputs:
-    - text # Generated response text
-  env:
-    MODEL_LOCAL_PATH: "./models/my-local-model.gguf"
-    SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-    ACTIVATION_WORDS: "what how who where you"
-    MAX_TOKENS: "512"
-    N_GPU_LAYERS: "35" # Enable GPU acceleration
-    N_THREADS: "4" # CPU threads
-    CONTEXT_SIZE: "4096" # Maximum context window
-
-
-
 # Using a HuggingFace model
 - id: dora-llama-cpp-python
   build: pip install -e path/to/dora-llama-cpp-python
@@ -56,7 +36,7 @@ The node can be configured in your dataflow YAML file:
   outputs:
     - text # Generated response text
   env:
-    MODEL_NAME: "TheBloke/Llama-2-7B-Chat-GGUF"
+    MODEL_NAME_OR_PATH: "TheBloke/Llama-2-7B-Chat-GGUF"
     MODEL_FILE_PATTERN: "*Q4_K_M.gguf"
     SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
     ACTIVATION_WORDS: "what how who where you"
@@ -68,7 +48,8 @@ The node can be configured in your dataflow YAML file:
 
 ### Configuration Options
 
-- `MODEL_PATH`: Path to your GGUF model file (default: "./models/llama-2-7b-chat.Q4_K_M.gguf")
+- `MODEL_NAME_OR_PATH`: Path to local model file or HuggingFace repo id (default: "TheBloke/Llama-2-7B-Chat-GGUF")
+- `MODEL_FILE_PATTERN`: Pattern to match model file when downloading from HF (default: "*Q4_K_M.gguf")
 - `SYSTEM_PROMPT`: Customize the AI assistant's personality/behavior
 - `ACTIVATION_WORDS`: Space-separated list of words that trigger model response
 - `MAX_TOKENS`: Maximum number of tokens to generate (default: 512)
diff --git a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
index 18016f46..05a7bd24 100644
--- a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
+++ b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
@@ -11,8 +11,7 @@ SYSTEM_PROMPT = os.getenv(
     "SYSTEM_PROMPT",
     "You're a very succinct AI assistant with short answers.",
 )
-MODEL_LOCAL_PATH = os.getenv("MODEL_LOCAL_PATH", "") # Local model path takes precedence
-MODEL_NAME = os.getenv("MODEL_NAME", "TheBloke/Llama-2-7B-Chat-GGUF") # HF repo as fallback
+MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "TheBloke/Llama-2-7B-Chat-GGUF")
 MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*Q4_K_M.gguf")
 MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))
 N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
@@ -25,13 +24,10 @@ def get_model():
     from llama_cpp import Llama
 
     try:
-        # Check if local path is provided
-        if MODEL_LOCAL_PATH:
-            model_path = Path(MODEL_LOCAL_PATH)
-            if not model_path.exists():
-                raise FileNotFoundError(f"Local model not found at {MODEL_LOCAL_PATH}")
-
-            logging.info(f"Loading local model from {MODEL_LOCAL_PATH}")
+        # Check if path exists locally
+        model_path = Path(MODEL_NAME_OR_PATH)
+        if model_path.exists():
+            logging.info(f"Loading local model from {MODEL_NAME_OR_PATH}")
             llm = Llama(
                 model_path=str(model_path),
                 n_gpu_layers=N_GPU_LAYERS,
@@ -40,10 +36,10 @@ def get_model():
                 verbose=False
             )
         else:
-            # Load from HuggingFace if no local path
-            logging.info(f"Downloading model {MODEL_NAME} with pattern {MODEL_FILE_PATTERN}")
+            # Load from HuggingFace
+            logging.info(f"Downloading model {MODEL_NAME_OR_PATH} with pattern {MODEL_FILE_PATTERN}")
             llm = Llama.from_pretrained(
-                repo_id=MODEL_NAME,
+                repo_id=MODEL_NAME_OR_PATH,
                 filename=MODEL_FILE_PATTERN,
                 n_gpu_layers=N_GPU_LAYERS,
                 n_ctx=CONTEXT_SIZE,
diff --git a/node-hub/dora-llama-cpp-python/pyproject.toml b/node-hub/dora-llama-cpp-python/pyproject.toml
index a6006fd1..8b832f72 100644
--- a/node-hub/dora-llama-cpp-python/pyproject.toml
+++ b/node-hub/dora-llama-cpp-python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dora-llama-cpp-python"
-version = "1.0.0"
+version = "1.0.1"
 authors = [{ name = "Shashwat Patil", email = "email@email.com" }]
 description = "dora-llama-cpp-python"
 license = { text = "MIT" }
diff --git a/node-hub/dora-llama-cpp-python/test.yml b/node-hub/dora-llama-cpp-python/test.yml
index 893cbfb4..3a5a210f 100644
--- a/node-hub/dora-llama-cpp-python/test.yml
+++ b/node-hub/dora-llama-cpp-python/test.yml
@@ -34,7 +34,7 @@ nodes:
     outputs:
       - text
     env:
-      MODEL_NAME: "TheBloke/Llama-2-7B-Chat-GGUF" # Llama 2.7B model pull from Hugging Face
+      MODEL_NAME_OR_PATH: TheBloke/Llama-2-7B-Chat-GGUF # Llama 2.7B model pull from Hugging Face
       MODEL_FILE_PATTERN: "*Q4_K_M.gguf"
       SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
       ACTIVATION_WORDS: "what how who where you"
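
For reference, below is a minimal, self-contained sketch (not the patched file verbatim) of the model-resolution behavior this change introduces: the single `MODEL_NAME_OR_PATH` variable is treated as a local GGUF path when it exists on disk, and otherwise as a Hugging Face repo id from which a file matching `MODEL_FILE_PATTERN` is downloaded. The defaults for `N_THREADS` and `CONTEXT_SIZE`, and the keyword arguments passed to `Llama.from_pretrained` beyond `n_ctx`, are assumptions taken from the README, since the hunk above is truncated at that point.

```python
# Sketch of the MODEL_NAME_OR_PATH resolution added by this patch.
# Assumed defaults (N_THREADS, CONTEXT_SIZE) mirror the README; extra
# kwargs forwarded to Llama.from_pretrained are an assumption.
import logging
import os
from pathlib import Path

from llama_cpp import Llama

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "TheBloke/Llama-2-7B-Chat-GGUF")
MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*Q4_K_M.gguf")
N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
N_THREADS = int(os.getenv("N_THREADS", "4"))
CONTEXT_SIZE = int(os.getenv("CONTEXT_SIZE", "4096"))


def get_model() -> Llama:
    model_path = Path(MODEL_NAME_OR_PATH)
    if model_path.exists():
        # The value points at a GGUF file on disk: load it directly.
        logging.info("Loading local model from %s", MODEL_NAME_OR_PATH)
        return Llama(
            model_path=str(model_path),
            n_gpu_layers=N_GPU_LAYERS,
            n_ctx=CONTEXT_SIZE,
            n_threads=N_THREADS,
            verbose=False,
        )
    # Not a local path: treat it as a Hugging Face repo id and download
    # the file matching MODEL_FILE_PATTERN.
    logging.info(
        "Downloading model %s with pattern %s", MODEL_NAME_OR_PATH, MODEL_FILE_PATTERN
    )
    return Llama.from_pretrained(
        repo_id=MODEL_NAME_OR_PATH,
        filename=MODEL_FILE_PATTERN,
        n_gpu_layers=N_GPU_LAYERS,
        n_ctx=CONTEXT_SIZE,
        n_threads=N_THREADS,
        verbose=False,
    )
```

In a dataflow YAML this means the same `MODEL_NAME_OR_PATH` key accepts either a local path such as `./models/my-local-model.gguf` or a repo id such as `TheBloke/Llama-2-7B-Chat-GGUF`, replacing the previous `MODEL_LOCAL_PATH`/`MODEL_NAME` pair.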