diff --git a/node-hub/dora-llama-cpp-python/README.md b/node-hub/dora-llama-cpp-python/README.md
index 22e4e04c..2acac9df 100644
--- a/node-hub/dora-llama-cpp-python/README.md
+++ b/node-hub/dora-llama-cpp-python/README.md
@@ -26,6 +26,28 @@ uv pip install -e .
 The node can be configured in your dataflow YAML file:
 
 ```yaml
+
+# Using a local model
+
+- id: dora-llama-cpp-python
+  build: pip install -e path/to/dora-llama-cpp-python
+  path: dora-llama-cpp-python
+  inputs:
+    text: source_node/text # Input text to generate response for
+  outputs:
+    - text # Generated response text
+  env:
+    MODEL_LOCAL_PATH: "./models/my-local-model.gguf"
+    SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
+    ACTIVATION_WORDS: "what how who where you"
+    MAX_TOKENS: "512"
+    N_GPU_LAYERS: "35" # Enable GPU acceleration
+    N_THREADS: "4" # CPU threads
+    CONTEXT_SIZE: "4096" # Maximum context window
+
+
+
+# Using a HuggingFace model
 - id: dora-llama-cpp-python
   build: pip install -e path/to/dora-llama-cpp-python
   path: dora-llama-cpp-python
@@ -34,7 +56,8 @@ The node can be configured in your dataflow YAML file:
   outputs:
     - text # Generated response text
   env:
-    MODEL_PATH: "./models/llama-2-7b-chat.Q4_K_M.gguf"
+    MODEL_NAME: "TheBloke/Llama-2-7B-Chat-GGUF"
+    MODEL_FILE_PATTERN: "*Q4_K_M.gguf"
     SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
     ACTIVATION_WORDS: "what how who where you"
     MAX_TOKENS: "512"
@@ -96,7 +119,8 @@ nodes:
     outputs:
       - text
     env:
-      MODEL_PATH: "./models/llama-2-7b-chat.Q4_K_M.gguf"
+      MODEL_NAME: "TheBloke/Llama-2-7B-Chat-GGUF"
+      MODEL_FILE_PATTERN: "*Q4_K_M.gguf"
       SYSTEM_PROMPT: "You're a helpful assistant."
       ACTIVATION_WORDS: "hey help what how"
       MAX_TOKENS: "512"
diff --git a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
index 843a32bf..18016f46 100644
--- a/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
+++ b/node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py
@@ -2,16 +2,21 @@ import os
 import pyarrow as pa
 from dora import Node
 from pathlib import Path
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO)
 
 # Environment variables for model configuration
 SYSTEM_PROMPT = os.getenv(
     "SYSTEM_PROMPT",
     "You're a very succinct AI assistant with short answers.",
 )
-MODEL_PATH = os.getenv("MODEL_PATH", "./models/llama-2-7b-chat.Q4_K_M.gguf")
+MODEL_LOCAL_PATH = os.getenv("MODEL_LOCAL_PATH", "")  # Local model path takes precedence
+MODEL_NAME = os.getenv("MODEL_NAME", "TheBloke/Llama-2-7B-Chat-GGUF")  # HF repo as fallback
+MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*Q4_K_M.gguf")
 MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))
-N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))  # Number of layers to offload to GPU
-N_THREADS = int(os.getenv("N_THREADS", "4"))  # Number of CPU threads
+N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
+N_THREADS = int(os.getenv("N_THREADS", "4"))
 CONTEXT_SIZE = int(os.getenv("CONTEXT_SIZE", "4096"))
 
 
@@ -19,22 +24,39 @@ def get_model():
     """Load a GGUF model using llama-cpp-python with optional GPU acceleration."""
     from llama_cpp import Llama
 
-    model_path = Path(MODEL_PATH)
-    if not model_path.exists():
-        raise FileNotFoundError(
-            f"Model file not found at {MODEL_PATH}. "
" - "Download it using: wget -O models/llama-2-7b-chat.Q4_K_M.gguf " - "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf" - ) - - llm = Llama( - model_path=str(model_path), - n_gpu_layers=N_GPU_LAYERS, # Enable GPU acceleration if > 0 - n_ctx=CONTEXT_SIZE, # Maximum context size - n_threads=N_THREADS, # Control CPU threading - verbose=False - ) - return llm + try: + # Check if local path is provided + if MODEL_LOCAL_PATH: + model_path = Path(MODEL_LOCAL_PATH) + if not model_path.exists(): + raise FileNotFoundError(f"Local model not found at {MODEL_LOCAL_PATH}") + + logging.info(f"Loading local model from {MODEL_LOCAL_PATH}") + llm = Llama( + model_path=str(model_path), + n_gpu_layers=N_GPU_LAYERS, + n_ctx=CONTEXT_SIZE, + n_threads=N_THREADS, + verbose=False + ) + else: + # Load from HuggingFace if no local path + logging.info(f"Downloading model {MODEL_NAME} with pattern {MODEL_FILE_PATTERN}") + llm = Llama.from_pretrained( + repo_id=MODEL_NAME, + filename=MODEL_FILE_PATTERN, + n_gpu_layers=N_GPU_LAYERS, + n_ctx=CONTEXT_SIZE, + n_threads=N_THREADS, + verbose=False + ) + + logging.info("Model loaded successfully") + return llm + + except Exception as e: + logging.error(f"Error loading model: {e}") + raise ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "what how who where you").split() diff --git a/node-hub/dora-llama-cpp-python/pyproject.toml b/node-hub/dora-llama-cpp-python/pyproject.toml index 3663bf3b..a6006fd1 100644 --- a/node-hub/dora-llama-cpp-python/pyproject.toml +++ b/node-hub/dora-llama-cpp-python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dora-llama-cpp-python" -version = "0.0.1" +version = "1.0.0" authors = [{ name = "Shashwat Patil", email = "email@email.com" }] description = "dora-llama-cpp-python" license = { text = "MIT" } diff --git a/node-hub/dora-llama-cpp-python/test.yml b/node-hub/dora-llama-cpp-python/test.yml index 375dc01e..893cbfb4 100644 --- a/node-hub/dora-llama-cpp-python/test.yml +++ b/node-hub/dora-llama-cpp-python/test.yml @@ -34,7 +34,8 @@ nodes: outputs: - text env: - MODEL_PATH: "./models/llama-2-7b-chat.Q4_K_M.gguf" + MODEL_NAME: "TheBloke/Llama-2-7B-Chat-GGUF" # Llama 2.7B model pull from Hugging Face + MODEL_FILE_PATTERN: "*Q4_K_M.gguf" SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." ACTIVATION_WORDS: "what how who where you" MAX_TOKENS: "512"