@@ -11,8 +11,7 @@ SYSTEM_PROMPT = os.getenv(
     "SYSTEM_PROMPT",
     "You're a very succinct AI assistant with short answers.",
 )
-MODEL_LOCAL_PATH = os.getenv("MODEL_LOCAL_PATH", "")  # Local model path takes precedence
-MODEL_NAME = os.getenv("MODEL_NAME", "TheBloke/Llama-2-7B-Chat-GGUF")  # HF repo as fallback
+MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "TheBloke/Llama-2-7B-Chat-GGUF")
 MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*Q4_K_M.gguf")
 MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))
 N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
@@ -25,13 +24,10 @@ def get_model():
     from llama_cpp import Llama
 
     try:
-        # Check if local path is provided
-        if MODEL_LOCAL_PATH:
-            model_path = Path(MODEL_LOCAL_PATH)
-            if not model_path.exists():
-                raise FileNotFoundError(f"Local model not found at {MODEL_LOCAL_PATH}")
-
-            logging.info(f"Loading local model from {MODEL_LOCAL_PATH}")
+        # Check if path exists locally
+        model_path = Path(MODEL_NAME_OR_PATH)
+        if model_path.exists():
+            logging.info(f"Loading local model from {MODEL_NAME_OR_PATH}")
             llm = Llama(
                 model_path=str(model_path),
                 n_gpu_layers=N_GPU_LAYERS,
@@ -40,10 +36,10 @@ def get_model():
                 verbose=False
             )
         else:
-            # Load from HuggingFace if no local path
-            logging.info(f"Downloading model {MODEL_NAME} with pattern {MODEL_FILE_PATTERN}")
+            # Load from HuggingFace
+            logging.info(f"Downloading model {MODEL_NAME_OR_PATH} with pattern {MODEL_FILE_PATTERN}")
             llm = Llama.from_pretrained(
-                repo_id=MODEL_NAME,
+                repo_id=MODEL_NAME_OR_PATH,
                 filename=MODEL_FILE_PATTERN,
                 n_gpu_layers=N_GPU_LAYERS,
                 n_ctx=CONTEXT_SIZE,
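
For reviewers, a minimal sketch of how the consolidated variable dispatches after this change. The helper name resolve_model_source is hypothetical and not part of the diff; it only mirrors the Path(...).exists() check that get_model() now performs:

    # Sketch only: mirrors the branch logic in get_model() above.
    # resolve_model_source is an illustrative name, not in the codebase.
    from pathlib import Path

    def resolve_model_source(name_or_path: str) -> str:
        # An existing local file or directory loads directly via
        # Llama(model_path=...).
        if Path(name_or_path).exists():
            return "local"
        # Anything else is treated as a Hugging Face repo id and fetched
        # with Llama.from_pretrained(repo_id=..., filename=MODEL_FILE_PATTERN).
        return "hub"

    assert resolve_model_source(__file__) == "local"
    # Assumes no relative directory with this name exists in the cwd:
    assert resolve_model_source("TheBloke/Llama-2-7B-Chat-GGUF") == "hub"

One consequence of folding both settings into a single variable: the local check wins, so a value that happens to match an existing relative path will be loaded from disk rather than treated as a repo id.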