
updated the parameter input

tags/v0.3.11-rc1
Shashwat Patil (GitHub) authored 10 months ago
parent commit 5361cb930f
4 changed files with 13 additions and 36 deletions

  1. node-hub/dora-llama-cpp-python/README.md (+3, -22)
  2. node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py (+8, -12)
  3. node-hub/dora-llama-cpp-python/pyproject.toml (+1, -1)
  4. node-hub/dora-llama-cpp-python/test.yml (+1, -1)

node-hub/dora-llama-cpp-python/README.md (+3, -22)

@@ -27,26 +27,6 @@ The node can be configured in your dataflow YAML file:
 
 ```yaml
 
-# Using a local model
-
-- id: dora-llama-cpp-python
-  build: pip install -e path/to/dora-llama-cpp-python
-  path: dora-llama-cpp-python
-  inputs:
-    text: source_node/text # Input text to generate response for
-  outputs:
-    - text # Generated response text
-  env:
-    MODEL_LOCAL_PATH: "./models/my-local-model.gguf"
-    SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-    ACTIVATION_WORDS: "what how who where you"
-    MAX_TOKENS: "512"
-    N_GPU_LAYERS: "35" # Enable GPU acceleration
-    N_THREADS: "4" # CPU threads
-    CONTEXT_SIZE: "4096" # Maximum context window
-
-
-
 # Using a HuggingFace model
 - id: dora-llama-cpp-python
   build: pip install -e path/to/dora-llama-cpp-python
@@ -56,7 +36,7 @@ The node can be configured in your dataflow YAML file:
   outputs:
     - text # Generated response text
   env:
-    MODEL_NAME: "TheBloke/Llama-2-7B-Chat-GGUF"
+    MODEL_NAME_OR_PATH: "TheBloke/Llama-2-7B-Chat-GGUF"
     MODEL_FILE_PATTERN: "*Q4_K_M.gguf"
     SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
     ACTIVATION_WORDS: "what how who where you"
@@ -68,7 +48,8 @@ The node can be configured in your dataflow YAML file:
 
 ### Configuration Options
 
-- `MODEL_PATH`: Path to your GGUF model file (default: "./models/llama-2-7b-chat.Q4_K_M.gguf")
+- `MODEL_NAME_OR_PATH`: Path to local model file or HuggingFace repo id (default: "TheBloke/Llama-2-7B-Chat-GGUF")
+- `MODEL_FILE_PATTERN`: Pattern to match model file when downloading from HF (default: "*Q4_K_M.gguf")
 - `SYSTEM_PROMPT`: Customize the AI assistant's personality/behavior
 - `ACTIVATION_WORDS`: Space-separated list of words that trigger model response
 - `MAX_TOKENS`: Maximum number of tokens to generate (default: 512)
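The diff above does not touch the activation logic, but `ACTIVATION_WORDS` is described as a space-separated trigger list, so the node presumably only queries the model when the incoming text contains one of those words. A minimal sketch of that gating, assuming word-level matching (the `should_respond` helper is illustrative, not code from the node):

```python
import os

# Space-separated trigger words, as described in the configuration options above.
ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "what how who where you").split()


def should_respond(text: str) -> bool:
    """Illustrative helper: True if the incoming text contains any activation word."""
    words = text.lower().split()
    return any(word in words for word in ACTIVATION_WORDS)


print(should_respond("what is dora?"))  # True: contains "what"
print(should_respond("hello there"))    # False: no trigger word
```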


node-hub/dora-llama-cpp-python/dora_llama_cpp_python/main.py (+8, -12)

@@ -11,8 +11,7 @@ SYSTEM_PROMPT = os.getenv(
     "SYSTEM_PROMPT",
     "You're a very succinct AI assistant with short answers.",
 )
-MODEL_LOCAL_PATH = os.getenv("MODEL_LOCAL_PATH", "") # Local model path takes precedence
-MODEL_NAME = os.getenv("MODEL_NAME", "TheBloke/Llama-2-7B-Chat-GGUF") # HF repo as fallback
+MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "TheBloke/Llama-2-7B-Chat-GGUF")
 MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*Q4_K_M.gguf")
 MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))
 N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
@@ -25,13 +24,10 @@ def get_model():
     from llama_cpp import Llama
     try:
-        # Check if local path is provided
-        if MODEL_LOCAL_PATH:
-            model_path = Path(MODEL_LOCAL_PATH)
-            if not model_path.exists():
-                raise FileNotFoundError(f"Local model not found at {MODEL_LOCAL_PATH}")
-            logging.info(f"Loading local model from {MODEL_LOCAL_PATH}")
+        # Check if path exists locally
+        model_path = Path(MODEL_NAME_OR_PATH)
+        if model_path.exists():
+            logging.info(f"Loading local model from {MODEL_NAME_OR_PATH}")
             llm = Llama(
                 model_path=str(model_path),
                 n_gpu_layers=N_GPU_LAYERS,
@@ -40,10 +36,10 @@ def get_model():
                 verbose=False
             )
         else:
-            # Load from HuggingFace if no local path
-            logging.info(f"Downloading model {MODEL_NAME} with pattern {MODEL_FILE_PATTERN}")
+            # Load from HuggingFace
+            logging.info(f"Downloading model {MODEL_NAME_OR_PATH} with pattern {MODEL_FILE_PATTERN}")
             llm = Llama.from_pretrained(
-                repo_id=MODEL_NAME,
+                repo_id=MODEL_NAME_OR_PATH,
                 filename=MODEL_FILE_PATTERN,
                 n_gpu_layers=N_GPU_LAYERS,
                 n_ctx=CONTEXT_SIZE,
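Pieced together from the hunks above, the loader after this change presumably reads roughly as below. The diff omits the rest of the try/except block and any parameters hidden by the hunk boundary (filled in here from the module-level constants), so treat this as a sketch of the new control flow rather than the file's exact contents:

```python
import logging
import os
from pathlib import Path

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "TheBloke/Llama-2-7B-Chat-GGUF")
MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*Q4_K_M.gguf")
N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
# The diff does not show the file's defaults for these two; values taken from the README example.
N_THREADS = int(os.getenv("N_THREADS", "4"))
CONTEXT_SIZE = int(os.getenv("CONTEXT_SIZE", "4096"))


def get_model():
    from llama_cpp import Llama

    model_path = Path(MODEL_NAME_OR_PATH)
    if model_path.exists():
        # The single variable points at an existing file: load the local GGUF directly.
        logging.info(f"Loading local model from {MODEL_NAME_OR_PATH}")
        return Llama(
            model_path=str(model_path),
            n_gpu_layers=N_GPU_LAYERS,
            n_threads=N_THREADS,
            n_ctx=CONTEXT_SIZE,
            verbose=False,
        )
    # Otherwise treat the value as a HuggingFace repo id and download a file
    # matching MODEL_FILE_PATTERN via llama-cpp-python's from_pretrained helper.
    logging.info(
        f"Downloading model {MODEL_NAME_OR_PATH} with pattern {MODEL_FILE_PATTERN}"
    )
    return Llama.from_pretrained(
        repo_id=MODEL_NAME_OR_PATH,
        filename=MODEL_FILE_PATTERN,
        n_gpu_layers=N_GPU_LAYERS,
        n_ctx=CONTEXT_SIZE,
        verbose=False,
    )
```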


node-hub/dora-llama-cpp-python/pyproject.toml (+1, -1)

@@ -1,6 +1,6 @@
 [project]
 name = "dora-llama-cpp-python"
-version = "1.0.0"
+version = "1.0.1"
 authors = [{ name = "Shashwat Patil", email = "email@email.com" }]
 description = "dora-llama-cpp-python"
 license = { text = "MIT" }


node-hub/dora-llama-cpp-python/test.yml (+1, -1)

@@ -34,7 +34,7 @@ nodes:
     outputs:
       - text
     env:
-      MODEL_NAME: "TheBloke/Llama-2-7B-Chat-GGUF" # Llama 2.7B model pull from Hugging Face
+      MODEL_NAME_OR_PATH: TheBloke/Llama-2-7B-Chat-GGUF # Llama 2.7B model pull from Hugging Face
       MODEL_FILE_PATTERN: "*Q4_K_M.gguf"
       SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
       ACTIVATION_WORDS: "what how who where you"

