You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

llama_cpp_python.yaml 853 B

10 months ago
123456789101112131415161718192021222324252627282930
  1. nodes:
  2. - id: benchmark_script
  3. build: |
  4. pip install ../mllm
  5. path: ../mllm/benchmark_script.py
  6. inputs:
  7. text: llm/text
  8. outputs:
  9. - text
  10. env:
  11. TEXT: "Please only generate the following output: This is a test"
  12. TEXT_TRUTH: "This is a test"
  13. - id: llm
  14. build: pip install -e ../../node-hub/dora-llama-cpp-python
  15. path: dora-llama-cpp-python
  16. inputs:
  17. text:
  18. source: benchmark_script/text
  19. queue-size: 10
  20. outputs:
  21. - text
  22. env:
  23. MODEL_NAME_OR_PATH: "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
  24. MODEL_FILE_PATTERN: "*fp16.gguf"
  25. SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
  26. MAX_TOKENS: "512"
  27. N_GPU_LAYERS: "35" # Enable GPU acceleration
  28. N_THREADS: "16" # CPU threads
  29. CONTEXT_SIZE: "4096" # Maximum context window