
Use quantized model instead of fp16 for faster response and lower memory footprint

make-qwen-llm-configurable
haixuantao committed 5 months ago
commit 763eb44833
1 changed file with 1 addition and 0 deletions:
  examples/openai-realtime/whisper-template-metal.yml  +1  -0

examples/openai-realtime/whisper-template-metal.yml

@@ -43,6 +43,7 @@ nodes:
       - text
     env:
       MODEL_NAME_OR_PATH: Qwen/Qwen2.5-0.5B-Instruct-GGUF
+      MODEL_FILE_PATTERN: "*[qQ]6_[kK].[gG][gG][uU][fF]"
 
   - id: tts
     build: pip install -e ../../node-hub/dora-kokoro-tts
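
For reference, MODEL_FILE_PATTERN is a shell-style glob; the bracketed character classes make the match case-insensitive, so it selects only the Q6_K quantized GGUF file and skips the fp16 weights the dataflow used before. Below is a minimal sketch of how such a pattern filters filenames, assuming the usual naming in the Qwen/Qwen2.5-0.5B-Instruct-GGUF repository; the filenames and the use of Python's fnmatch are illustrative, not the node's actual loading code.

# Illustrative only: check which GGUF files the MODEL_FILE_PATTERN glob selects.
from fnmatch import fnmatchcase

PATTERN = "*[qQ]6_[kK].[gG][gG][uU][fF]"

# Hypothetical listing following the repository's usual naming scheme.
candidates = [
    "qwen2.5-0.5b-instruct-fp16.gguf",    # full-precision file: no match
    "qwen2.5-0.5b-instruct-q4_k_m.gguf",  # other quantization: no match
    "qwen2.5-0.5b-instruct-q6_k.gguf",    # Q6_K quantization: match
    "Qwen2.5-0.5B-Instruct-Q6_K.GGUF",    # mixed case still matches via the [...] classes
]

# fnmatchcase keeps matching case-sensitive regardless of the OS, so only the
# bracketed classes in the pattern itself provide case-insensitivity.
for name in candidates:
    print(f"{name}: {'match' if fnmatchcase(name, PATTERN) else 'no match'}")

Only the two q6_k entries match, which is what lets the node pick the smaller quantized weights instead of the fp16 file.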

