
Fix kokoro demo with GGUF-based inference of Qwen2.5 0.5B

tags/v0.3.10-rc3
haixuantao · 10 months ago · commit 2d12aa0f94
5 changed files with 55 additions and 26 deletions
1. examples/speech-to-speech/README.md (+3 -4)
2. examples/vlm/qwen2-5-vl-speech-to-speech-dev.yml (+7 -7)
3. node-hub/dora-kokoro-tts/dora_kokoro_tts/main.py (+0 -2)
4. node-hub/dora-qwen/dora_qwen/main.py (+28 -13)
5. node-hub/dora-qwen/pyproject.toml (+17 -0)

examples/speech-to-speech/README.md (+3 -4)

````diff
@@ -3,8 +3,7 @@
 Make sure to have, dora, pip and cargo installed.
 
 ```bash
-dora build https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-speech/outtetts.yml
-dora run https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-speech/outtetts.yml
-
-# Wait for models to download which can takes a bit of time.
+uv venv --seed -p 3.11
+dora build kokoro-dev.yml
+dora run kokoro-dev.yml
```
````

examples/vlm/qwen2-5-vl-speech-to-speech-dev.yml (+7 -7)

```diff
@@ -3,7 +3,7 @@ nodes:
     build: pip install opencv-video-capture
     path: opencv-video-capture
     inputs:
-      tick: dora/timer/millis/100
+      tick: dora/timer/millis/1000
     outputs:
       - image
     env:
@@ -37,7 +37,7 @@ nodes:
 
   - id: dora-qwenvl
     build: pip install -e ../../node-hub/dora-qwen2-5-vl
-    path: dora-qwenvl
+    path: dora-qwen2-5-vl
     inputs:
       image: camera/image
       text: dora-distil-whisper/text
@@ -45,7 +45,7 @@ nodes:
       - text
     env:
       DEFAULT_QUESTION: Describe the image in three words.
-      HISTORY: True
+      IMAGE_RESIZE_RATIO: "0.5"
 
   - id: plot
     build: pip install dora-rerun
@@ -55,9 +55,9 @@ nodes:
       text_qwenvl: dora-qwenvl/text
       text_whisper: dora-distil-whisper/text
 
-  - id: dora-outtetts
-    build: pip install -e ../../node-hub/dora-outtetts
-    path: dora-outtetts
+  - id: dora-kokoro-tts
+    build: pip install -e ../../node-hub/dora-kokoro-tts
+    path: dora-kokoro-tts
     inputs:
       text: dora-qwenvl/text
     outputs:
@@ -67,4 +67,4 @@ nodes:
     build: pip install -e ../../node-hub/dora-pyaudio
     path: dora-pyaudio
     inputs:
-      audio: dora-outtetts/audio
+      audio: dora-kokoro-tts/audio
```

node-hub/dora-kokoro-tts/dora_kokoro_tts/main.py (+0 -2)

```diff
@@ -21,8 +21,6 @@ def main():
             )
             for i, (gs, ps, audio) in enumerate(generator):
                 audio = audio.numpy()
-                print("audio detected")
-                sf.write(f"text_{i}.wav", audio, 24000)  # save each audio file
                 node.send_output("audio", pa.array(audio), {"sample_rate": 24000})
```
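
For orientation, here is a minimal sketch of the loop this hunk lives in, as it stands after the change. It assumes the hexgrad `kokoro` `KPipeline` API and the dora Python `Node` event loop; the `lang_code` and `voice` values are illustrative assumptions, not taken from this diff:

```python
# Sketch only: kokoro TTS node after removing the debug print and wav dump.
import pyarrow as pa
from dora import Node
from kokoro import KPipeline


def main():
    node = Node()
    pipeline = KPipeline(lang_code="a")  # assumption: "a" selects American English

    for event in node:
        if event["type"] == "INPUT":
            text = event["value"][0].as_py()
            # voice choice is illustrative; kokoro yields (graphemes, phonemes, audio)
            generator = pipeline(text, voice="af_heart", speed=1)
            for i, (gs, ps, audio) in enumerate(generator):
                # kokoro outputs 24 kHz torch tensors; stream each chunk downstream
                audio = audio.numpy()
                node.send_output("audio", pa.array(audio), {"sample_rate": 24000})
```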




node-hub/dora-qwen/dora_qwen/main.py (+28 -13)

```diff
@@ -11,6 +11,15 @@ SYSTEM_PROMPT = os.getenv(
 )
 
 
+def get_model_gguf():
+    from llama_cpp import Llama
+
+    llm = Llama.from_pretrained(
+        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF", filename="*fp16.gguf", verbose=False
+    )
+    return llm
+
+
 def get_model_darwin():
     from mlx_lm import load  # noqa
 
@@ -28,7 +37,7 @@ def get_model_huggingface():
     return model, tokenizer
 
 
-TRIGGER_WORDS = ["you", "wh", "tu"]
+ACTIVATION_WORDS = ["what", "how", "who", "where", "you"]
 
 
 def generate_hf(model, tokenizer, prompt: str, history) -> str:
@@ -48,16 +57,11 @@
 
 
 def main():
-    if SYSTEM_PROMPT != "":
-        history = [
-            {
-                "role": "system",
-                "content": SYSTEM_PROMPT,
-            },
-        ]
 
     history = []
     # If OS is not Darwin, use Huggingface model
-    if sys.platform != "darwin":
+    if sys.platform != "":
+        model = get_model_gguf()
+    elif sys.platform == "huggingface":
         model, tokenizer = get_model_huggingface()
     else:
         model, tokenizer = get_model_darwin()
@@ -68,10 +72,21 @@ def main():
         if event["type"] == "INPUT":
             # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
             text = event["value"][0].as_py()
-            if True:
+            words = text.lower().split()
+
+            if any(word in ACTIVATION_WORDS for word in words):
                 # On linux, Windows
-                if sys.platform != "darwin":
-                    response, history = generate_hf(text, history)
+                if sys.platform != "":
+                    response = model(
+                        f"Q: {text} A: ",  # Prompt
+                        max_tokens=24,
+                        stop=[
+                            "Q:",
+                            "\n",
+                        ],  # Stop generating just before the model would generate a new question
+                    )["choices"][0]["text"]
+                elif sys.platform == "huggingface":
+                    response, history = generate_hf(model, tokenizer, text, history)
                 else:
                     from mlx_lm import generate
```
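
Taken together, the new GGUF path can be exercised outside a dataflow. Below is a minimal sketch, assuming llama-cpp-python's `Llama.from_pretrained` classmethod (which downloads the matching file from the Hugging Face Hub) and reusing the activation-word gate from this diff; the sample prompts are illustrative:

```python
# Sketch only: standalone version of the new GGUF inference path.
from llama_cpp import Llama

ACTIVATION_WORDS = ["what", "how", "who", "where", "you"]

llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
    filename="*fp16.gguf",  # glob selects the fp16 file from the repo
    verbose=False,
)

for text in ["good morning", "what is a dataflow"]:
    words = text.lower().split()
    if any(word in ACTIVATION_WORDS for word in words):
        result = llm(
            f"Q: {text} A: ",
            max_tokens=24,
            stop=["Q:", "\n"],  # stop before the model invents a follow-up question
        )
        print(result["choices"][0]["text"])
```

Note that `sys.platform` is never the empty string, so `sys.platform != ""` routes inference through the GGUF model on every OS; the `"huggingface"` and darwin (`mlx_lm`) branches are effectively unreachable after this change.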



node-hub/dora-qwen/pyproject.toml (+17 -0)

```diff
@@ -17,10 +17,27 @@ dependencies = [
     "accelerate>=1.3.0",
     "transformers",
     "mlx-lm>=0.21.1; sys_platform == 'darwin'",
+    "llama-cpp-python",
 ]
 
+[tool.uv.sources]
+llama-cpp-python = [
+    { index = "llama_cpp_python_metal", marker = "sys_platform == 'darwin'" },
+    { index = "llama_cpp_python_cu121", marker = "sys_platform == 'linux'" },
+]
+
 [dependency-groups]
 dev = ["pytest >=8.1.1", "ruff >=0.9.1"]
 
+[[tool.uv.index]]
+name = "llama_cpp_python_cu121"
+url = "https://abetlen.github.io/llama-cpp-python/whl/cu121"
+explicit = true
+
+[[tool.uv.index]]
+name = "llama_cpp_python_metal"
+url = "https://abetlen.github.io/llama-cpp-python/whl/metal"
+explicit = true
+
 [project.scripts]
 dora-qwen = "dora_qwen.main:main"
```
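
With the two explicit indexes above, `uv` should resolve `llama-cpp-python` from the Metal wheel index on macOS and from the CUDA 12.1 index on Linux. A trivial post-install sanity check (hypothetical, assuming the package exposes `__version__`):

```python
import llama_cpp  # imports the platform-specific wheel selected by uv

print(llama_cpp.__version__)  # should print a version if the wheel matches the platform
```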
