@@ -3,8 +3,7 @@
 Make sure to have dora, pip, and cargo installed.
 ```bash
-dora build https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-speech/outtetts.yml
-dora run https://raw.githubusercontent.com/dora-rs/dora/main/examples/speech-to-speech/outtetts.yml
-# Wait for models to download, which can take a bit of time.
+uv venv --seed -p 3.11
+dora build kokoro-dev.yml
+dora run kokoro-dev.yml
 ```
@@ -3,7 +3,7 @@ nodes:
     build: pip install opencv-video-capture
     path: opencv-video-capture
     inputs:
-      tick: dora/timer/millis/100
+      tick: dora/timer/millis/1000
     outputs:
       - image
     env:
| @@ -37,7 +37,7 @@ nodes: | |||
| - id: dora-qwenvl | |||
| build: pip install -e ../../node-hub/dora-qwen2-5-vl | |||
| path: dora-qwenvl | |||
| path: dora-qwen2-5-vl | |||
| inputs: | |||
| image: camera/image | |||
| text: dora-distil-whisper/text | |||
@@ -45,7 +45,7 @@ nodes:
       - text
     env:
       DEFAULT_QUESTION: Describe the image in three words.
-      HISTORY: True
+      IMAGE_RESIZE_RATIO: "0.5"
   - id: plot
     build: pip install dora-rerun
@@ -55,9 +55,9 @@ nodes:
       text_qwenvl: dora-qwenvl/text
       text_whisper: dora-distil-whisper/text
-  - id: dora-outtetts
-    build: pip install -e ../../node-hub/dora-outtetts
-    path: dora-outtetts
+  - id: dora-kokoro-tts
+    build: pip install -e ../../node-hub/dora-kokoro-tts
+    path: dora-kokoro-tts
     inputs:
       text: dora-qwenvl/text
     outputs:
@@ -67,4 +67,4 @@ nodes:
     build: pip install -e ../../node-hub/dora-pyaudio
     path: dora-pyaudio
     inputs:
-      audio: dora-outtetts/audio
+      audio: dora-kokoro-tts/audio
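For orientation, every node wired up above follows the same dora Python node pattern: iterate over events, react to `INPUT`s, and emit results with `send_output`. A minimal sketch of a text-in/audio-out node in the shape of dora-kokoro-tts — the `synthesize` helper here is a placeholder tone generator, not the actual kokoro pipeline:

```python
import numpy as np
import pyarrow as pa
from dora import Node

SAMPLE_RATE = 24000


def synthesize(text: str) -> np.ndarray:
    """Placeholder for a real TTS call: emit a short tone so the sketch runs standalone."""
    t = np.linspace(0, 0.5, SAMPLE_RATE // 2, endpoint=False)
    return (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)


def main():
    node = Node()
    for event in node:
        # Text arrives as a one-element Arrow array, matching `text: dora-qwenvl/text`
        if event["type"] == "INPUT" and event["id"] == "text":
            audio = synthesize(event["value"][0].as_py())
            # Downstream nodes (dora-pyaudio) read the sample rate from metadata
            node.send_output("audio", pa.array(audio), {"sample_rate": SAMPLE_RATE})


if __name__ == "__main__":
    main()
```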
@@ -21,8 +21,6 @@ def main():
             )
             for i, (gs, ps, audio) in enumerate(generator):
                 audio = audio.numpy()
-                print("audio detected")
-                sf.write(f"text_{i}.wav", audio, 24000)  # save each audio file
                 node.send_output("audio", pa.array(audio), {"sample_rate": 24000})
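On the consuming side, dora-pyaudio receives that Arrow array together with the `sample_rate` metadata. A rough sketch of how such a playback node can be written — an illustration of the pattern, not the actual dora-pyaudio source:

```python
import pyaudio
from dora import Node


def main():
    node = Node()
    audio_api = pyaudio.PyAudio()
    stream = None
    for event in node:
        if event["type"] == "INPUT" and event["id"] == "audio":
            samples = event["value"].to_numpy()  # float32 PCM sent by the TTS node
            rate = int(event["metadata"].get("sample_rate", 24000))
            if stream is None:
                # Open the output stream lazily, once the first chunk tells us the rate
                stream = audio_api.open(
                    format=pyaudio.paFloat32, channels=1, rate=rate, output=True
                )
            stream.write(samples.tobytes())


if __name__ == "__main__":
    main()
```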
@@ -11,6 +11,15 @@ SYSTEM_PROMPT = os.getenv(
 )
 
+
+def get_model_gguf():
+    from llama_cpp import Llama
+
+    llm = Llama.from_pretrained(
+        repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF", filename="*fp16.gguf", verbose=False
+    )
+    return llm
+
 
 def get_model_darwin():
     from mlx_lm import load  # noqa
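The `Llama` object returned by `get_model_gguf` is used further down in this diff as a plain completion function. Pulled out as a standalone snippet, the call pattern looks like this (same prompt format and decoding parameters as the hunk below; only the example question is made up):

```python
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF", filename="*fp16.gguf", verbose=False
)

out = llm(
    "Q: What is the capital of France? A: ",  # completion-style prompt
    max_tokens=24,
    stop=["Q:", "\n"],  # stop before the model starts a new question
)
print(out["choices"][0]["text"])
```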
@@ -28,7 +37,7 @@ def get_model_huggingface():
     return model, tokenizer
 
-TRIGGER_WORDS = ["you", "wh", "tu"]
+ACTIVATION_WORDS = ["what", "how", "who", "where", "you"]
 
 def generate_hf(model, tokenizer, prompt: str, history) -> str:
@@ -48,16 +57,11 @@ def generate_hf(model, tokenizer, prompt: str, history) -> str:
 def main():
-    if SYSTEM_PROMPT != "":
-        history = [
-            {
-                "role": "system",
-                "content": SYSTEM_PROMPT,
-            },
-        ]
+    history = []
 
-    # If OS is not Darwin, use Huggingface model
-    if sys.platform != "darwin":
-        model, tokenizer = get_model_huggingface()
+    # On Linux and Windows, load the GGUF model through llama-cpp;
+    # on macOS, load the mlx model instead.
+    if sys.platform != "darwin":
+        model = get_model_gguf()
     else:
         model, tokenizer = get_model_darwin()
@@ -68,10 +72,21 @@ def main():
             if event["type"] == "INPUT":
                 # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
                 text = event["value"][0].as_py()
-                if True:
-                    if sys.platform != "darwin":
-                        response, history = generate_hf(text, history)
+                words = text.lower().split()
+                # Only respond when the transcript contains an activation word
+                if any(word in ACTIVATION_WORDS for word in words):
+                    # On Linux and Windows, run the GGUF model through llama-cpp
+                    if sys.platform != "darwin":
+                        response = model(
+                            f"Q: {text} A: ",  # completion-style prompt
+                            max_tokens=24,
+                            # Stop just before the model would generate a new question
+                            stop=["Q:", "\n"],
+                        )["choices"][0]["text"]
                     else:
                         from mlx_lm import generate
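The macOS branch is cut off by the hunk boundary. Assuming the standard mlx-lm API, a standalone sketch of that path could look like the following — the model id and the chat-template step are assumptions, not taken from this diff:

```python
from mlx_lm import generate, load

# Assumed model id; get_model_darwin() may load a different one
model, tokenizer = load("mlx-community/Qwen2.5-0.5B-Instruct-4bit")

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "What do you see?"}],
    tokenize=False,
    add_generation_prompt=True,
)
response = generate(model, tokenizer, prompt=prompt, max_tokens=64)
print(response)
```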
@@ -17,10 +17,27 @@ dependencies = [
     "accelerate>=1.3.0",
     "transformers",
     "mlx-lm>=0.21.1; sys_platform == 'darwin'",
+    "llama-cpp-python",
 ]
 
+[tool.uv.sources]
+llama-cpp-python = [
+    { index = "llama_cpp_python_metal", marker = "sys_platform == 'darwin'" },
+    { index = "llama_cpp_python_cu121", marker = "sys_platform == 'linux'" },
+]
+
 [dependency-groups]
 dev = ["pytest >=8.1.1", "ruff >=0.9.1"]
 
+[[tool.uv.index]]
+name = "llama_cpp_python_cu121"
+url = "https://abetlen.github.io/llama-cpp-python/whl/cu121"
+explicit = true
+
+[[tool.uv.index]]
+name = "llama_cpp_python_metal"
+url = "https://abetlen.github.io/llama-cpp-python/whl/metal"
+explicit = true
+
 [project.scripts]
 dora-qwen = "dora_qwen.main:main"