Browse Source

Adding Qwen LLM model

tags/v0.3.9-rc1
haixuantao 11 months ago
parent
commit
ac0e6a31c6
8 changed files with 188 additions and 0 deletions
  1. +1
    -0
      examples/llm/.gitignore
  2. +55
    -0
      examples/llm/qwen-dev.yml
  3. +37
    -0
      node-hub/dora-qwen/README.md
  4. +11
    -0
      node-hub/dora-qwen/dora_qwen/__init__.py
  5. +5
    -0
      node-hub/dora-qwen/dora_qwen/__main__.py
  6. +54
    -0
      node-hub/dora-qwen/dora_qwen/main.py
  7. +16
    -0
      node-hub/dora-qwen/pyproject.toml
  8. +9
    -0
      node-hub/dora-qwen/tests/test_dora_qwen.py

+ 1
- 0
examples/llm/.gitignore View File

@@ -0,0 +1 @@
*.pt

+ 55
- 0
examples/llm/qwen-dev.yml View File

@@ -0,0 +1,55 @@
# Voice-assistant dataflow: microphone -> VAD -> Whisper STT -> Qwen LLM
# -> OutteTTS speech synthesis -> audio playback, with a rerun plot node.
nodes:
  # Capture raw microphone audio, polled every 2000 ms.
  - id: dora-microphone
    build: pip install -e ../../node-hub/dora-microphone
    path: dora-microphone
    inputs:
      tick: dora/timer/millis/2000
    outputs:
      - audio

  # Voice-activity detection: forwards only segments that contain speech.
  - id: dora-vad
    build: pip install -e ../../node-hub/dora-vad
    path: dora-vad
    inputs:
      audio: dora-microphone/audio
    outputs:
      - audio

  # Speech-to-text transcription of the detected speech segments.
  - id: dora-distil-whisper
    build: pip install -e ../../node-hub/dora-distil-whisper
    path: dora-distil-whisper
    inputs:
      input: dora-vad/audio
    outputs:
      - text
    env:
      TARGET_LANGUAGE: english

  # Qwen LLM node (added by this commit): generates a reply to the transcript.
  - id: dora-qwen
    build: pip install -e ../../node-hub/dora-qwen
    path: dora-qwen
    inputs:
      text: dora-distil-whisper/text
    outputs:
      - text

  # Visualization of both the transcript and the LLM reply.
  - id: plot
    build: pip install -e ../../node-hub/dora-rerun
    path: dora-rerun
    inputs:
      text_qwen: dora-qwen/text
      text_whisper: dora-distil-whisper/text

  # Text-to-speech synthesis of the LLM reply.
  - id: dora-outtetts
    build: pip install -e ../../node-hub/dora-outtetts
    path: dora-outtetts
    inputs:
      text: dora-qwen/text
    outputs:
      - audio

  # Plays the synthesized audio on the default output device.
  - id: dora-pyaudio
    build: pip install -e ../../node-hub/dora-pyaudio
    path: dora-pyaudio
    inputs:
      audio: dora-outtetts/audio

+ 37
- 0
node-hub/dora-qwen/README.md View File

@@ -0,0 +1,37 @@
# dora-qwen

## Getting started

- Install it with pip:

```bash
pip install -e .
```

## Contribution Guide

- Format with [ruff](https://docs.astral.sh/ruff/):

```bash
ruff check . --fix
```

- Lint with ruff:

```bash
ruff check .
```

- Test with [pytest](https://github.com/pytest-dev/pytest)

```bash
pytest . # Test
```

## YAML Specification

## Examples

## License

dora-qwen's code is released under the MIT License.

+ 11
- 0
node-hub/dora-qwen/dora_qwen/__init__.py View File

@@ -0,0 +1,11 @@
import os

# Define the path to the README file relative to the package directory
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Read the content of the README file
try:
with open(readme_path, "r", encoding="utf-8") as f:
__doc__ = f.read()
except FileNotFoundError:
__doc__ = "README file not found."

+ 5
- 0
node-hub/dora-qwen/dora_qwen/__main__.py View File

@@ -0,0 +1,5 @@
from .main import main


if __name__ == "__main__":
main()

+ 54
- 0
node-hub/dora-qwen/dora_qwen/main.py View File

@@ -0,0 +1,54 @@
"""Dora node that answers transcribed speech using a Qwen2.5 chat model."""

import os

import pyarrow as pa
from dora import Node
from transformers import AutoModelForCausalLM, AutoTokenizer

# Small instruct-tuned checkpoint, fetched from the Hugging Face hub on
# first use.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

# NOTE(review): the model and tokenizer are loaded at import time, so merely
# importing this module triggers a download and allocates model memory.
# device_map="auto" places the weights on GPU when one is available.
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype="auto", device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Crude wake-word filter: only respond when the transcript contains one of
# these substrings ("wh" presumably matches question words like "what"/"where",
# "tu" likely French — TODO confirm intent).
TRIGGER_WORDS = ["you", "wh", "tu"]


def generate_response(prompt: str, history: list) -> "tuple[str, list]":
    """Generate a chat reply to *prompt*, threading the conversation history.

    Note that *history* is mutated in place (``+=`` extends the caller's
    list) and is also returned for convenience.

    Args:
        prompt: The user's text, e.g. a speech transcript.
        history: Chat messages as ``{"role": ..., "content": ...}`` dicts.

    Returns:
        A ``(response, history)`` tuple: the assistant's reply text and the
        updated message list.
    """
    history += [{"role": "user", "content": prompt}]
    # Render the conversation through the model's chat template, leaving the
    # assistant turn open for generation.
    text = tokenizer.apply_chat_template(
        history, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Strip the echoed prompt tokens so only the new completion is decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    history += [{"role": "assistant", "content": response}]
    return response, history


def main():
    """Run the dora event loop, replying to triggered transcripts on "text"."""
    system_prompt = "You are a Reachy robot, that gives extremely short answers only."
    history = [{"role": "system", "content": system_prompt}]
    node = Node()

    for event in node:
        # Only react to dataflow inputs; skip stop/error events.
        if event["type"] != "INPUT":
            continue
        # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
        transcript = event["value"][0].as_py()
        lowered = transcript.lower()
        if not any(trigger in lowered for trigger in TRIGGER_WORDS):
            continue
        reply, history = generate_response(transcript, history)
        node.send_output(output_id="text", data=pa.array([reply]), metadata={})


# Allow running this file directly as a script (outside the console entry point).
if __name__ == "__main__":
    main()

+ 16
- 0
node-hub/dora-qwen/pyproject.toml View File

@@ -0,0 +1,16 @@
[project]
name = "dora-qwen"
version = "0.0.0"
authors = [{ name = "Your Name", email = "email@email.com" }]
description = "dora-qwen"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.8"

# Runtime dependencies: dora_qwen/main.py imports dora, pyarrow and
# transformers; loading the model with device_map="auto" additionally
# requires torch and accelerate.
dependencies = [
    "dora-rs >= 0.3.6",
    "pyarrow",
    "transformers",
    "torch",
    "accelerate",
]

[dependency-groups]
dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

[project.scripts]
dora-qwen = "dora_qwen.main:main"

+ 9
- 0
node-hub/dora-qwen/tests/test_dora_qwen.py View File

@@ -0,0 +1,9 @@
"""Smoke test: the node must import cleanly and fail fast outside a dataflow."""

import pytest


def test_import_main():
    # NOTE(review): importing dora_qwen.main loads the Qwen checkpoint at
    # import time, so this test downloads the model — heavy for CI.
    from dora_qwen.main import main

    # Check that everything is working, and catch dora Runtime Exception as we're not running in a dora dataflow.
    with pytest.raises(RuntimeError):
        main()

Loading…
Cancel
Save