From ac0e6a31c6c7006b316357ab075b7e39a2074fc3 Mon Sep 17 00:00:00 2001
From: haixuantao
Date: Thu, 30 Jan 2025 10:37:52 +0100
Subject: [PATCH] Adding qwen llm model

---
 examples/llm/.gitignore                    |  1 +
 examples/llm/qwen-dev.yml                  | 55 +++++++++++++++++
 node-hub/dora-qwen/README.md               | 37 ++++++++++++
 node-hub/dora-qwen/dora_qwen/__init__.py   | 11 ++++
 node-hub/dora-qwen/dora_qwen/__main__.py   |  5 ++
 node-hub/dora-qwen/dora_qwen/main.py       | 68 ++++++++++++++++++++++
 node-hub/dora-qwen/pyproject.toml          | 22 +++++++
 node-hub/dora-qwen/tests/test_dora_qwen.py |  9 +++
 8 files changed, 208 insertions(+)
 create mode 100644 examples/llm/.gitignore
 create mode 100755 examples/llm/qwen-dev.yml
 create mode 100644 node-hub/dora-qwen/README.md
 create mode 100644 node-hub/dora-qwen/dora_qwen/__init__.py
 create mode 100644 node-hub/dora-qwen/dora_qwen/__main__.py
 create mode 100644 node-hub/dora-qwen/dora_qwen/main.py
 create mode 100644 node-hub/dora-qwen/pyproject.toml
 create mode 100644 node-hub/dora-qwen/tests/test_dora_qwen.py

diff --git a/examples/llm/.gitignore b/examples/llm/.gitignore
new file mode 100644
index 00000000..eede66d8
--- /dev/null
+++ b/examples/llm/.gitignore
@@ -0,0 +1 @@
+*.pt
\ No newline at end of file
diff --git a/examples/llm/qwen-dev.yml b/examples/llm/qwen-dev.yml
new file mode 100755
index 00000000..3cb67aa6
--- /dev/null
+++ b/examples/llm/qwen-dev.yml
@@ -0,0 +1,55 @@
+nodes:
+  - id: dora-microphone
+    build: pip install -e ../../node-hub/dora-microphone
+    path: dora-microphone
+    inputs:
+      tick: dora/timer/millis/2000
+    outputs:
+      - audio
+
+  - id: dora-vad
+    build: pip install -e ../../node-hub/dora-vad
+    path: dora-vad
+    inputs:
+      audio: dora-microphone/audio
+    outputs:
+      - audio
+
+  - id: dora-distil-whisper
+    build: pip install -e ../../node-hub/dora-distil-whisper
+    path: dora-distil-whisper
+    inputs:
+      input: dora-vad/audio
+    outputs:
+      - text
+    env:
+      TARGET_LANGUAGE: english
+
+  - id: dora-qwen
+    build: pip install -e ../../node-hub/dora-qwen
+    path: dora-qwen
+    inputs:
+      text: dora-distil-whisper/text
+    outputs:
+      - text
+
+  - id: plot
+    build: pip install -e ../../node-hub/dora-rerun
+    path: dora-rerun
+    inputs:
+      text_qwen: dora-qwen/text
+      text_whisper: dora-distil-whisper/text
+
+  - id: dora-outtetts
+    build: pip install -e ../../node-hub/dora-outtetts
+    path: dora-outtetts
+    inputs:
+      text: dora-qwen/text
+    outputs:
+      - audio
+
+  - id: dora-pyaudio
+    build: pip install -e ../../node-hub/dora-pyaudio
+    path: dora-pyaudio
+    inputs:
+      audio: dora-outtetts/audio
diff --git a/node-hub/dora-qwen/README.md b/node-hub/dora-qwen/README.md
new file mode 100644
index 00000000..99cf1ad4
--- /dev/null
+++ b/node-hub/dora-qwen/README.md
@@ -0,0 +1,37 @@
+# dora-qwen
+
+## Getting started
+
+- Install it with pip:
+
+```bash
+pip install -e .
+```
+
+## Contribution Guide
+
+- Format with [ruff](https://docs.astral.sh/ruff/):
+
+```bash
+ruff check . --fix
+```
+
+- Lint with ruff:
+
+```bash
+ruff check .
+```
+
+- Test with [pytest](https://github.com/pytest-dev/pytest)
+
+```bash
+pytest . # Test
+```
+
+## YAML Specification
+
+## Examples
+
+## License
+
+dora-qwen's code is released under the MIT License
diff --git a/node-hub/dora-qwen/dora_qwen/__init__.py b/node-hub/dora-qwen/dora_qwen/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-qwen/dora_qwen/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-qwen/dora_qwen/__main__.py b/node-hub/dora-qwen/dora_qwen/__main__.py
new file mode 100644
index 00000000..bcbfde6d
--- /dev/null
+++ b/node-hub/dora-qwen/dora_qwen/__main__.py
@@ -0,0 +1,5 @@
+from .main import main
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-qwen/dora_qwen/main.py b/node-hub/dora-qwen/dora_qwen/main.py
new file mode 100644
index 00000000..eee6cc31
--- /dev/null
+++ b/node-hub/dora-qwen/dora_qwen/main.py
@@ -0,0 +1,68 @@
+"""Dora node that replies to incoming text with a Qwen2.5 chat model."""
+
+import os
+from typing import Dict, List, Tuple
+
+import pyarrow as pa
+from dora import Node
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "Qwen/Qwen2.5-0.5B-Instruct"
+
+# NOTE: the model and tokenizer are loaded when this module is imported.
+model = AutoModelForCausalLM.from_pretrained(
+    model_name, torch_dtype="auto", device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+# Substrings, matched against the lower-cased input, that trigger a reply.
+TRIGGER_WORDS = ["you", "wh", "tu"]
+
+
+def generate_response(
+    prompt: str, history: List[Dict[str, str]]
+) -> Tuple[str, List[Dict[str, str]]]:
+    """Generate the assistant reply to ``prompt`` given the chat ``history``.
+
+    ``history`` is extended in place with the user prompt and the generated
+    reply; the reply text and that same list are returned.
+    """
+    history += [{"role": "user", "content": prompt}]
+    text = tokenizer.apply_chat_template(
+        history, tokenize=False, add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
+    # Drop the prompt tokens so that only the newly generated ones are decoded.
+    generated_ids = [
+        output_ids[len(input_ids) :]
+        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    history += [{"role": "assistant", "content": response}]
+    return response, history
+
+
+def main():
+    """Answer every input text that contains one of ``TRIGGER_WORDS``."""
+    history = [
+        {
+            "role": "system",
+            "content": "You are a Reachy robot, that gives extremely short answers only.",
+        },
+    ]
+    node = Node()
+
+    for event in node:
+        if event["type"] == "INPUT":
+            # The first element of the incoming value is the text to answer.
+            text = event["value"][0].as_py()
+            if any(word in text.lower() for word in TRIGGER_WORDS):
+                response, history = generate_response(text, history)
+                node.send_output(
+                    output_id="text", data=pa.array([response]), metadata={}
+                )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-qwen/pyproject.toml b/node-hub/dora-qwen/pyproject.toml
new file mode 100644
index 00000000..1ab515dd
--- /dev/null
+++ b/node-hub/dora-qwen/pyproject.toml
@@ -0,0 +1,22 @@
+[project]
+name = "dora-qwen"
+version = "0.0.0"
+authors = [{ name = "Your Name", email = "email@email.com" }]
+description = "dora-qwen"
+license = { text = "MIT" }
+readme = "README.md"
+requires-python = ">=3.8"
+
+dependencies = [
+    "dora-rs >= 0.3.6",
+    "pyarrow",
+    "torch",
+    "transformers >= 4.37.0",
+    "accelerate",
+]
+
+[dependency-groups]
+dev = ["pytest >=8.1.1", "ruff >=0.9.1"]
+
+[project.scripts]
+dora-qwen = "dora_qwen.main:main"
diff --git a/node-hub/dora-qwen/tests/test_dora_qwen.py b/node-hub/dora-qwen/tests/test_dora_qwen.py
new file mode 100644
index 00000000..da87ebb9
--- /dev/null
+++ b/node-hub/dora-qwen/tests/test_dora_qwen.py
@@ -0,0 +1,9 @@
+import pytest
+
+
+def test_import_main():
+    from dora_qwen.main import main
+
+    # Check that everything is working, and catch dora Runtime Exception as we're not running in a dora dataflow.
+    with pytest.raises(RuntimeError):
+        main()