diff --git a/node-hub/dora-phi4/README.md b/node-hub/dora-phi4/README.md
new file mode 100644
index 00000000..3f71e018
--- /dev/null
+++ b/node-hub/dora-phi4/README.md
@@ -0,0 +1,40 @@
+# dora-phi4
+
+## Getting started
+
+- Install it with uv:
+
+```bash
+uv venv -p 3.11 --seed
+uv pip install -e .
+```
+
+## Contribution Guide
+
+- Format with [ruff](https://docs.astral.sh/ruff/):
+
+```bash
+uv pip install ruff
+uv run ruff check . --fix
+```
+
+- Lint with ruff:
+
+```bash
+uv run ruff check .
+```
+
+- Test with [pytest](https://github.com/pytest-dev/pytest):
+
+```bash
+uv pip install pytest
+uv run pytest . # Test
+```
+
+## YAML Specification
+
+## Examples
+
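+The node reads URL strings on its `image_input` and `audio_input` inputs and emits the model's text replies on `image_output` and `audio_output`. A minimal dataflow sketch is shown below; the `url-source` node, its `path`, and its outputs are placeholders for whatever upstream node supplies the URLs and are not part of this package:
+
+```yaml
+nodes:
+  - id: url-source             # placeholder upstream node
+    path: url_source.py        # hypothetical script emitting URL strings
+    outputs:
+      - image_url
+      - audio_url
+
+  - id: dora-phi4
+    path: dora-phi4
+    inputs:
+      image_input: url-source/image_url
+      audio_input: url-source/audio_url
+    outputs:
+      - image_output
+      - audio_output
+```
+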
+## License
+
+dora-phi4's code is released under the MIT License.
diff --git a/node-hub/dora-phi4/dora_phi4/__init__.py b/node-hub/dora-phi4/dora_phi4/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-phi4/dora_phi4/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-phi4/dora_phi4/__main__.py b/node-hub/dora-phi4/dora_phi4/__main__.py
new file mode 100644
index 00000000..bcbfde6d
--- /dev/null
+++ b/node-hub/dora-phi4/dora_phi4/__main__.py
@@ -0,0 +1,5 @@
+from .main import main
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-phi4/dora_phi4/main.py b/node-hub/dora-phi4/dora_phi4/main.py
new file mode 100644
index 00000000..21a9488b
--- /dev/null
+++ b/node-hub/dora-phi4/dora_phi4/main.py
@@ -0,0 +1,90 @@
+from dora import Node
+import pyarrow as pa
+import requests
+import torch
+import io
+from PIL import Image
+import soundfile as sf
+from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
+from urllib.request import urlopen
+
+# Load the model and processor
+MODEL_PATH = "microsoft/Phi-4-multimodal-instruct"
+
+processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_PATH,
+    device_map="cuda",
+    torch_dtype="auto",
+    trust_remote_code=True,
+    _attn_implementation="flash_attention_2",
+).cuda()
+
+generation_config = GenerationConfig.from_pretrained(MODEL_PATH)
+
+# Define prompt structure
+USER_PROMPT = "<|user|>"
+ASSISTANT_PROMPT = "<|assistant|>"
+PROMPT_SUFFIX = "<|end|>"
+
+
+def process_image(image_url):
+    """Processes an image through the model and returns the response."""
+    prompt = f"{USER_PROMPT}<|image_1|>What is shown in this image?{PROMPT_SUFFIX}{ASSISTANT_PROMPT}"
+
+    # Download and open image
+    image = Image.open(requests.get(image_url, stream=True).raw)
+
+    # Process input
+    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda:0")
+
+    # Generate response
+    generate_ids = model.generate(**inputs, max_new_tokens=1000, generation_config=generation_config)
+    generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]
+
+    response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+    return response
+
+
+def process_audio(audio_url):
+    """Processes an audio file through the model and returns the transcript + translation."""
+    speech_prompt = "Transcribe the audio to text, and then translate the audio to French. Use <sep> as a separator."
+    prompt = f"{USER_PROMPT}<|audio_1|>{speech_prompt}{PROMPT_SUFFIX}{ASSISTANT_PROMPT}"
+
+    # Download and read audio file
+    audio, samplerate = sf.read(io.BytesIO(urlopen(audio_url).read()))
+
+    # Process input
+    inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors="pt").to("cuda:0")
+
+    # Generate response
+    generate_ids = model.generate(**inputs, max_new_tokens=1000, generation_config=generation_config)
+    generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]
+
+    response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+    return response
+
+
+def main():
+    node = Node()
+
+    for event in node:
+        if event["type"] == "INPUT":
+            input_id = event["id"]
+            value = event["value"]
+
+            print(f"Received event: {input_id}, value: {value}")
+
+            # Check if it's an image URL
+            if input_id == "image_input":
+                image_response = process_image(value[0].as_py())  # Extract the URL string from the PyArrow array
+                node.send_output(output_id="image_output", data=pa.array([image_response]))
+
+            # Check if it's an audio URL
+            elif input_id == "audio_input":
+                audio_response = process_audio(value[0].as_py())  # Extract the URL string from the PyArrow array
+                node.send_output(output_id="audio_output", data=pa.array([audio_response]))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-phi4/pyproject.toml b/node-hub/dora-phi4/pyproject.toml
new file mode 100644
index 00000000..5f5bb405
--- /dev/null
+++ b/node-hub/dora-phi4/pyproject.toml
@@ -0,0 +1,32 @@
+[project]
+name = "dora-phi4"
+version = "0.0.0"
+authors = [{ name = "Somay", email = "ssomay2002@gmail.com" }]
+description = "DORA node for Phi-4 multimodal model"
+license = { text = "MIT" }
+readme = "README.md"
+requires-python = ">=3.8"
+
+dependencies = [
+    "dora-rs >=0.3.9",
+    "torch==2.6.0",
+    "torchvision==0.21.0",
+    "flash_attn==2.7.4.post1",
+    "transformers==4.48.2",
+    "accelerate==1.3.0",
+    "soundfile==0.13.1",
+    "pillow==11.1.0",
+    "scipy==1.15.2",
+    "backoff==2.2.1",
+    "peft==0.13.2",
+    "requests"
+]
+
+[tool.setuptools]
+packages = ["dora_phi4"]
+
+[dependency-groups]
+dev = ["pytest >=8.1.1", "ruff >=0.9.1"]
+
+[project.scripts]
+dora-phi4 = "dora_phi4.main:main"
diff --git a/node-hub/dora-phi4/tests/test_dora_phi4.py b/node-hub/dora-phi4/tests/test_dora_phi4.py
new file mode 100644
index 00000000..fc0a3883
--- /dev/null
+++ b/node-hub/dora-phi4/tests/test_dora_phi4.py
@@ -0,0 +1,9 @@
+import pytest
+
+
+def test_import_main():
+    from dora_phi4.main import main
+
+    # Check that everything is working, and catch the RuntimeError dora raises since we're not running in a dora dataflow.
+    with pytest.raises(RuntimeError):
+        main()
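+
+
+def test_package_docstring():
+    # Lightweight sketch of an import-only check (no model load): __init__.py sets
+    # the package docstring from README.md, or a fallback string if the file is missing.
+    import dora_phi4
+
+    assert isinstance(dora_phi4.__doc__, str)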