Compare commits

...

7 Commits

7 changed files with 246 additions and 1 deletions
Split View
  1. +1
    -1
      .gitignore
  2. +40
    -0
      node-hub/dora-phi4/README.md
  3. +11
    -0
      node-hub/dora-phi4/dora_phi4/__init__.py
  4. +5
    -0
      node-hub/dora-phi4/dora_phi4/__main__.py
  5. +148
    -0
      node-hub/dora-phi4/dora_phi4/main.py
  6. +32
    -0
      node-hub/dora-phi4/pyproject.toml
  7. +9
    -0
      node-hub/dora-phi4/tests/test_dora_phi4.py

+ 1
- 1
.gitignore View File

@@ -179,4 +179,4 @@ out/
#Miscellaneous
yolo.yml

~*
~*

+ 40
- 0
node-hub/dora-phi4/README.md View File

@@ -0,0 +1,40 @@
# dora-phi4

## Getting started

- Install it with uv:

```bash
uv venv -p 3.11 --seed
uv pip install -e .
```

## Contribution Guide

- Format with [ruff](https://docs.astral.sh/ruff/):

```bash
uv pip install ruff
uv run ruff check . --fix
```

- Lint with ruff:

```bash
uv run ruff check .
```

- Test with [pytest](https://github.com/pytest-dev/pytest):

```bash
uv pip install pytest
uv run pytest . # Test
```

## YAML Specification

## Examples

## License

dora-phi4's code is released under the MIT License.

+ 11
- 0
node-hub/dora-phi4/dora_phi4/__init__.py View File

@@ -0,0 +1,11 @@
import os

# README.md lives one directory above this package directory.
readme_path = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "README.md",
)

# Use the README text as the package docstring; fall back to a fixed
# message when the file is not present next to the package.
try:
    with open(readme_path, encoding="utf-8") as f:
        __doc__ = f.read()
except FileNotFoundError:
    __doc__ = "README file not found."

+ 5
- 0
node-hub/dora-phi4/dora_phi4/__main__.py View File

@@ -0,0 +1,5 @@
from .main import main


# Entry point used when the package is executed as `python -m dora_phi4`.
if __name__ == "__main__":
    main()

+ 148
- 0
node-hub/dora-phi4/dora_phi4/main.py View File

@@ -0,0 +1,148 @@
import io
from urllib.request import urlopen

import pyarrow as pa
import requests
import soundfile as sf
import torch
from accelerate import infer_auto_device_map
from dora import Node
from PIL import Image
from transformers import (
AutoModelForCausalLM,
AutoProcessor,
GenerationConfig,
)

# Pick the compute device in order of preference (CUDA, then Apple MPS,
# then CPU) together with the dtype associated with that device.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16  # half precision for efficiency
elif torch.backends.mps.is_available():
    device, torch_dtype = "mps", torch.float16  # half precision to reduce memory on MPS
else:
    device, torch_dtype = "cpu", torch.float32  # full precision on CPU


# Load the model and processor
MODEL_PATH = "microsoft/Phi-4-multimodal-instruct"

processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)

# FlashAttention-2 is requested only on CUDA devices that report more than
# 16e9 bytes of total memory; every other configuration uses eager attention.
_use_flash_attention = (
    device == "cuda" and torch.cuda.get_device_properties(0).total_memory > 16e9
)

# Load the checkpoint exactly once. The previous code loaded the full model,
# inferred a device map from it, and then loaded the checkpoint a second time
# with that map, doubling load time and peak memory. `device_map="auto"` lets
# transformers/accelerate compute the placement during the single load.
#
# NOTE(review): the module-level `torch_dtype` selected during device
# detection is not used here; non-CUDA devices are loaded in bfloat16 as
# before. Confirm which dtype is actually intended for MPS/CPU.
# NOTE: 4-bit quantization (BitsAndBytesConfig) is intentionally not enabled.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float16 if device == "cuda" else torch.bfloat16,
    trust_remote_code=True,
    _attn_implementation="flash_attention_2" if _use_flash_attention else "eager",
    low_cpu_mem_usage=True,
    device_map="auto",
)

# Kept as a module-level name for backward compatibility: the placement that
# was actually applied to the loaded model (None if it was not recorded).
device_map = getattr(model, "hf_device_map", None)

generation_config = GenerationConfig.from_pretrained(MODEL_PATH)

# Define prompt structure
USER_PROMPT = "<|user|>"
ASSISTANT_PROMPT = "<|assistant|>"
PROMPT_SUFFIX = "<|end|>"


def process_image(image_url, timeout=30.0):
    """Describe the image at ``image_url`` using the loaded model.

    Args:
        image_url: URL of the image to download.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded text generated by the model for the fixed question
        "What is shown in this image?".

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    prompt = f"{USER_PROMPT}<|image_1|>What is shown in this image?{PROMPT_SUFFIX}{ASSISTANT_PROMPT}"

    # Fetch the whole body with a timeout and fail loudly on HTTP errors,
    # instead of handing PIL an unchecked, never-closed raw socket stream
    # (which also skips any content decoding done by requests).
    response = requests.get(image_url, timeout=timeout)
    response.raise_for_status()
    image = Image.open(io.BytesIO(response.content))

    # Tokenize the prompt and preprocess the image, then move to the model's device
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)

    # Generate without tracking gradients
    with torch.no_grad():
        generate_ids = model.generate(
            **inputs, max_new_tokens=512, generation_config=generation_config
        )
    # Drop the prompt tokens so only the newly generated tokens are decoded
    prompt_length = inputs["input_ids"].shape[1]
    generate_ids = generate_ids[:, prompt_length:]

    response_text = processor.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
    return response_text


def process_audio(audio_url, timeout=30.0):
    """Transcribe the audio at ``audio_url`` and translate it to French.

    Args:
        audio_url: URL of the audio file to download.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        The decoded text generated by the model; the prompt asks for the
        transcript and the French translation separated by ``<sep>``.

    Raises:
        urllib.error.URLError: If the download fails or times out.
    """
    speech_prompt = "Transcribe the audio to text, and then translate the audio to French. Use <sep> as a separator."
    prompt = f"{USER_PROMPT}<|audio_1|>{speech_prompt}{PROMPT_SUFFIX}{ASSISTANT_PROMPT}"

    # Download with a timeout and close the connection deterministically;
    # the previous code never closed the response returned by urlopen.
    with urlopen(audio_url, timeout=timeout) as remote:
        payload = remote.read()
    audio, samplerate = sf.read(io.BytesIO(payload))

    # Tokenize the prompt and preprocess the audio, then move to the model's device
    inputs = processor(
        text=prompt, audios=[(audio, samplerate)], return_tensors="pt"
    ).to(model.device)

    # Generate without tracking gradients
    with torch.no_grad():
        generate_ids = model.generate(
            **inputs, max_new_tokens=512, generation_config=generation_config
        )
    # Drop the prompt tokens so only the newly generated tokens are decoded
    prompt_length = inputs["input_ids"].shape[1]
    generate_ids = generate_ids[:, prompt_length:]

    response = processor.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
    return response


def main():
    """Run the dora node loop.

    For every ``INPUT`` event, the first element of the event value is taken
    as a URL. ``image_input`` is answered on ``image_output`` with the result
    of ``process_image``; ``audio_input`` is answered on ``audio_output`` with
    the result of ``process_audio``. Other inputs and event types are ignored.
    """
    node = Node()

    # input id -> (handler, output id)
    routes = {
        "image_input": (process_image, "image_output"),
        "audio_input": (process_audio, "audio_output"),
    }

    for event in node:
        if event["type"] != "INPUT":
            continue

        input_id = event["id"]
        value = event["value"]

        print(f"Received event: {input_id}, value: {value}")

        route = routes.get(input_id)
        if route is None:
            continue
        handler, output_id = route

        # An empty array carries no URL to process.
        if len(value) == 0:
            print(f"Ignoring empty value for input: {input_id}")
            continue

        # dora delivers input values as PyArrow arrays; `as_py()` is available
        # on the array's elements (scalars), not on the array itself, so the
        # URL is read from the first element.
        url = value[0].as_py()
        node.send_output(output_id=output_id, data=pa.array([handler(url)]))


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()

+ 32
- 0
node-hub/dora-phi4/pyproject.toml View File

@@ -0,0 +1,32 @@
[project]
name = "dora-phi4"
version = "0.0.0"
authors = [{ name = "Somay", email = "ssomay2002@gmail.com" }]
description = "DORA node for Phi-4 multimodal model"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.10"

dependencies = [
"dora-rs>=0.3.9",
"torch==2.6.0",
"torchvision==0.21.0",
"transformers==4.48.2",
"accelerate==1.3.0",
"soundfile==0.13.1",
"pillow==11.1.0",
"scipy==1.15.2",
"backoff==2.2.1",
"peft==0.13.2",
"bitsandbytes>=0.42.0",
"requests"
]

[tool.setuptools]
packages = ["dora_phi4"]

[dependency-groups]
dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

[project.scripts]
dora-phi4 = "dora_phi4.main:main"

+ 9
- 0
node-hub/dora-phi4/tests/test_dora_phi4.py View File

@@ -0,0 +1,9 @@
import pytest


def test_import_main():
    """Check that ``main`` imports and raises ``RuntimeError`` outside a dora dataflow."""
    import dora_phi4.main as phi4_main

    # The test is not run inside a dora dataflow, so starting the node is
    # expected to fail with dora's RuntimeError.
    with pytest.raises(RuntimeError):
        phi4_main.main()

Loading…
Cancel
Save