Browse Source

Merge 82071cdbf0 into 77c277910b

pull/919/merge
7SOMAY GitHub 5 months ago
parent
commit
9f7c8280bf
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
7 changed files with 341 additions and 0 deletions
  1. +104
    -0
      node-hub/dora-lmdeploy/README.md
  2. +13
    -0
      node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py
  3. +6
    -0
      node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py
  4. +3
    -0
      node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json
  5. +151
    -0
      node-hub/dora-lmdeploy/dora_lmdeploy/main.py
  6. +52
    -0
      node-hub/dora-lmdeploy/pyproject.toml
  7. +12
    -0
      node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py

+ 104
- 0
node-hub/dora-lmdeploy/README.md View File

@@ -0,0 +1,104 @@
# dora-lmdeploy

## Getting started

- Install it with uv:

```bash
uv venv -p 3.11 --seed
uv pip install -e .
```

## Contribution Guide

- Format with [ruff](https://docs.astral.sh/ruff/):

```bash
uv pip install ruff
uv run ruff check . --fix
```

- Lint with ruff:

```bash
uv run ruff check .
```

- Test with [pytest](https://github.com/pytest-dev/pytest):

```bash
uv pip install pytest
uv run pytest . # Test
```

## YAML Specification

This node can be used as follows:

```yaml
- id: dora-lmdeploy
  build: pip install dora-lmdeploy
  path: dora-lmdeploy
  inputs:
    text:
      source: dora-distil-whisper/text # Optional text input
      queue_size: 1
    image:
      source: camera/image # Optional image input
      queue_size: 1
  outputs:
    - text # Model's response
  env:
    MODEL_NAME: "internlm/internlm2-7b" # Default model, can be changed
    MAX_LENGTH: 2048 # Maximum length of generated text
    TEMPERATURE: 0.7 # Sampling temperature
    TOP_P: 0.9 # Top-p sampling parameter
    SYSTEM_PROMPT: "You are a helpful AI assistant." # Optional system prompt
    DEFAULT_QUESTION: "Describe this image" # Default question when no text input is provided
    TURBOMIND_CACHE_DIR: "./workspace" # Cache directory for Turbomind
    TURBOMIND_TP: 1 # Tensor parallelism degree
    TURBOMIND_GPU_MEMORY_FRACTION: 0.8 # GPU memory fraction to use
```

### Available Models
The node supports various models that can be specified in the `MODEL_NAME` environment variable. Some examples:
- `internlm/internlm2-7b`
- `internlm/internlm2-20b`
- `internlm/internlm2-7b-chat`
- `internlm/internlm2-20b-chat`
- `Qwen/Qwen2-7B`
- `Qwen/Qwen2-14B`

### Input/Output
- **Inputs**:
  - `text`: Optional text input for text-only or multimodal tasks. If not provided, `DEFAULT_QUESTION` is used.
  - `image`: Optional image input for vision-language tasks. Supports multiple formats:
    - Raw image formats: bgr8, rgb8
    - File formats: jpeg, jpg, jpe, bmp, webp, png
- **Outputs**:
  - `text`: The model's generated response, with metadata containing the `image_id`

### Environment Variables
- `MODEL_NAME`: Name of the model to use (default: "internlm/internlm2-7b")
- `MAX_LENGTH`: Maximum length of generated text (default: 2048)
- `TEMPERATURE`: Sampling temperature (default: 0.7)
- `TOP_P`: Top-p sampling parameter (default: 0.9)
- `SYSTEM_PROMPT`: Optional system prompt to guide model behavior
- `DEFAULT_QUESTION`: Default question to use when no text input is provided
- `TURBOMIND_CACHE_DIR`: Directory for storing Turbomind cache (default: "./workspace")
- `TURBOMIND_TP`: Tensor parallelism degree (default: 1)
- `TURBOMIND_GPU_MEMORY_FRACTION`: GPU memory fraction to use (default: 0.8)

### Features
- Efficient inference using LMDeploy's Turbomind engine
- Support for multimodal inputs (text + image)
- Conversation history tracking
- Automatic image format conversion and processing
- Configurable model parameters and generation settings
- GPU memory optimization through Turbomind

## Examples

## License

dora-lmdeploy's code is released under the MIT License.

+ 13
- 0
node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py View File

@@ -0,0 +1,13 @@
"""TODO: Add docstring."""

import os

# README.md is looked up one directory above the package directory.
_package_dir = os.path.dirname(__file__)
readme_path = os.path.join(os.path.dirname(_package_dir), "README.md")

# Use the README text as the package docstring; fall back to a placeholder
# string when the file does not exist.
try:
    with open(readme_path, encoding="utf-8") as readme_file:
        __doc__ = readme_file.read()
except FileNotFoundError:
    __doc__ = "README file not found."

+ 6
- 0
node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py View File

@@ -0,0 +1,6 @@
"""TODO: Add docstring."""

# Re-use the node's entry point defined in the sibling ``main`` module.
from .main import main

# Only start the node when this module is executed as a script
# (e.g. ``python -m dora_lmdeploy``), not when it is merely imported.
if __name__ == "__main__":
    main()

+ 3
- 0
node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json View File

@@ -0,0 +1,3 @@
{
"chat_template": "{% set image_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\n{{ system_prompt }}<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Image {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
}

+ 151
- 0
node-hub/dora-lmdeploy/dora_lmdeploy/main.py View File

@@ -0,0 +1,151 @@
"""TODO: Add docstring."""

import os

import cv2
import numpy as np
import pyarrow as pa
from dora import Node
from lmdeploy import TurbomindEngineConfig, pipeline
from PIL import Image

# --- Model selection -------------------------------------------------------
# MODEL_NAME may be overridden through the environment; otherwise the
# built-in default is used.
DEFAULT_MODEL = "internlm/internlm2-7b"
MODEL_NAME = os.environ.get("MODEL_NAME", DEFAULT_MODEL)

# --- Prompts ---------------------------------------------------------------
# System prompt sent with every request, and the question used as the initial
# cached text until a non-empty text input arrives.
SYSTEM_PROMPT = os.environ.get(
    "SYSTEM_PROMPT",
    "You're a very succinct AI assistant, that describes image with a very short sentence.",
)
DEFAULT_QUESTION = os.environ.get("DEFAULT_QUESTION", "Describe this image")

# --- Turbomind engine settings ---------------------------------------------
# Environment values arrive as strings, so numeric settings are cast here.
TURBOMIND_CACHE_DIR = os.environ.get("TURBOMIND_CACHE_DIR", "./workspace")
TURBOMIND_TP = int(os.environ.get("TURBOMIND_TP", "1"))
TURBOMIND_GPU_MEMORY_FRACTION = float(
    os.environ.get("TURBOMIND_GPU_MEMORY_FRACTION", "0.8")
)

# --- Generation parameters -------------------------------------------------
MAX_LENGTH = int(os.environ.get("MAX_LENGTH", "2048"))
TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.7"))
TOP_P = float(os.environ.get("TOP_P", "0.9"))

# Turbomind engine configuration.
#
# Only fields that ``TurbomindEngineConfig`` actually declares are passed:
#   * ``tp``                    - tensor-parallel degree.
#   * ``cache_max_entry_count`` - fraction of GPU memory given to the k/v
#                                 cache; driven by TURBOMIND_GPU_MEMORY_FRACTION
#                                 instead of a hard-coded 0.8 so the documented
#                                 environment variable takes effect.
#   * ``download_dir``          - where model weights are downloaded/loaded.
#     NOTE(review): TURBOMIND_CACHE_DIR is mapped to ``download_dir`` because the
#     config has no ``cache_dir`` field - confirm this is the intended meaning.
# The model itself is selected through ``pipeline(model_path=...)`` below.
engine_config = TurbomindEngineConfig(
    tp=TURBOMIND_TP,
    cache_max_entry_count=TURBOMIND_GPU_MEMORY_FRACTION,
    download_dir=TURBOMIND_CACHE_DIR,
)

# Build the inference pipeline once at import time (this loads the model).
# ``pipeline`` receives the engine settings through ``backend_config``.
# Sampling parameters (MAX_LENGTH, TEMPERATURE, TOP_P) are not pipeline
# arguments: lmdeploy expects them in a ``GenerationConfig`` supplied with
# each request.
pipe = pipeline(
    model_path=MODEL_NAME,
    backend_config=engine_config,
)


def process_image(storage, metadata):
    """Convert an image payload into an RGB ``PIL.Image``.

    Args:
        storage: Object exposing ``to_numpy()`` that yields the image bytes
            (raw pixels for ``bgr8``/``rgb8``, an encoded file buffer for the
            compressed formats).
        metadata: Mapping with an ``"encoding"`` key. Raw encodings
            additionally require ``"width"`` and ``"height"``.

    Returns:
        The image as a ``PIL.Image`` built from an RGB-ordered array.

    Raises:
        RuntimeError: If the encoding is not supported, or a compressed
            payload cannot be decoded.

    """
    encoding = metadata["encoding"]

    if encoding in ("bgr8", "rgb8"):
        # Dimensions are only needed (and only read) for raw pixel buffers.
        height = metadata["height"]
        width = metadata["width"]
        frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
        if encoding == "bgr8":
            frame = frame[:, :, ::-1]  # BGR to RGB
    elif encoding in ("jpeg", "jpg", "jpe", "bmp", "webp", "png"):
        frame = cv2.imdecode(storage.to_numpy(), cv2.IMREAD_COLOR)
        if frame is None:
            # imdecode signals failure by returning None instead of raising.
            raise RuntimeError(f"Failed to decode {encoding} image")
        frame = frame[:, :, ::-1]  # OpenCV decodes to BGR; convert to RGB
    else:
        raise RuntimeError(f"Unsupported image encoding: {encoding}")

    return Image.fromarray(frame)


def generate_response(image, text, history=None):
    """Generate a reply for ``text`` about ``image`` using the LMDeploy pipeline.

    Args:
        image: Image to attach to the current user turn.
        text: User question for this turn.
        history: Previous conversation turns as a list of message dicts, or
            ``None`` to start a new conversation. The list is not mutated.

    Returns:
        A ``(response_text, new_history)`` tuple. ``new_history`` contains the
        prior turns followed by this turn's user text and assistant reply.
        The system prompt is never stored in it; it is added per request.

    """
    # Local import: only this function needs GenerationConfig.
    from lmdeploy import GenerationConfig

    # Drop any system message carried over in ``history`` so the system prompt
    # appears exactly once per request instead of accumulating every turn.
    turns = [msg for msg in (history or []) if msg.get("role") != "system"]

    system_messages = (
        [{"role": "system", "content": SYSTEM_PROMPT}] if SYSTEM_PROMPT else []
    )

    # NOTE(review): the image content item format ({"type": "image", ...}) is
    # kept as originally written - confirm it matches what the installed
    # lmdeploy version accepts for vision inputs.
    user_message = {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": text},
        ],
    }
    messages = system_messages + turns + [user_message]

    # Sampling settings are passed per request through GenerationConfig.
    gen_config = GenerationConfig(
        max_new_tokens=MAX_LENGTH,
        temperature=TEMPERATURE,
        top_p=TOP_P,
    )
    response = pipe(messages, gen_config=gen_config)

    # Record the user turn as text only, so past images are not retained and
    # re-sent with every later request, then append the assistant reply.
    new_history = turns + [
        {"role": "user", "content": text},
        {"role": "assistant", "content": response.text},
    ]
    return response.text, new_history


def main():
    """Run the node's event loop.

    Image inputs (event ids containing ``"image"``) are decoded and kept as
    the current image. Text inputs (event ids containing ``"text"``) trigger
    a generation against the current image; an empty text input re-uses the
    last text, which starts out as ``DEFAULT_QUESTION``. Each reply is sent on
    the ``text`` output with ``image_id`` metadata naming the input that
    delivered the image. Text inputs received before any image are remembered
    but produce no output.
    """
    node = Node()
    history = []
    cached_text = DEFAULT_QUESTION
    current_image = None
    current_image_id = None  # id of the input event that delivered current_image

    for event in node:
        event_type = event["type"]

        if event_type == "ERROR":
            # Formatting (rather than ``+``) keeps a non-str error payload from
            # raising TypeError inside the error handler itself.
            print(f"Event Error:{event['error']}")
            continue
        if event_type != "INPUT":
            continue

        event_id = event["id"]

        if "image" in event_id:
            # Process image input and remember which input it came from.
            current_image = process_image(event["value"], event["metadata"])
            current_image_id = event_id

        elif "text" in event_id:
            # Process text input; fall back to the last question when empty.
            if len(event["value"]) > 0:
                text = event["value"][0].as_py()
            else:
                text = cached_text

            cached_text = text

            if current_image is None:
                continue

            # Generate response
            response, history = generate_response(current_image, text, history)

            # Send output, tagged with the id of the image that was described
            # (not the id of the text event that triggered generation).
            node.send_output(
                "text",
                pa.array([response]),
                {"image_id": current_image_id},
            )


if __name__ == "__main__":
    main()

+ 52
- 0
node-hub/dora-lmdeploy/pyproject.toml View File

@@ -0,0 +1,52 @@
[project]
name = "dora-lmdeploy"
version = "0.3.10"
authors = [
{ name = "Somay", email = "ssomay2002@gmail.com" },
]
description = "Dora Node for LMDeploy with Turbomind"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.9"

dependencies = [
"dora-rs >= 0.3.9",
"numpy < 2.0.0",
"torch == 2.4.0",
"torchvision >= 0.19",
"torchaudio >= 2.1.0",
"opencv-python >= 4.1.1",
"lmdeploy>=0.3.0",
"setuptools>=65.0.0",
]

# Currently flash_attn is not supported as a pip install within uv.
# [[tool.uv.dependency-metadata]]
# name = "flash-attn"
# version = "2.7.1"
# requires = ["setuptools", "torch"]

# [tool.uv]
# no-build-isolation-package = ['flash-attn']

[dependency-groups]
dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

[project.scripts]
dora-lmdeploy = "dora_lmdeploy.main:main"

[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.ruff.lint]
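extend-select = [
    "D", # pydocstyle (docstring conventions)
    "UP", # pyupgrade (modernize syntax)
    "PERF", # Perflint (performance anti-patterns)
    "RET", # flake8-return (return statement consistency)
    "RSE", # flake8-raise (raise statement style)
    "NPY", # NumPy-specific rules
    "N", # pep8-naming (naming conventions)
    "I", # isort (import ordering)
]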

+ 12
- 0
node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py View File

@@ -0,0 +1,12 @@
"""TODO: Add docstring."""

import pytest


def test_import_main():
    """Check that the node's ``main`` can be imported and fails outside a dataflow."""
    # NOTE(review): importing dora_lmdeploy.main also executes its module-level
    # ``pipeline(...)`` call, so the model is loaded before ``main()`` runs.
    from dora_lmdeploy.main import main

    # Check that everything is working, and catch dora Runtime Exception as we're not running in a dora dataflow.
    with pytest.raises(RuntimeError):
        main()

Loading…
Cancel
Save