From 82071cdbf04794c26e1e06d7bab7d720106a219a Mon Sep 17 00:00:00 2001
From: 7SOMAY
Date: Thu, 10 Apr 2025 01:13:33 +0530
Subject: [PATCH] feat: add dora lmdeploy integration

---
 node-hub/dora-lmdeploy/README.md              | 104 ++++++++++++
 .../dora-lmdeploy/dora_lmdeploy/__init__.py   |  13 ++
 .../dora-lmdeploy/dora_lmdeploy/__main__.py   |   6 +
 .../dora_lmdeploy/chat_template.json          |   3 +
 node-hub/dora-lmdeploy/dora_lmdeploy/main.py  | 151 ++++++++++++++++++
 node-hub/dora-lmdeploy/pyproject.toml         |  52 ++++++
 .../dora-lmdeploy/tests/test_dora_lmdeploy.py |  12 ++
 7 files changed, 341 insertions(+)
 create mode 100644 node-hub/dora-lmdeploy/README.md
 create mode 100644 node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py
 create mode 100644 node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py
 create mode 100644 node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json
 create mode 100644 node-hub/dora-lmdeploy/dora_lmdeploy/main.py
 create mode 100644 node-hub/dora-lmdeploy/pyproject.toml
 create mode 100644 node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py

diff --git a/node-hub/dora-lmdeploy/README.md b/node-hub/dora-lmdeploy/README.md
new file mode 100644
index 00000000..019f98ef
--- /dev/null
+++ b/node-hub/dora-lmdeploy/README.md
@@ -0,0 +1,104 @@
# dora-lmdeploy

## Getting started

- Install it with uv:

```bash
uv venv -p 3.11 --seed
uv pip install -e .
```

## Contribution Guide

- Format with [ruff](https://docs.astral.sh/ruff/):

```bash
uv pip install ruff
uv run ruff check . --fix
```

- Lint with ruff:

```bash
uv run ruff check .
```

- Test with [pytest](https://github.com/pytest-dev/pytest):

```bash
uv pip install pytest
uv run pytest . # Test
```

## YAML Specification

This node can be used as follows:

```yaml
- id: dora-lmdeploy
  build: pip install dora-lmdeploy
  path: dora-lmdeploy
  inputs:
    text:
      source: dora-distil-whisper/text # Optional text input
      queue_size: 1
    image:
      source: camera/image # Optional image input
      queue_size: 1
  outputs:
    - text # Model's response
  env:
    MODEL_NAME: "internlm/internlm2-7b" # Default model, can be changed
    MAX_LENGTH: 2048 # Maximum number of generated tokens
    TEMPERATURE: 0.7 # Sampling temperature
    TOP_P: 0.9 # Top-p sampling parameter
    SYSTEM_PROMPT: "You are a helpful AI assistant." # Optional system prompt
    DEFAULT_QUESTION: "Describe this image" # Default question when no text input is provided
    TURBOMIND_CACHE_DIR: "./workspace" # Download/cache directory for model weights
    TURBOMIND_TP: 1 # Tensor parallelism degree
    TURBOMIND_GPU_MEMORY_FRACTION: 0.8 # Fraction of GPU memory reserved for the k/v cache
```

### Available Models

The node supports any model that LMDeploy's Turbomind engine can load; select it through the `MODEL_NAME` environment variable. Some examples:

- `internlm/internlm2-7b`
- `internlm/internlm2-20b`
- `internlm/internlm2-chat-7b`
- `internlm/internlm2-chat-20b`
- `Qwen/Qwen2-7B`
- `Qwen/Qwen2-7B-Instruct`

Note that the `image` input requires a vision-language model (the bundled `chat_template.json` targets Qwen2-VL-style chat formats); text-only models such as the default `internlm/internlm2-7b` can only handle the `text` input.

### Input/Output

- **Inputs**:
  - `text`: Optional text input for text-only or multimodal tasks. If not provided, `DEFAULT_QUESTION` is used.
  - `image`: Optional image input for vision-language tasks. Supports multiple formats:
    - Raw image formats: bgr8, rgb8
    - File formats: jpeg, jpg, jpe, bmp, webp, png
- **Outputs**:
  - `text`: The model's generated response, with metadata containing the `image_id`

### Environment Variables

- `MODEL_NAME`: Name of the model to use (default: "internlm/internlm2-7b")
- `MAX_LENGTH`: Maximum number of generated tokens (default: 2048)
- `TEMPERATURE`: Sampling temperature (default: 0.7)
- `TOP_P`: Top-p sampling parameter (default: 0.9)
- `SYSTEM_PROMPT`: Optional system prompt to guide model behavior
- `DEFAULT_QUESTION`: Default question to use when no text input is provided
- `TURBOMIND_CACHE_DIR`: Directory for downloading and caching model weights (default: "./workspace")
- `TURBOMIND_TP`: Tensor parallelism degree (default: 1)
- `TURBOMIND_GPU_MEMORY_FRACTION`: Fraction of GPU memory reserved for the k/v cache (default: 0.8)

### Features

- Efficient inference using LMDeploy's Turbomind engine
- Support for multimodal inputs (text + image)
- Conversation history tracking
- Automatic image format conversion and processing
- Configurable model parameters and generation settings
- GPU memory optimization through Turbomind

## Examples
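A minimal end-to-end dataflow: a webcam node feeding frames into this node. This is a sketch; it assumes the `opencv-video-capture` node from the dora node hub and a vision-language model that LMDeploy supports, so substitute whatever camera source and model fit your setup:

```yaml
nodes:
  - id: camera
    build: pip install opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/500
    outputs:
      - image
    env:
      CAPTURE_PATH: 0

  - id: dora-lmdeploy
    build: pip install dora-lmdeploy
    path: dora-lmdeploy
    inputs:
      image:
        source: camera/image
        queue_size: 1
    outputs:
      - text
    env:
      MODEL_NAME: "OpenGVLab/InternVL2-8B" # assumed VL model; pick one that fits your GPU
```

With a recent dora CLI this can be saved as `dataflow.yml` and started with `dora build dataflow.yml` followed by `dora run dataflow.yml`.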
## License

dora-lmdeploy's code is released under the MIT License.

diff --git a/node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py b/node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py
new file mode 100644
index 00000000..79cbf370
--- /dev/null
+++ b/node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py
@@ -0,0 +1,13 @@
"""Expose the package README as the module docstring."""

import os

# Define the path to the README file relative to the package directory
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Read the content of the README file
try:
    with open(readme_path, encoding="utf-8") as f:
        __doc__ = f.read()
except FileNotFoundError:
    __doc__ = "README file not found."
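This README-as-docstring pattern is shared across the node hub; a quick sanity check (a sketch, run from an environment where the package is installed):

```python
import dora_lmdeploy

# __doc__ holds the README text (or the fallback message if the file is
# missing), so help(dora_lmdeploy) shows the same documentation as the repo.
print(dora_lmdeploy.__doc__.splitlines()[0])  # expected: "# dora-lmdeploy"
```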
diff --git a/node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py b/node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py
new file mode 100644
index 00000000..51a1554d
--- /dev/null
+++ b/node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py
@@ -0,0 +1,6 @@
"""Run the node with `python -m dora_lmdeploy`."""

from .main import main

if __name__ == "__main__":
    main()

diff --git a/node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json b/node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json
new file mode 100644
index 00000000..8f032ae8
--- /dev/null
+++ b/node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json
@@ -0,0 +1,3 @@
{
    "chat_template": "{% set image_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\n{{ system_prompt }}<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Image {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
}
\ No newline at end of file
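The template follows the Qwen2-VL-style chat format: `<|im_start|>`/`<|im_end|>` turns, with `<|vision_start|><|image_pad|><|vision_end|>` standing in for each image. To see what it produces, it can be exercised with `jinja2` directly (a sketch; the `system_prompt`, `add_vision_id`, and `add_generation_prompt` variables are the ones the template itself expects):

```python
import json

from jinja2 import Template

# Load the bundled chat template (path relative to the package root).
with open("dora_lmdeploy/chat_template.json") as f:
    template = Template(json.load(f)["chat_template"])

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},  # each image item becomes the vision placeholder
            {"type": "text", "text": "Describe this image"},
        ],
    },
]

print(
    template.render(
        messages=messages,
        system_prompt="You are a helpful AI assistant.",
        add_vision_id=False,
        add_generation_prompt=True,
    )
)
```

Because the first message is not a system turn, the template injects `system_prompt` ahead of it, and with `add_generation_prompt=True` the rendered prompt ends with an open `<|im_start|>assistant` turn for the model to complete.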
diff --git a/node-hub/dora-lmdeploy/dora_lmdeploy/main.py b/node-hub/dora-lmdeploy/dora_lmdeploy/main.py
new file mode 100644
index 00000000..6cd31e6d
--- /dev/null
+++ b/node-hub/dora-lmdeploy/dora_lmdeploy/main.py
@@ -0,0 +1,151 @@
"""Dora node running multimodal inference through LMDeploy's Turbomind engine."""

import os

import cv2
import numpy as np
import pyarrow as pa
from dora import Node
from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
from PIL import Image

# Default model configuration
DEFAULT_MODEL = "internlm/internlm2-7b"
MODEL_NAME = os.getenv("MODEL_NAME", DEFAULT_MODEL)

# System prompt and default question
SYSTEM_PROMPT = os.getenv(
    "SYSTEM_PROMPT",
    "You're a very succinct AI assistant that describes images in a single short sentence.",
)
DEFAULT_QUESTION = os.getenv(
    "DEFAULT_QUESTION",
    "Describe this image",
)

# Turbomind configuration
TURBOMIND_CACHE_DIR = os.getenv("TURBOMIND_CACHE_DIR", "./workspace")
TURBOMIND_TP = int(os.getenv("TURBOMIND_TP", "1"))
TURBOMIND_GPU_MEMORY_FRACTION = float(os.getenv("TURBOMIND_GPU_MEMORY_FRACTION", "0.8"))

# Generation parameters
MAX_LENGTH = int(os.getenv("MAX_LENGTH", "2048"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
TOP_P = float(os.getenv("TOP_P", "0.9"))

# Initialize the Turbomind engine config. Turbomind expresses its k/v cache
# budget as a fraction of free GPU memory (`cache_max_entry_count`), so the
# GPU memory env var maps onto that field; weights are fetched into
# `download_dir`.
engine_config = TurbomindEngineConfig(
    tp=TURBOMIND_TP,
    cache_max_entry_count=TURBOMIND_GPU_MEMORY_FRACTION,
    download_dir=TURBOMIND_CACHE_DIR,
)

# Initialize the pipeline. Sampling parameters are passed per call through a
# GenerationConfig rather than at pipeline construction time.
pipe = pipeline(MODEL_NAME, backend_config=engine_config)
gen_config = GenerationConfig(
    max_new_tokens=MAX_LENGTH,
    temperature=TEMPERATURE,
    top_p=TOP_P,
)


def process_image(storage, metadata):
    """Convert a dora image buffer plus metadata into a PIL RGB image."""
    encoding = metadata["encoding"]
    width = metadata["width"]
    height = metadata["height"]

    if encoding == "bgr8":
        frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
        frame = frame[:, :, ::-1]  # BGR to RGB
    elif encoding == "rgb8":
        frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
    elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
        buffer = storage.to_numpy().astype(np.uint8)
        frame = cv2.imdecode(buffer, cv2.IMREAD_COLOR)  # decodes to BGR
        frame = frame[:, :, ::-1]  # BGR to RGB
    else:
        raise RuntimeError(f"Unsupported image encoding: {encoding}")

    return Image.fromarray(frame)


def generate_response(image, text, history=None):
    """Generate a response with the LMDeploy pipeline, tracking chat history."""
    if history is None:
        history = []

    # Prepend the system prompt only once, on the first turn
    if SYSTEM_PROMPT and not history:
        history = [{"role": "system", "content": SYSTEM_PROMPT}]

    # GPT-4V style message; in LMDeploy's VLM message format, in-memory PIL
    # images are passed under `image_data`
    user_message = {
        "role": "user",
        "content": [
            {"type": "image_data", "image_data": {"data": image}},
            {"type": "text", "text": text},
        ],
    }
    messages = history + [user_message]

    # Generate response using the pipeline
    response = pipe(messages, gen_config=gen_config)
    history = messages + [{"role": "assistant", "content": response.text}]
    return response.text, history


def main():
    """Receive text/image events from dora and emit the model's response."""
    node = Node()
    history = []
    cached_text = DEFAULT_QUESTION
    current_image = None

    for event in node:
        event_type = event["type"]

        if event_type == "INPUT":
            event_id = event["id"]

            if "image" in event_id:
                # Process image input
                current_image = process_image(event["value"], event["metadata"])

            elif "text" in event_id:
                # Process text input, falling back to the cached question
                if len(event["value"]) > 0:
                    text = event["value"][0].as_py()
                else:
                    text = cached_text

                cached_text = text

                # Wait until at least one image has arrived
                if current_image is None:
                    continue

                # Generate response
                response, history = generate_response(current_image, text, history)

                # Send output
                node.send_output(
                    "text",
                    pa.array([response]),
                    {"image_id": event_id},
                )

        elif event_type == "ERROR":
            print("Event Error: " + event["error"])


if __name__ == "__main__":
    main()
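As a reference for upstream nodes: `process_image` above expects a flat `UInt8` arrow array plus `encoding`/`width`/`height` metadata. A minimal webcam publisher sketch in that layout (the node id and the `tick` input are assumptions; wire them up in your dataflow):

```python
"""Sketch: publish bgr8 webcam frames in the layout dora-lmdeploy expects."""

import cv2
import pyarrow as pa
from dora import Node


def main():
    node = Node()
    cap = cv2.VideoCapture(0)

    for event in node:
        if event["type"] == "INPUT" and event["id"] == "tick":
            ret, frame = cap.read()  # OpenCV frames are BGR, matching "bgr8"
            if not ret:
                continue
            node.send_output(
                "image",
                pa.array(frame.ravel()),
                {"encoding": "bgr8", "width": frame.shape[1], "height": frame.shape[0]},
            )


if __name__ == "__main__":
    main()
```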
diff --git a/node-hub/dora-lmdeploy/pyproject.toml b/node-hub/dora-lmdeploy/pyproject.toml
new file mode 100644
index 00000000..c231e4eb
--- /dev/null
+++ b/node-hub/dora-lmdeploy/pyproject.toml
@@ -0,0 +1,52 @@
[project]
name = "dora-lmdeploy"
version = "0.3.10"
authors = [
    { name = "Somay", email = "ssomay2002@gmail.com" },
]
description = "Dora Node for LMDeploy with Turbomind"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.9"

dependencies = [
    "dora-rs >= 0.3.9",
    "numpy < 2.0.0",
    "torch == 2.4.0",
    "torchvision >= 0.19",
    "torchaudio >= 2.1.0",
    "opencv-python >= 4.1.1",
    "lmdeploy>=0.3.0",
    "setuptools>=65.0.0",
]

# Currently flash_attn is not supported as a pip install within uv.
# [[tool.uv.dependency-metadata]]
# name = "flash-attn"
# version = "2.7.1"
# requires = ["setuptools", "torch"]

# [tool.uv]
# no-build-isolation-package = ['flash-attn']

[dependency-groups]
dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

[project.scripts]
dora-lmdeploy = "dora_lmdeploy.main:main"

[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.ruff.lint]
extend-select = [
    "D",    # pydocstyle
    "UP",   # pyupgrade
    "PERF", # perflint
    "RET",  # flake8-return
    "RSE",  # flake8-raise
    "NPY",  # NumPy-specific rules
    "N",    # pep8-naming
    "I",    # isort
]

diff --git a/node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py b/node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py
new file mode 100644
index 00000000..4ff8bc1c
--- /dev/null
+++ b/node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py
@@ -0,0 +1,12 @@
"""Tests for the dora-lmdeploy node."""

import pytest


def test_import_main():
    """Check the entrypoint imports and refuses to run outside a dataflow."""
    from dora_lmdeploy.main import main

    # Calling main() outside a dora dataflow should raise a RuntimeError,
    # since Node() cannot connect to a running dora daemon.
    with pytest.raises(RuntimeError):
        main()