@@ -0,0 +1,104 @@
# dora-lmdeploy

## Getting started

- Install it with uv:

```bash
uv venv -p 3.11 --seed
uv pip install -e .
```

## Contribution Guide

- Format with [ruff](https://docs.astral.sh/ruff/):

```bash
uv pip install ruff
uv run ruff check . --fix
```

- Lint with ruff:

```bash
uv run ruff check .
```

- Test with [pytest](https://github.com/pytest-dev/pytest):

```bash
uv pip install pytest
uv run pytest .
```
## YAML Specification

This node can be used in a dataflow as follows:

```yaml
- id: dora-lmdeploy
  build: pip install dora-lmdeploy
  path: dora-lmdeploy
  inputs:
    text:
      source: dora-distil-whisper/text # Optional text input
      queue_size: 1
    image:
      source: camera/image # Optional image input
      queue_size: 1
  outputs:
    - text # Model's response
  env:
    MODEL_NAME: "internlm/internlm2-7b" # Default model, can be changed
    MAX_LENGTH: 2048 # Maximum length of generated text
    TEMPERATURE: 0.7 # Sampling temperature
    TOP_P: 0.9 # Top-p sampling parameter
    SYSTEM_PROMPT: "You are a helpful AI assistant." # Optional system prompt
    DEFAULT_QUESTION: "Describe this image" # Default question when no text input is provided
    TURBOMIND_CACHE_DIR: "./workspace" # Cache directory for Turbomind
    TURBOMIND_TP: 1 # Tensor parallelism degree
    TURBOMIND_GPU_MEMORY_FRACTION: 0.8 # GPU memory fraction to use
```
### Available Models

The node supports various models, specified via the `MODEL_NAME` environment variable. Some examples (a standalone sanity-check sketch follows this list):

- `internlm/internlm2-7b`
- `internlm/internlm2-20b`
- `internlm/internlm2-7b-chat`
- `internlm/internlm2-20b-chat`
- `Qwen/Qwen2-7B`
- `Qwen/Qwen2-14B`
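To try one of these models outside a dora dataflow, you can load it directly with LMDeploy. A minimal sanity-check sketch, assuming `lmdeploy` is installed and the model fits in GPU memory (the chat model picked here is just one of the examples above):

```python
from lmdeploy import TurbomindEngineConfig, pipeline

# Load the model with the Turbomind backend, reserving 80% of free GPU
# memory for the k/v cache.
pipe = pipeline(
    "internlm/internlm2-7b-chat",
    backend_config=TurbomindEngineConfig(tp=1, cache_max_entry_count=0.8),
)
print(pipe("Say hello in one short sentence.").text)
```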
### Input/Output

- **Inputs**:
  - `text`: Optional text input for text-only or multimodal tasks. If not provided, falls back to `DEFAULT_QUESTION`.
  - `image`: Optional image input for vision-language tasks. Supported formats:
    - Raw image formats: bgr8, rgb8
    - File formats: jpeg, jpg, jpe, bmp, webp, png
- **Outputs**:
  - `text`: The model's generated response, with metadata carrying the `image_id` of the frame it refers to (a consumer sketch follows this list)
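To illustrate the output contract, a downstream dora node can consume the reply as follows. This is a minimal sketch; the dataflow wiring that routes `dora-lmdeploy/text` into this node's `text` input is assumed:

```python
from dora import Node

node = Node()
for event in node:
    if event["type"] == "INPUT" and event["id"] == "text":
        # One reply per inference; the metadata carries the id of the
        # image event the reply refers to.
        reply = event["value"][0].as_py()
        image_id = event["metadata"].get("image_id")
        print(f"[{image_id}] {reply}")
```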
### Environment Variables

- `MODEL_NAME`: Name of the model to use (default: "internlm/internlm2-7b")
- `MAX_LENGTH`: Maximum length of the generated text (default: 2048)
- `TEMPERATURE`: Sampling temperature (default: 0.7)
- `TOP_P`: Top-p sampling parameter (default: 0.9)
- `SYSTEM_PROMPT`: Optional system prompt to guide model behavior (defaults to a succinct image-description prompt; see `main.py`)
- `DEFAULT_QUESTION`: Question to use when no text input is provided (default: "Describe this image")
- `TURBOMIND_CACHE_DIR`: Directory for the Turbomind model cache (default: "./workspace")
- `TURBOMIND_TP`: Tensor parallelism degree (default: 1)
- `TURBOMIND_GPU_MEMORY_FRACTION`: Fraction of GPU memory to use (default: 0.8)
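These variables are read once at node startup with plain `os.getenv` plus numeric casts, mirroring `main.py`, so numeric values must parse cleanly:

```python
import os

# Mirrors how main.py reads its configuration; a malformed numeric value
# raises ValueError at startup instead of being silently ignored.
MODEL_NAME = os.getenv("MODEL_NAME", "internlm/internlm2-7b")
MAX_LENGTH = int(os.getenv("MAX_LENGTH", "2048"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
TOP_P = float(os.getenv("TOP_P", "0.9"))
```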
### Features

- Efficient inference using LMDeploy's Turbomind engine
- Support for multimodal inputs (text + image)
- Conversation history tracking
- Automatic image format conversion and processing (sketched below)
- Configurable model parameters and generation settings
- GPU memory optimization through Turbomind
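The image-handling feature above amounts to normalizing every input to an RGB PIL image before inference. A sketch of the compressed-frame path, mirroring `process_image` in `main.py` (the helper name is illustrative):

```python
import cv2
import numpy as np
from PIL import Image


def decode_to_rgb_pil(buffer: np.ndarray) -> Image.Image:
    """Decode a compressed frame (jpeg, png, ...) into an RGB PIL image."""
    frame = cv2.imdecode(buffer, cv2.IMREAD_COLOR)  # OpenCV decodes to BGR
    return Image.fromarray(frame[:, :, ::-1])  # reverse channel order: BGR -> RGB
```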
## Examples

## License

dora-lmdeploy's code is released under the MIT License.
@@ -0,0 +1,13 @@
"""dora-lmdeploy package: expose the project README as the package docstring."""
import os

# Define the path to the README file relative to the package directory
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Read the content of the README file
try:
    with open(readme_path, encoding="utf-8") as f:
        __doc__ = f.read()
except FileNotFoundError:
    __doc__ = "README file not found."
@@ -0,0 +1,6 @@
"""Allow running the node with `python -m dora_lmdeploy`."""
from .main import main

if __name__ == "__main__":
    main()
@@ -0,0 +1,3 @@
{
  "chat_template": "{% set image_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\n{{ system_prompt }}<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Image {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
}
@@ -0,0 +1,151 @@
"""Dora node that runs multimodal inference with LMDeploy's Turbomind engine."""
import os

import cv2
import numpy as np
import pyarrow as pa
from dora import Node
from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
from PIL import Image
# Default model configuration
DEFAULT_MODEL = "internlm/internlm2-7b"
MODEL_NAME = os.getenv("MODEL_NAME", DEFAULT_MODEL)

# System prompt and default question
SYSTEM_PROMPT = os.getenv(
    "SYSTEM_PROMPT",
    "You're a very succinct AI assistant that describes images in a very short sentence.",
)
DEFAULT_QUESTION = os.getenv(
    "DEFAULT_QUESTION",
    "Describe this image",
)

# Turbomind configuration
TURBOMIND_CACHE_DIR = os.getenv("TURBOMIND_CACHE_DIR", "./workspace")
TURBOMIND_TP = int(os.getenv("TURBOMIND_TP", "1"))
TURBOMIND_GPU_MEMORY_FRACTION = float(os.getenv("TURBOMIND_GPU_MEMORY_FRACTION", "0.8"))

# Generation parameters
MAX_LENGTH = int(os.getenv("MAX_LENGTH", "2048"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
TOP_P = float(os.getenv("TOP_P", "0.9"))
# Turbomind engine configuration. `cache_max_entry_count` is the fraction of
# free GPU memory reserved for the k/v cache; `download_dir` is where model
# weights are cached.
engine_config = TurbomindEngineConfig(
    tp=TURBOMIND_TP,
    cache_max_entry_count=TURBOMIND_GPU_MEMORY_FRACTION,
    download_dir=TURBOMIND_CACHE_DIR,
)

# Generation settings are passed per call via GenerationConfig rather than to
# pipeline(), which only takes the model path and backend configuration.
gen_config = GenerationConfig(
    max_new_tokens=MAX_LENGTH,
    temperature=TEMPERATURE,
    top_p=TOP_P,
)

# Initialize pipeline
pipe = pipeline(
    MODEL_NAME,
    backend_config=engine_config,
)
def process_image(storage, metadata):
    """Convert a dora image event into an RGB PIL image."""
    encoding = metadata["encoding"]
    width = metadata["width"]
    height = metadata["height"]
    if encoding == "bgr8":
        frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
        frame = frame[:, :, ::-1]  # BGR to RGB
    elif encoding == "rgb8":
        frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
    elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
        buffer = storage.to_numpy()
        frame = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
        frame = frame[:, :, ::-1]  # BGR to RGB
    else:
        raise RuntimeError(f"Unsupported image encoding: {encoding}")
    return Image.fromarray(frame)
def generate_response(image, text, history=None):
    """Generate a response with the LMDeploy pipeline, tracking conversation history."""
    if history is None:
        history = []
    user_message = {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": text},
        ],
    }
    # Prepend the system prompt for this call only, so it is not duplicated
    # in the stored history on later turns.
    messages = history + [user_message]
    if SYSTEM_PROMPT:
        messages = [{"role": "system", "content": SYSTEM_PROMPT}, *messages]
    # Generate response using the pipeline and per-call generation settings
    response = pipe(messages, gen_config=gen_config)
    history = history + [user_message, {"role": "assistant", "content": response.text}]
    return response.text, history
def main():
    """Run the node: cache the latest image and answer each incoming text input."""
    node = Node()
    history = []
    cached_text = DEFAULT_QUESTION
    current_image = None
    current_image_id = None

    for event in node:
        event_type = event["type"]
        if event_type == "INPUT":
            event_id = event["id"]
            if "image" in event_id:
                # Cache the most recent image; it is consumed on the next text input.
                current_image = process_image(event["value"], event["metadata"])
                current_image_id = event_id
            elif "text" in event_id:
                # Use the incoming text, or fall back to the last question asked.
                if len(event["value"]) > 0:
                    text = event["value"][0].as_py()
                else:
                    text = cached_text
                cached_text = text
                if current_image is None:
                    continue
                # Generate response
                response, history = generate_response(current_image, text, history)
                # Send output, tagged with the id of the image it refers to
                node.send_output(
                    "text",
                    pa.array([response]),
                    {"image_id": current_image_id},
                )
        elif event_type == "ERROR":
            print("Event Error: " + event["error"])


if __name__ == "__main__":
    main()
@@ -0,0 +1,52 @@
[project]
name = "dora-lmdeploy"
version = "0.3.10"
authors = [
    { name = "Somay", email = "ssomay2002@gmail.com" },
]
description = "Dora Node for LMDeploy with Turbomind"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.9"

dependencies = [
    "dora-rs >= 0.3.9",
    "numpy < 2.0.0",
    "torch == 2.4.0",
    "torchvision >= 0.19",
    "torchaudio >= 2.1.0",
    "opencv-python >= 4.1.1",
    "lmdeploy >= 0.3.0",
    "setuptools >= 65.0.0",
]

# Currently flash_attn is not supported as a pip install within uv.
# [[tool.uv.dependency-metadata]]
# name = "flash-attn"
# version = "2.7.1"
# requires = ["setuptools", "torch"]

# [tool.uv]
# no-build-isolation-package = ['flash-attn']

[dependency-groups]
dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

[project.scripts]
dora-lmdeploy = "dora_lmdeploy.main:main"

[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.ruff.lint]
extend-select = [
    "D",    # pydocstyle
    "UP",   # pyupgrade
    "PERF", # Perflint
    "RET",  # flake8-return
    "RSE",  # flake8-raise
    "NPY",  # NumPy-specific rules
    "N",    # pep8-naming
    "I",    # isort
]
@@ -0,0 +1,12 @@
"""Tests for the dora-lmdeploy node."""
import pytest


def test_import_main():
    """Import the entry point and check it fails cleanly outside a dataflow."""
    from dora_lmdeploy.main import main

    # Calling main() outside a dora dataflow should raise a RuntimeError
    # from the dora runtime; anything else indicates a bug.
    with pytest.raises(RuntimeError):
        main()