@@ -0,0 +1,104 @@
# dora-lmdeploy

## Getting started

- Install it with uv:

```bash
uv venv -p 3.11 --seed
uv pip install -e .
```

## Contribution Guide

- Format with [ruff](https://docs.astral.sh/ruff/):

```bash
uv pip install ruff
uv run ruff check . --fix
```

- Lint with ruff:

```bash
uv run ruff check .
```

- Test with [pytest](https://github.com/pytest-dev/pytest):

```bash
uv pip install pytest
uv run pytest .
```
## YAML Specification

This node can be used in a dataflow as follows:

```yaml
- id: dora-lmdeploy
  build: pip install dora-lmdeploy
  path: dora-lmdeploy
  inputs:
    text:
      source: dora-distil-whisper/text # Optional text input
      queue_size: 1
    image:
      source: camera/image # Optional image input
      queue_size: 1
  outputs:
    - text # Model's response
  env:
    MODEL_NAME: "internlm/internlm2-7b" # Default model, can be changed
    MAX_LENGTH: 2048 # Maximum length of generated text
    TEMPERATURE: 0.7 # Sampling temperature
    TOP_P: 0.9 # Top-p sampling parameter
    SYSTEM_PROMPT: "You are a helpful AI assistant." # Optional system prompt
    DEFAULT_QUESTION: "Describe this image" # Default question when no text input is provided
    TURBOMIND_CACHE_DIR: "./workspace" # Cache directory for Turbomind
    TURBOMIND_TP: 1 # Tensor parallelism degree
    TURBOMIND_GPU_MEMORY_FRACTION: 0.8 # GPU memory fraction to use
```
### Available Models

The node supports various models, specified via the `MODEL_NAME` environment variable. Some examples (a standalone sanity-check sketch follows this list):

- `internlm/internlm2-7b`
- `internlm/internlm2-20b`
- `internlm/internlm2-7b-chat`
- `internlm/internlm2-20b-chat`
- `Qwen/Qwen2-7B`
- `Qwen/Qwen2-14B`
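To try one of these models outside a dora dataflow, you can load it directly with LMDeploy. A minimal sanity-check sketch, assuming `lmdeploy` is installed and the model fits in GPU memory (the chat model picked here is just one of the examples above):

```python
from lmdeploy import TurbomindEngineConfig, pipeline

# Load the model with the Turbomind backend, reserving 80% of free GPU
# memory for the k/v cache.
pipe = pipeline(
    "internlm/internlm2-7b-chat",
    backend_config=TurbomindEngineConfig(tp=1, cache_max_entry_count=0.8),
)
print(pipe("Say hello in one short sentence.").text)
```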
### Input/Output

- **Inputs**:
  - `text`: Optional text input for text-only or multimodal tasks. If not provided, falls back to `DEFAULT_QUESTION`.
  - `image`: Optional image input for vision-language tasks. Supported formats:
    - Raw image formats: bgr8, rgb8
    - File formats: jpeg, jpg, jpe, bmp, webp, png
- **Outputs**:
  - `text`: The model's generated response, with metadata carrying the `image_id` of the frame it refers to (a consumer sketch follows this list)
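To illustrate the output contract, a downstream dora node can consume the reply as follows. This is a minimal sketch; the dataflow wiring that routes `dora-lmdeploy/text` into this node's `text` input is assumed:

```python
from dora import Node

node = Node()
for event in node:
    if event["type"] == "INPUT" and event["id"] == "text":
        # One reply per inference; the metadata carries the id of the
        # image event the reply refers to.
        reply = event["value"][0].as_py()
        image_id = event["metadata"].get("image_id")
        print(f"[{image_id}] {reply}")
```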
### Environment Variables

- `MODEL_NAME`: Name of the model to use (default: "internlm/internlm2-7b")
- `MAX_LENGTH`: Maximum length of the generated text (default: 2048)
- `TEMPERATURE`: Sampling temperature (default: 0.7)
- `TOP_P`: Top-p sampling parameter (default: 0.9)
- `SYSTEM_PROMPT`: Optional system prompt to guide model behavior (defaults to a succinct image-description prompt; see `main.py`)
- `DEFAULT_QUESTION`: Question to use when no text input is provided (default: "Describe this image")
- `TURBOMIND_CACHE_DIR`: Directory for the Turbomind model cache (default: "./workspace")
- `TURBOMIND_TP`: Tensor parallelism degree (default: 1)
- `TURBOMIND_GPU_MEMORY_FRACTION`: Fraction of GPU memory to use (default: 0.8)
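These variables are read once at node startup with plain `os.getenv` plus numeric casts, mirroring `main.py`, so numeric values must parse cleanly:

```python
import os

# Mirrors how main.py reads its configuration; a malformed numeric value
# raises ValueError at startup instead of being silently ignored.
MODEL_NAME = os.getenv("MODEL_NAME", "internlm/internlm2-7b")
MAX_LENGTH = int(os.getenv("MAX_LENGTH", "2048"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
TOP_P = float(os.getenv("TOP_P", "0.9"))
```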
### Features

- Efficient inference using LMDeploy's Turbomind engine
- Support for multimodal inputs (text + image)
- Conversation history tracking
- Automatic image format conversion and processing (sketched below)
- Configurable model parameters and generation settings
- GPU memory optimization through Turbomind
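The image-handling feature above amounts to normalizing every input to an RGB PIL image before inference. A sketch of the compressed-frame path, mirroring `process_image` in `main.py` (the helper name is illustrative):

```python
import cv2
import numpy as np
from PIL import Image


def decode_to_rgb_pil(buffer: np.ndarray) -> Image.Image:
    """Decode a compressed frame (jpeg, png, ...) into an RGB PIL image."""
    frame = cv2.imdecode(buffer, cv2.IMREAD_COLOR)  # OpenCV decodes to BGR
    return Image.fromarray(frame[:, :, ::-1])  # reverse channel order: BGR -> RGB
```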
## Examples

## License

dora-lmdeploy's code is released under the MIT License.
@@ -0,0 +1,13 @@
"""dora-lmdeploy package: expose the project README as the package docstring."""
import os

# Define the path to the README file relative to the package directory
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Read the content of the README file
try:
    with open(readme_path, encoding="utf-8") as f:
        __doc__ = f.read()
except FileNotFoundError:
    __doc__ = "README file not found."
@@ -0,0 +1,6 @@
"""Allow running the node with `python -m dora_lmdeploy`."""
from .main import main

if __name__ == "__main__":
    main()
@@ -0,0 +1,3 @@
{
  "chat_template": "{% set image_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\n{{ system_prompt }}<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Image {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
}
@@ -0,0 +1,151 @@
"""Dora node that runs multimodal inference with LMDeploy's Turbomind engine."""
import os

import cv2
import numpy as np
import pyarrow as pa
from dora import Node
from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
from PIL import Image
# Default model configuration
DEFAULT_MODEL = "internlm/internlm2-7b"
MODEL_NAME = os.getenv("MODEL_NAME", DEFAULT_MODEL)

# System prompt and default question
SYSTEM_PROMPT = os.getenv(
    "SYSTEM_PROMPT",
    "You're a very succinct AI assistant that describes images in a very short sentence.",
)
DEFAULT_QUESTION = os.getenv(
    "DEFAULT_QUESTION",
    "Describe this image",
)

# Turbomind configuration
TURBOMIND_CACHE_DIR = os.getenv("TURBOMIND_CACHE_DIR", "./workspace")
TURBOMIND_TP = int(os.getenv("TURBOMIND_TP", "1"))
TURBOMIND_GPU_MEMORY_FRACTION = float(os.getenv("TURBOMIND_GPU_MEMORY_FRACTION", "0.8"))

# Generation parameters
MAX_LENGTH = int(os.getenv("MAX_LENGTH", "2048"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
TOP_P = float(os.getenv("TOP_P", "0.9"))
# Turbomind engine configuration. `cache_max_entry_count` is the fraction of
# free GPU memory reserved for the k/v cache; `download_dir` is where model
# weights are cached.
engine_config = TurbomindEngineConfig(
    tp=TURBOMIND_TP,
    cache_max_entry_count=TURBOMIND_GPU_MEMORY_FRACTION,
    download_dir=TURBOMIND_CACHE_DIR,
)

# Generation settings are passed per call via GenerationConfig rather than to
# pipeline(), which only takes the model path and backend configuration.
gen_config = GenerationConfig(
    max_new_tokens=MAX_LENGTH,
    temperature=TEMPERATURE,
    top_p=TOP_P,
)

# Initialize pipeline
pipe = pipeline(
    MODEL_NAME,
    backend_config=engine_config,
)
def process_image(storage, metadata):
    """Convert a dora image event into an RGB PIL image."""
    encoding = metadata["encoding"]
    width = metadata["width"]
    height = metadata["height"]
    if encoding == "bgr8":
        frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
        frame = frame[:, :, ::-1]  # BGR to RGB
    elif encoding == "rgb8":
        frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
    elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
        buffer = storage.to_numpy()
        frame = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
        frame = frame[:, :, ::-1]  # BGR to RGB
    else:
        raise RuntimeError(f"Unsupported image encoding: {encoding}")
    return Image.fromarray(frame)
def generate_response(image, text, history=None):
    """Generate a response with the LMDeploy pipeline, tracking conversation history."""
    if history is None:
        history = []
    user_message = {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": text},
        ],
    }
    # Prepend the system prompt for this call only, so it is not duplicated
    # in the stored history on later turns.
    messages = history + [user_message]
    if SYSTEM_PROMPT:
        messages = [{"role": "system", "content": SYSTEM_PROMPT}, *messages]
    # Generate response using the pipeline and per-call generation settings
    response = pipe(messages, gen_config=gen_config)
    history = history + [user_message, {"role": "assistant", "content": response.text}]
    return response.text, history
def main():
    """Run the node: cache the latest image and answer each incoming text input."""
    node = Node()
    history = []
    cached_text = DEFAULT_QUESTION
    current_image = None
    current_image_id = None

    for event in node:
        event_type = event["type"]
        if event_type == "INPUT":
            event_id = event["id"]
            if "image" in event_id:
                # Cache the most recent image; it is consumed on the next text input.
                current_image = process_image(event["value"], event["metadata"])
                current_image_id = event_id
            elif "text" in event_id:
                # Use the incoming text, or fall back to the last question asked.
                if len(event["value"]) > 0:
                    text = event["value"][0].as_py()
                else:
                    text = cached_text
                cached_text = text
                if current_image is None:
                    continue
                # Generate response
                response, history = generate_response(current_image, text, history)
                # Send output, tagged with the id of the image it refers to
                node.send_output(
                    "text",
                    pa.array([response]),
                    {"image_id": current_image_id},
                )
        elif event_type == "ERROR":
            print("Event Error: " + event["error"])


if __name__ == "__main__":
    main()
@@ -0,0 +1,52 @@
[project]
name = "dora-lmdeploy"
version = "0.3.10"
authors = [
    { name = "Somay", email = "ssomay2002@gmail.com" },
]
description = "Dora Node for LMDeploy with Turbomind"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.9"

dependencies = [
    "dora-rs >= 0.3.9",
    "numpy < 2.0.0",
    "torch == 2.4.0",
    "torchvision >= 0.19",
    "torchaudio >= 2.1.0",
    "opencv-python >= 4.1.1",
    "lmdeploy >= 0.3.0",
    "setuptools >= 65.0.0",
]

# Currently flash_attn is not supported as a pip install within uv.
# [[tool.uv.dependency-metadata]]
# name = "flash-attn"
# version = "2.7.1"
# requires = ["setuptools", "torch"]

# [tool.uv]
# no-build-isolation-package = ['flash-attn']

[dependency-groups]
dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

[project.scripts]
dora-lmdeploy = "dora_lmdeploy.main:main"

[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.ruff.lint]
extend-select = [
    "D",    # pydocstyle
    "UP",   # pyupgrade
    "PERF", # Perflint
    "RET",  # flake8-return
    "RSE",  # flake8-raise
    "NPY",  # NumPy-specific rules
    "N",    # pep8-naming
    "I",    # isort
]
@@ -0,0 +1,12 @@
"""Tests for the dora-lmdeploy node."""
import pytest


def test_import_main():
    """Import the entry point and check it fails cleanly outside a dataflow."""
    from dora_lmdeploy.main import main

    # Calling main() outside a dora dataflow should raise a RuntimeError
    # from the dora runtime; anything else indicates a bug.
    with pytest.raises(RuntimeError):
        main()