From 82071cdbf04794c26e1e06d7bab7d720106a219a Mon Sep 17 00:00:00 2001
From: 7SOMAY
Date: Thu, 10 Apr 2025 01:13:33 +0530
Subject: [PATCH] feat: add dora lmdeploy integration

---
 node-hub/dora-lmdeploy/README.md              | 104 ++++++++++++
 .../dora-lmdeploy/dora_lmdeploy/__init__.py   |  13 ++
 .../dora-lmdeploy/dora_lmdeploy/__main__.py   |   6 +
 .../dora_lmdeploy/chat_template.json          |   3 +
 node-hub/dora-lmdeploy/dora_lmdeploy/main.py  | 151 ++++++++++++++++++
 node-hub/dora-lmdeploy/pyproject.toml         |  52 ++++++
 .../dora-lmdeploy/tests/test_dora_lmdeploy.py |  12 ++
 7 files changed, 341 insertions(+)
 create mode 100644 node-hub/dora-lmdeploy/README.md
 create mode 100644 node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py
 create mode 100644 node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py
 create mode 100644 node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json
 create mode 100644 node-hub/dora-lmdeploy/dora_lmdeploy/main.py
 create mode 100644 node-hub/dora-lmdeploy/pyproject.toml
 create mode 100644 node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py

diff --git a/node-hub/dora-lmdeploy/README.md b/node-hub/dora-lmdeploy/README.md
new file mode 100644
index 00000000..019f98ef
--- /dev/null
+++ b/node-hub/dora-lmdeploy/README.md
@@ -0,0 +1,104 @@
# dora-lmdeploy

## Getting started

- Install it with uv:

```bash
uv venv -p 3.11 --seed
uv pip install -e .
```

## Contribution Guide

- Format with [ruff](https://docs.astral.sh/ruff/):

```bash
uv pip install ruff
uv run ruff check . --fix
```

- Lint with ruff:

```bash
uv run ruff check .
```

- Test with [pytest](https://github.com/pytest-dev/pytest):

```bash
uv pip install pytest
uv run pytest . # Test
```

## YAML Specification

This node can be used as follows:

```yaml
- id: dora-lmdeploy
  build: pip install dora-lmdeploy
  path: dora-lmdeploy
  inputs:
    text:
      source: dora-distil-whisper/text # Optional text input
      queue_size: 1
    image:
      source: camera/image # Optional image input
      queue_size: 1
  outputs:
    - text # Model's response
  env:
    MODEL_NAME: "internlm/internlm2-7b" # Default model, can be changed
    MAX_LENGTH: 2048 # Maximum number of generated tokens
    TEMPERATURE: 0.7 # Sampling temperature
    TOP_P: 0.9 # Top-p sampling parameter
    SYSTEM_PROMPT: "You are a helpful AI assistant." # Optional system prompt
    DEFAULT_QUESTION: "Describe this image" # Default question when no text input is provided
    TURBOMIND_CACHE_DIR: "./workspace" # Download/cache directory for model weights
    TURBOMIND_TP: 1 # Tensor parallelism degree
    TURBOMIND_GPU_MEMORY_FRACTION: 0.8 # Fraction of GPU memory reserved for the k/v cache
```

### Available Models

The node supports any model that LMDeploy's Turbomind engine can load; select it through the `MODEL_NAME` environment variable. Some examples:

- `internlm/internlm2-7b`
- `internlm/internlm2-20b`
- `internlm/internlm2-chat-7b`
- `internlm/internlm2-chat-20b`
- `Qwen/Qwen2-7B`
- `Qwen/Qwen2-7B-Instruct`

Note that the `image` input requires a vision-language model (the bundled `chat_template.json` targets Qwen2-VL-style chat formats); text-only models such as the default `internlm/internlm2-7b` can only handle the `text` input.

### Input/Output

- **Inputs**:
  - `text`: Optional text input for text-only or multimodal tasks. If not provided, `DEFAULT_QUESTION` is used.
  - `image`: Optional image input for vision-language tasks. Supports multiple formats:
    - Raw image formats: bgr8, rgb8
    - File formats: jpeg, jpg, jpe, bmp, webp, png
- **Outputs**:
  - `text`: The model's generated response, with metadata containing the `image_id`

### Environment Variables

- `MODEL_NAME`: Name of the model to use (default: "internlm/internlm2-7b")
- `MAX_LENGTH`: Maximum number of generated tokens (default: 2048)
- `TEMPERATURE`: Sampling temperature (default: 0.7)
- `TOP_P`: Top-p sampling parameter (default: 0.9)
- `SYSTEM_PROMPT`: Optional system prompt to guide model behavior
- `DEFAULT_QUESTION`: Default question to use when no text input is provided
- `TURBOMIND_CACHE_DIR`: Directory for downloading and caching model weights (default: "./workspace")
- `TURBOMIND_TP`: Tensor parallelism degree (default: 1)
- `TURBOMIND_GPU_MEMORY_FRACTION`: Fraction of GPU memory reserved for the k/v cache (default: 0.8)

### Features

- Efficient inference using LMDeploy's Turbomind engine
- Support for multimodal inputs (text + image)
- Conversation history tracking
- Automatic image format conversion and processing
- Configurable model parameters and generation settings
- GPU memory optimization through Turbomind

## Examples
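A minimal end-to-end dataflow: a webcam node feeding frames into this node. This is a sketch; it assumes the `opencv-video-capture` node from the dora node hub and a vision-language model that LMDeploy supports, so substitute whatever camera source and model fit your setup:

```yaml
nodes:
  - id: camera
    build: pip install opencv-video-capture
    path: opencv-video-capture
    inputs:
      tick: dora/timer/millis/500
    outputs:
      - image
    env:
      CAPTURE_PATH: 0

  - id: dora-lmdeploy
    build: pip install dora-lmdeploy
    path: dora-lmdeploy
    inputs:
      image:
        source: camera/image
        queue_size: 1
    outputs:
      - text
    env:
      MODEL_NAME: "OpenGVLab/InternVL2-8B" # assumed VL model; pick one that fits your GPU
```

With a recent dora CLI this can be saved as `dataflow.yml` and started with `dora build dataflow.yml` followed by `dora run dataflow.yml`.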
## License

dora-lmdeploy's code is released under the MIT License.

diff --git a/node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py b/node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py
new file mode 100644
index 00000000..79cbf370
--- /dev/null
+++ b/node-hub/dora-lmdeploy/dora_lmdeploy/__init__.py
@@ -0,0 +1,13 @@
"""Expose the package README as the module docstring."""

import os

# Define the path to the README file relative to the package directory
readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")

# Read the content of the README file
try:
    with open(readme_path, encoding="utf-8") as f:
        __doc__ = f.read()
except FileNotFoundError:
    __doc__ = "README file not found."
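This README-as-docstring pattern is shared across the node hub; a quick sanity check (a sketch, run from an environment where the package is installed):

```python
import dora_lmdeploy

# __doc__ holds the README text (or the fallback message if the file is
# missing), so help(dora_lmdeploy) shows the same documentation as the repo.
print(dora_lmdeploy.__doc__.splitlines()[0])  # expected: "# dora-lmdeploy"
```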
diff --git a/node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py b/node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py
new file mode 100644
index 00000000..51a1554d
--- /dev/null
+++ b/node-hub/dora-lmdeploy/dora_lmdeploy/__main__.py
@@ -0,0 +1,6 @@
"""Run the node with `python -m dora_lmdeploy`."""

from .main import main

if __name__ == "__main__":
    main()

diff --git a/node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json b/node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json
new file mode 100644
index 00000000..8f032ae8
--- /dev/null
+++ b/node-hub/dora-lmdeploy/dora_lmdeploy/chat_template.json
@@ -0,0 +1,3 @@
{
    "chat_template": "{% set image_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\n{{ system_prompt }}<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Image {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
}
\ No newline at end of file
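The template follows the Qwen2-VL-style chat format: `<|im_start|>`/`<|im_end|>` turns, with `<|vision_start|><|image_pad|><|vision_end|>` standing in for each image. To see what it produces, it can be exercised with `jinja2` directly (a sketch; the `system_prompt`, `add_vision_id`, and `add_generation_prompt` variables are the ones the template itself expects):

```python
import json

from jinja2 import Template

# Load the bundled chat template (path relative to the package root).
with open("dora_lmdeploy/chat_template.json") as f:
    template = Template(json.load(f)["chat_template"])

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},  # each image item becomes the vision placeholder
            {"type": "text", "text": "Describe this image"},
        ],
    },
]

print(
    template.render(
        messages=messages,
        system_prompt="You are a helpful AI assistant.",
        add_vision_id=False,
        add_generation_prompt=True,
    )
)
```

Because the first message is not a system turn, the template injects `system_prompt` ahead of it, and with `add_generation_prompt=True` the rendered prompt ends with an open `<|im_start|>assistant` turn for the model to complete.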
diff --git a/node-hub/dora-lmdeploy/dora_lmdeploy/main.py b/node-hub/dora-lmdeploy/dora_lmdeploy/main.py
new file mode 100644
index 00000000..6cd31e6d
--- /dev/null
+++ b/node-hub/dora-lmdeploy/dora_lmdeploy/main.py
@@ -0,0 +1,151 @@
"""Dora node running multimodal inference through LMDeploy's Turbomind engine."""

import os

import cv2
import numpy as np
import pyarrow as pa
from dora import Node
from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
from PIL import Image

# Default model configuration
DEFAULT_MODEL = "internlm/internlm2-7b"
MODEL_NAME = os.getenv("MODEL_NAME", DEFAULT_MODEL)

# System prompt and default question
SYSTEM_PROMPT = os.getenv(
    "SYSTEM_PROMPT",
    "You're a very succinct AI assistant that describes images in a single short sentence.",
)
DEFAULT_QUESTION = os.getenv(
    "DEFAULT_QUESTION",
    "Describe this image",
)

# Turbomind configuration
TURBOMIND_CACHE_DIR = os.getenv("TURBOMIND_CACHE_DIR", "./workspace")
TURBOMIND_TP = int(os.getenv("TURBOMIND_TP", "1"))
TURBOMIND_GPU_MEMORY_FRACTION = float(os.getenv("TURBOMIND_GPU_MEMORY_FRACTION", "0.8"))

# Generation parameters
MAX_LENGTH = int(os.getenv("MAX_LENGTH", "2048"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
TOP_P = float(os.getenv("TOP_P", "0.9"))

# Initialize the Turbomind engine config. Turbomind expresses its k/v cache
# budget as a fraction of free GPU memory (`cache_max_entry_count`), so the
# GPU memory env var maps onto that field; weights are fetched into
# `download_dir`.
engine_config = TurbomindEngineConfig(
    tp=TURBOMIND_TP,
    cache_max_entry_count=TURBOMIND_GPU_MEMORY_FRACTION,
    download_dir=TURBOMIND_CACHE_DIR,
)

# Initialize the pipeline. Sampling parameters are passed per call through a
# GenerationConfig rather than at pipeline construction time.
pipe = pipeline(MODEL_NAME, backend_config=engine_config)
gen_config = GenerationConfig(
    max_new_tokens=MAX_LENGTH,
    temperature=TEMPERATURE,
    top_p=TOP_P,
)


def process_image(storage, metadata):
    """Convert a dora image buffer plus metadata into a PIL RGB image."""
    encoding = metadata["encoding"]
    width = metadata["width"]
    height = metadata["height"]

    if encoding == "bgr8":
        frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
        frame = frame[:, :, ::-1]  # BGR to RGB
    elif encoding == "rgb8":
        frame = storage.to_numpy().astype(np.uint8).reshape((height, width, 3))
    elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
        buffer = storage.to_numpy().astype(np.uint8)
        frame = cv2.imdecode(buffer, cv2.IMREAD_COLOR)  # decodes to BGR
        frame = frame[:, :, ::-1]  # BGR to RGB
    else:
        raise RuntimeError(f"Unsupported image encoding: {encoding}")

    return Image.fromarray(frame)


def generate_response(image, text, history=None):
    """Generate a response with the LMDeploy pipeline, tracking chat history."""
    if history is None:
        history = []

    # Prepend the system prompt only once, on the first turn
    if SYSTEM_PROMPT and not history:
        history = [{"role": "system", "content": SYSTEM_PROMPT}]

    # GPT-4V style message; in LMDeploy's VLM message format, in-memory PIL
    # images are passed under `image_data`
    user_message = {
        "role": "user",
        "content": [
            {"type": "image_data", "image_data": {"data": image}},
            {"type": "text", "text": text},
        ],
    }
    messages = history + [user_message]

    # Generate response using the pipeline
    response = pipe(messages, gen_config=gen_config)
    history = messages + [{"role": "assistant", "content": response.text}]
    return response.text, history


def main():
    """Receive text/image events from dora and emit the model's response."""
    node = Node()
    history = []
    cached_text = DEFAULT_QUESTION
    current_image = None

    for event in node:
        event_type = event["type"]

        if event_type == "INPUT":
            event_id = event["id"]

            if "image" in event_id:
                # Process image input
                current_image = process_image(event["value"], event["metadata"])

            elif "text" in event_id:
                # Process text input, falling back to the cached question
                if len(event["value"]) > 0:
                    text = event["value"][0].as_py()
                else:
                    text = cached_text

                cached_text = text

                # Wait until at least one image has arrived
                if current_image is None:
                    continue

                # Generate response
                response, history = generate_response(current_image, text, history)

                # Send output
                node.send_output(
                    "text",
                    pa.array([response]),
                    {"image_id": event_id},
                )

        elif event_type == "ERROR":
            print("Event Error: " + event["error"])


if __name__ == "__main__":
    main()
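As a reference for upstream nodes: `process_image` above expects a flat `UInt8` arrow array plus `encoding`/`width`/`height` metadata. A minimal webcam publisher sketch in that layout (the node id and the `tick` input are assumptions; wire them up in your dataflow):

```python
"""Sketch: publish bgr8 webcam frames in the layout dora-lmdeploy expects."""

import cv2
import pyarrow as pa
from dora import Node


def main():
    node = Node()
    cap = cv2.VideoCapture(0)

    for event in node:
        if event["type"] == "INPUT" and event["id"] == "tick":
            ret, frame = cap.read()  # OpenCV frames are BGR, matching "bgr8"
            if not ret:
                continue
            node.send_output(
                "image",
                pa.array(frame.ravel()),
                {"encoding": "bgr8", "width": frame.shape[1], "height": frame.shape[0]},
            )


if __name__ == "__main__":
    main()
```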
diff --git a/node-hub/dora-lmdeploy/pyproject.toml b/node-hub/dora-lmdeploy/pyproject.toml
new file mode 100644
index 00000000..c231e4eb
--- /dev/null
+++ b/node-hub/dora-lmdeploy/pyproject.toml
@@ -0,0 +1,52 @@
[project]
name = "dora-lmdeploy"
version = "0.3.10"
authors = [
    { name = "Somay", email = "ssomay2002@gmail.com" },
]
description = "Dora Node for LMDeploy with Turbomind"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.9"

dependencies = [
    "dora-rs >= 0.3.9",
    "numpy < 2.0.0",
    "torch == 2.4.0",
    "torchvision >= 0.19",
    "torchaudio >= 2.1.0",
    "opencv-python >= 4.1.1",
    "lmdeploy>=0.3.0",
    "setuptools>=65.0.0",
]

# Currently flash_attn is not supported as a pip install within uv.
# [[tool.uv.dependency-metadata]]
# name = "flash-attn"
# version = "2.7.1"
# requires = ["setuptools", "torch"]

# [tool.uv]
# no-build-isolation-package = ['flash-attn']

[dependency-groups]
dev = ["pytest >=8.1.1", "ruff >=0.9.1"]

[project.scripts]
dora-lmdeploy = "dora_lmdeploy.main:main"

[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.ruff.lint]
extend-select = [
    "D",    # pydocstyle
    "UP",   # pyupgrade
    "PERF", # perflint
    "RET",  # flake8-return
    "RSE",  # flake8-raise
    "NPY",  # NumPy-specific rules
    "N",    # pep8-naming
    "I",    # isort
]

diff --git a/node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py b/node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py
new file mode 100644
index 00000000..4ff8bc1c
--- /dev/null
+++ b/node-hub/dora-lmdeploy/tests/test_dora_lmdeploy.py
@@ -0,0 +1,12 @@
"""Tests for the dora-lmdeploy node."""

import pytest


def test_import_main():
    """Check the entrypoint imports and refuses to run outside a dataflow."""
    from dora_lmdeploy.main import main

    # Calling main() outside a dora dataflow should raise a RuntimeError,
    # since Node() cannot connect to a running dora daemon.
    with pytest.raises(RuntimeError):
        main()