Browse Source

Fix openai server

add-vision-to-openai-server
haixuantao 7 months ago
parent
commit
877eb0e188
2 changed files with 131 additions and 61 deletions
  1. +122
    -50
      node-hub/dora-openai-server/dora_openai_server/main.py
  2. +9
    -11
      node-hub/dora-openai-server/pyproject.toml

+ 122
- 50
node-hub/dora-openai-server/dora_openai_server/main.py View File

@@ -1,13 +1,12 @@
"""
FastAPI server with OpenAI compatibility and DORA integration,
"""FastAPI server with OpenAI compatibility and DORA integration,
sending text and image data on separate DORA topics.
"""

import asyncio
import base64
import uuid # For generating unique request IDs
import time # For timestamps
from typing import List, Optional, Union, Dict, Any, Literal
import time # For timestamps
import uuid # For generating unique request IDs
from typing import Any, List, Literal, Optional, Union

import pyarrow as pa
import uvicorn
@@ -19,53 +18,63 @@ from pydantic import BaseModel
DORA_RESPONSE_TIMEOUT_SECONDS = 20
DORA_TEXT_OUTPUT_TOPIC = "user_text_input"
DORA_IMAGE_OUTPUT_TOPIC = "user_image_input"
DORA_RESPONSE_INPUT_TOPIC = "chat_completion_result" # Topic FastAPI listens on
DORA_RESPONSE_INPUT_TOPIC = "chat_completion_result" # Topic FastAPI listens on

app = FastAPI(
title="DORA OpenAI-Compatible Demo Server (Separate Topics)",
description="Sends text and image data on different DORA topics and awaits a consolidated response.",
)


# --- Pydantic Models ---
class ImageUrl(BaseModel):
url: str
detail: Optional[str] = "auto"


class ContentPartText(BaseModel):
type: Literal["text"]
text: str


class ContentPartImage(BaseModel):
type: Literal["image_url"]
image_url: ImageUrl


ContentPart = Union[ContentPartText, ContentPartImage]


class ChatCompletionMessage(BaseModel):
role: str
content: Union[str, List[ContentPart]]


class ChatCompletionRequest(BaseModel):
model: str
messages: List[ChatCompletionMessage]
temperature: Optional[float] = 1.0
max_tokens: Optional[int] = 100


class ChatCompletionChoiceMessage(BaseModel):
role: str
content: str


class ChatCompletionChoice(BaseModel):
index: int
message: ChatCompletionChoiceMessage
finish_reason: str
logprobs: Optional[Any] = None


class Usage(BaseModel):
prompt_tokens: int
completion_tokens: int
total_tokens: int


class ChatCompletionResponse(BaseModel):
id: str
object: str = "chat.completion"
@@ -75,6 +84,7 @@ class ChatCompletionResponse(BaseModel):
usage: Usage
system_fingerprint: Optional[str] = None


# --- DORA Node Initialization ---
# This dictionary will hold unmatched responses if we implement more robust concurrent handling.
# For now, it's a placeholder for future improvement.
@@ -84,9 +94,12 @@ try:
node = Node()
print("FastAPI Server: DORA Node initialized.")
except Exception as e:
print(f"FastAPI Server: Failed to initialize DORA Node. Running in standalone API mode. Error: {e}")
print(
f"FastAPI Server: Failed to initialize DORA Node. Running in standalone API mode. Error: {e}"
)
node = None


@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
internal_request_id = str(uuid.uuid4())
@@ -109,21 +122,34 @@ async def create_chat_completion(request: ChatCompletionRequest):
if part.type == "text":
user_text_parts.append(part.text)
elif part.type == "image_url":
if user_image_bytes: # Use only the first image
print(f"FastAPI Server (Req {internal_request_id}): Warning - Multiple images found, using the first one.")
if user_image_bytes: # Use only the first image
print(
f"FastAPI Server (Req {internal_request_id}): Warning - Multiple images found, using the first one."
)
continue
image_url_data = part.image_url.url
if image_url_data.startswith("data:image"):
try:
header, encoded_data = image_url_data.split(",", 1)
user_image_content_type = header.split(":")[1].split(";")[0]
user_image_content_type = header.split(":")[1].split(
";"
)[0]
user_image_bytes = base64.b64decode(encoded_data)
print(f"FastAPI Server (Req {internal_request_id}): Decoded image {user_image_content_type}, size: {len(user_image_bytes)} bytes")
print(
f"FastAPI Server (Req {internal_request_id}): Decoded image {user_image_content_type}, size: {len(user_image_bytes)} bytes"
)
except Exception as e:
print(f"FastAPI Server (Req {internal_request_id}): Error decoding base64 image: {e}")
raise HTTPException(status_code=400, detail=f"Invalid base64 image data: {e}")
print(
f"FastAPI Server (Req {internal_request_id}): Error decoding base64 image: {e}"
)
raise HTTPException(
status_code=400,
detail=f"Invalid base64 image data: {e}",
)
else:
print(f"FastAPI Server (Req {internal_request_id}): Warning - Remote image URL '{image_url_data}' ignored. Only data URIs supported.")
print(
f"FastAPI Server (Req {internal_request_id}): Warning - Remote image URL '{image_url_data}' ignored. Only data URIs supported."
)
# Consider if you want to break after the first user message or aggregate all
# break

@@ -135,19 +161,24 @@ async def create_chat_completion(request: ChatCompletionRequest):
text_payload = {"request_id": internal_request_id, "text": final_user_text}
arrow_text_data = pa.array([text_payload])
node.send_output(DORA_TEXT_OUTPUT_TOPIC, arrow_text_data)
print(f"FastAPI Server (Req {internal_request_id}): Sent text to DORA topic '{DORA_TEXT_OUTPUT_TOPIC}'.")
print(
f"FastAPI Server (Req {internal_request_id}): Sent text to DORA topic '{DORA_TEXT_OUTPUT_TOPIC}'."
)
data_sent_to_dora = True

if user_image_bytes:
image_payload = {
"request_id": internal_request_id,
"image_bytes": user_image_bytes,
"image_content_type": user_image_content_type or "application/octet-stream"
"image_content_type": user_image_content_type
or "application/octet-stream",
}
arrow_image_data = pa.array([image_payload])
node.send_output(DORA_IMAGE_OUTPUT_TOPIC, arrow_image_data)
print(f"FastAPI Server (Req {internal_request_id}): Sent image to DORA topic '{DORA_IMAGE_OUTPUT_TOPIC}'.")
prompt_tokens += len(user_image_bytes) # Crude image token approximation
print(
f"FastAPI Server (Req {internal_request_id}): Sent image to DORA topic '{DORA_IMAGE_OUTPUT_TOPIC}'."
)
prompt_tokens += len(user_image_bytes) # Crude image token approximation
data_sent_to_dora = True

response_str: str
@@ -158,50 +189,68 @@ async def create_chat_completion(request: ChatCompletionRequest):
response_str = "No user text or image found to send to DORA."
print(f"FastAPI Server (Req {internal_request_id}): {response_str}")
else:
print(f"FastAPI Server (Req {internal_request_id}): Waiting for response from DORA on topic '{DORA_RESPONSE_INPUT_TOPIC}'...")
print(
f"FastAPI Server (Req {internal_request_id}): Waiting for response from DORA on topic '{DORA_RESPONSE_INPUT_TOPIC}'..."
)
response_str = f"Timeout: No response from DORA for request_id {internal_request_id} within {DORA_RESPONSE_TIMEOUT_SECONDS}s."
# WARNING: This blocking `node.next()` loop is not ideal for highly concurrent requests
# in a single FastAPI worker process, as one request might block others or consume
# a response meant for another if `request_id` matching isn't perfect or fast enough.
# A more robust solution would involve a dedicated listener task and async Futures/Queues.
start_wait_time = time.monotonic()
while time.monotonic() - start_wait_time < DORA_RESPONSE_TIMEOUT_SECONDS:
remaining_timeout = DORA_RESPONSE_TIMEOUT_SECONDS - (time.monotonic() - start_wait_time)
if remaining_timeout <= 0: break
remaining_timeout = DORA_RESPONSE_TIMEOUT_SECONDS - (
time.monotonic() - start_wait_time
)
if remaining_timeout <= 0:
break

event = node.next(
timeout=min(1.0, remaining_timeout)
) # Poll with a smaller timeout

event = node.next(timeout=min(1.0, remaining_timeout)) # Poll with a smaller timeout
if event is None: # Timeout for this poll iteration
if event is None: # Timeout for this poll iteration
continue

if event["type"] == "INPUT" and event["id"] == DORA_RESPONSE_INPUT_TOPIC:
response_value_arrow = event["value"]
if response_value_arrow and len(response_value_arrow) > 0:
dora_response_data = response_value_arrow[0].as_py() # Expecting a dict
dora_response_data = response_value_arrow[
0
].as_py() # Expecting a dict
if isinstance(dora_response_data, dict):
resp_request_id = dora_response_data.get("request_id")
if resp_request_id == internal_request_id:
response_str = dora_response_data.get("response_text", f"DORA response for {internal_request_id} missing 'response_text'.")
print(f"FastAPI Server (Req {internal_request_id}): Received correlated DORA response.")
break # Correct response received
else:
# This response is for another request. Ideally, store it.
print(f"FastAPI Server (Req {internal_request_id}): Received DORA response for different request_id '{resp_request_id}'. Discarding and waiting. THIS IS A CONCURRENCY ISSUE.")
# unmatched_dora_responses[resp_request_id] = dora_response_data # Example of storing
response_str = dora_response_data.get(
"response_text",
f"DORA response for {internal_request_id} missing 'response_text'.",
)
print(
f"FastAPI Server (Req {internal_request_id}): Received correlated DORA response."
)
break # Correct response received
# This response is for another request. Ideally, store it.
print(
f"FastAPI Server (Req {internal_request_id}): Received DORA response for different request_id '{resp_request_id}'. Discarding and waiting. THIS IS A CONCURRENCY ISSUE."
)
# unmatched_dora_responses[resp_request_id] = dora_response_data # Example of storing
else:
response_str = f"Unrecognized DORA response format for {internal_request_id}: {str(dora_response_data)[:100]}"
break
else:
response_str = f"Empty response payload from DORA for {internal_request_id}."
response_str = (
f"Empty response payload from DORA for {internal_request_id}."
)
break
elif event["type"] == "ERROR":
response_str = f"Error event from DORA for {internal_request_id}: {event.get('value', event.get('error', 'Unknown DORA Error'))}"
print(response_str)
break
else: # Outer while loop timed out
print(f"FastAPI Server (Req {internal_request_id}): Overall timeout waiting for DORA response.")

else: # Outer while loop timed out
print(
f"FastAPI Server (Req {internal_request_id}): Overall timeout waiting for DORA response."
)

completion_tokens = len(response_str)
total_tokens = prompt_tokens + completion_tokens
@@ -213,7 +262,9 @@ async def create_chat_completion(request: ChatCompletionRequest):
choices=[
ChatCompletionChoice(
index=0,
message=ChatCompletionChoiceMessage(role="assistant", content=response_str),
message=ChatCompletionChoiceMessage(
role="assistant", content=response_str
),
finish_reason="stop",
)
],
@@ -224,6 +275,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
),
)


@app.get("/v1/models")
async def list_models():
return {
@@ -231,12 +283,17 @@ async def list_models():
"data": [
{
"id": "dora-multi-stream-vision",
"object": "model", "created": int(time.time()), "owned_by": "dora-ai",
"permission": [], "root": "dora-multi-stream-vision", "parent": None,
"object": "model",
"created": int(time.time()),
"owned_by": "dora-ai",
"permission": [],
"root": "dora-multi-stream-vision",
"parent": None,
},
],
}


async def run_fastapi_server_task():
config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
server = uvicorn.Server(config)
@@ -244,6 +301,7 @@ async def run_fastapi_server_task():
await server.serve()
print("FastAPI Server: Uvicorn server stopped.")


async def run_dora_main_loop_task():
if not node:
print("FastAPI Server: DORA node not initialized, DORA main loop skipped.")
@@ -253,21 +311,31 @@ async def run_dora_main_loop_task():
while True:
# This loop is primarily for the main "STOP" event for the FastAPI node itself.
# Individual request/response cycles are handled within the endpoint.
event = node.next(timeout=1.0) # Check for STOP periodically
event = node.next(timeout=1.0) # Check for STOP periodically
if event is None:
await asyncio.sleep(0.01) # Yield control if no event
await asyncio.sleep(0.01) # Yield control if no event
continue
if event["type"] == "STOP":
print("FastAPI Server: DORA STOP event received. Requesting server shutdown.")
print(
"FastAPI Server: DORA STOP event received. Requesting server shutdown."
)
# Attempt to gracefully shut down Uvicorn
# This is tricky; uvicorn's server.shutdown() or server.should_exit might be better
# For simplicity, we cancel the server task.
for task in asyncio.all_tasks():
# Identify the server task more reliably if possible
if task.get_coro().__name__ == 'serve' and hasattr(task.get_coro(), 'cr_frame') and \
isinstance(task.get_coro().cr_frame.f_locals.get('self'), uvicorn.Server):
if (
task.get_coro().__name__ == "serve"
and hasattr(task.get_coro(), "cr_frame")
and isinstance(
task.get_coro().cr_frame.f_locals.get("self"),
uvicorn.Server,
)
):
task.cancel()
print("FastAPI Server: Uvicorn server task cancellation requested.")
print(
"FastAPI Server: Uvicorn server task cancellation requested."
)
break
# Handle other unexpected general inputs/errors for the FastAPI node if necessary
# elif event["type"] == "INPUT":
@@ -280,9 +348,10 @@ async def run_dora_main_loop_task():
finally:
print("FastAPI Server: DORA main loop listener finished.")


async def main_async_runner():
server_task = asyncio.create_task(run_fastapi_server_task())
# Only run the DORA main loop if the node was initialized.
# This loop is mainly for the STOP event.
dora_listener_task = None
@@ -293,17 +362,19 @@ async def main_async_runner():
tasks_to_wait_for = [server_task]

done, pending = await asyncio.wait(
tasks_to_wait_for, return_when=asyncio.FIRST_COMPLETED,
tasks_to_wait_for,
return_when=asyncio.FIRST_COMPLETED,
)

for task in pending:
print(f"FastAPI Server: Cancelling pending task: {task.get_name()}")
task.cancel()
if pending:
await asyncio.gather(*pending, return_exceptions=True)
print("FastAPI Server: Application shutdown complete.")


def main():
print("FastAPI Server: Starting application...")
try:
@@ -313,5 +384,6 @@ def main():
finally:
print("FastAPI Server: Exited main function.")


if __name__ == "__main__":
main()
main()

+ 9
- 11
node-hub/dora-openai-server/pyproject.toml View File

@@ -2,8 +2,8 @@
name = "dora-openai-server"
version = "0.3.11"
authors = [
{ name = "Haixuan Xavier Tao", email = "tao.xavier@outlook.com" },
{ name = "Enzo Le Van", email = "dev@enzo-le-van.fr" },
{ name = "Haixuan Xavier Tao", email = "tao.xavier@outlook.com" },
{ name = "Enzo Le Van", email = "dev@enzo-le-van.fr" },
]
description = "Dora OpenAI API Server"
license = { text = "MIT" }
@@ -11,14 +11,13 @@ readme = "README.md"
requires-python = ">=3.8"

dependencies = [
"dora-rs >= 0.3.9",
"numpy < 2.0.0",
"pyarrow >= 5.0.0",

"fastapi >= 0.115",
"asyncio >= 3.4",
"uvicorn >= 0.31",
"pydantic >= 2.9",
"dora-rs >= 0.3.9",
"numpy < 2.0.0",
"pyarrow >= 5.0.0",
"fastapi >= 0.115",
"asyncio >= 3.4",
"uvicorn >= 0.31",
"pydantic >= 2.9",
]

[dependency-groups]
@@ -29,7 +28,6 @@ dora-openai-server = "dora_openai_server.main:main"

[tool.ruff.lint]
extend-select = [
"D", # pydocstyle
"UP", # Ruff's UP rule
"PERF", # Ruff's PERF rule
"RET", # Ruff's RET rule


Loading…
Cancel
Save