
V0.1.0

pull/1/head
gjl committed 1 year ago, parent commit b5503c2b19
21 changed files with 1221 additions and 19 deletions
  1. +13 -1    requirements.txt
  2. +177 -0   src/mindpilot/app/api/api_schemas.py
  3. +1 -1     src/mindpilot/app/api/chat_routes.py
  4. +202 -0   src/mindpilot/app/callback_handler/agent_callback_handler.py
  5. +33 -0    src/mindpilot/app/callback_handler/conversation_callback_handler.py
  6. +13 -13   src/mindpilot/app/chat/chat.py
  7. +51 -0    src/mindpilot/app/chat/utils.py
  8. +0 -0     src/mindpilot/app/configs/__init__.py
  9. +122 -0   src/mindpilot/app/configs/prompt_config.py
  10. +78 -0   src/mindpilot/app/memory/conversation_db_buffer_memory.py
  11. +4 -0    src/mindpilot/app/pydantic_v1.py
  12. +3 -0    src/mindpilot/app/pydantic_v2.py
  13. +12 -0   src/mindpilot/app/tools/__init__.py
  14. +13 -0   src/mindpilot/app/tools/arxiv.py
  15. +19 -0   src/mindpilot/app/tools/calculate.py
  16. +142 -0  src/mindpilot/app/tools/search_internet.py
  17. +13 -0   src/mindpilot/app/tools/shell.py
  18. +146 -0  src/mindpilot/app/tools/tools_registry.py
  19. +33 -0   src/mindpilot/app/tools/weather_check.py
  20. +19 -0   src/mindpilot/app/tools/wolfram.py
  21. +127 -4  src/mindpilot/app/utils.py

+ 13  - 1   requirements.txt

@@ -5,4 +5,16 @@ uvicorn == 0.30.1
dataclasses-json == 0.6.7
langchain == 0.2.7
starlette~=0.37.2
sse-starlette == 2.1.2
openai == 1.35.14
langchain_openai == 0.1.16
sqlalchemy~=2.0.31
pillow~=10.4.0
requests~=2.32.2
httpx~=0.27.0
pydantic~=2.8.2
numexpr~=2.8.7
langchain_community==0.2.7
markdownify~=0.13.1
strsimpy~=0.2.1
metaphor_python==0.1.23

+ 177  - 0   src/mindpilot/app/api/api_schemas.py

@@ -0,0 +1,177 @@
from __future__ import annotations

import json
import time
from typing import Dict, List, Literal, Optional, Union

from fastapi import UploadFile
from openai.types.chat import (
    ChatCompletionMessageParam,
    ChatCompletionToolChoiceOptionParam,
    ChatCompletionToolParam,
    completion_create_params,
)

# from chatchat.configs import DEFAULT_LLM_MODEL, TEMPERATURE
DEFAULT_LLM_MODEL = None  # TODO: move into a config file
TEMPERATURE = 0.8
from ..pydantic_v2 import AnyUrl, BaseModel, Field
from ..utils import MsgType


class OpenAIBaseInput(BaseModel):
    user: Optional[str] = None
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Optional[Dict] = None
    extra_query: Optional[Dict] = None
    extra_json: Optional[Dict] = Field(None, alias="extra_body")
    timeout: Optional[float] = None

    class Config:
        extra = "allow"


class OpenAIChatInput(OpenAIBaseInput):
    messages: List[ChatCompletionMessageParam]
    model: str = DEFAULT_LLM_MODEL
    frequency_penalty: Optional[float] = None
    function_call: Optional[completion_create_params.FunctionCall] = None
    functions: Optional[List[completion_create_params.Function]] = None
    logit_bias: Optional[Dict[str, int]] = None
    logprobs: Optional[bool] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[float] = None
    response_format: Optional[completion_create_params.ResponseFormat] = None
    seed: Optional[int] = None
    stop: Union[Optional[str], List[str]] = None
    stream: Optional[bool] = None
    temperature: Optional[float] = TEMPERATURE
    tool_choice: Optional[Union[ChatCompletionToolChoiceOptionParam, str]] = None
    tools: Optional[List[Union[ChatCompletionToolParam, str]]] = None
    top_logprobs: Optional[int] = None
    top_p: Optional[float] = None


class OpenAIEmbeddingsInput(OpenAIBaseInput):
    input: Union[str, List[str]]
    model: str
    dimensions: Optional[int] = None
    encoding_format: Optional[Literal["float", "base64"]] = None


class OpenAIImageBaseInput(OpenAIBaseInput):
    model: str
    n: int = 1
    response_format: Optional[Literal["url", "b64_json"]] = None
    size: Optional[
        Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]
    ] = "256x256"


class OpenAIImageGenerationsInput(OpenAIImageBaseInput):
    prompt: str
    quality: Optional[Literal["standard", "hd"]] = None
    style: Optional[Literal["vivid", "natural"]] = None


class OpenAIImageVariationsInput(OpenAIImageBaseInput):
    image: Union[UploadFile, AnyUrl]


class OpenAIImageEditsInput(OpenAIImageVariationsInput):
    prompt: str
    mask: Union[UploadFile, AnyUrl]


class OpenAIAudioTranslationsInput(OpenAIBaseInput):
    file: Union[UploadFile, AnyUrl]
    model: str
    prompt: Optional[str] = None
    response_format: Optional[str] = None
    temperature: float = TEMPERATURE


class OpenAIAudioTranscriptionsInput(OpenAIAudioTranslationsInput):
    language: Optional[str] = None
    timestamp_granularities: Optional[List[Literal["word", "segment"]]] = None


class OpenAIAudioSpeechInput(OpenAIBaseInput):
    input: str
    model: str
    voice: str
    response_format: Optional[
        Literal["mp3", "opus", "aac", "flac", "pcm", "wav"]
    ] = None
    speed: Optional[float] = None


# class OpenAIFileInput(OpenAIBaseInput):
#     file: UploadFile  # FileTypes
#     purpose: Literal["fine-tune", "assistants"] = "assistants"


class OpenAIBaseOutput(BaseModel):
    id: Optional[str] = None
    content: Optional[str] = None
    model: Optional[str] = None
    object: Literal[
        "chat.completion", "chat.completion.chunk"
    ] = "chat.completion.chunk"
    role: Literal["assistant"] = "assistant"
    finish_reason: Optional[str] = None
    created: int = Field(default_factory=lambda: int(time.time()))
    tool_calls: List[Dict] = []

    status: Optional[int] = None  # AgentStatus
    message_type: int = MsgType.TEXT
    message_id: Optional[str] = None  # id in the database table
    is_ref: bool = False  # whether to show in a separate expander

    class Config:
        extra = "allow"

    def model_dump(self) -> dict:
        result = {
            "id": self.id,
            "object": self.object,
            "model": self.model,
            "created": self.created,
            "status": self.status,
            "message_type": self.message_type,
            "message_id": self.message_id,
            "is_ref": self.is_ref,
            **(self.model_extra or {}),
        }

        if self.object == "chat.completion.chunk":
            result["choices"] = [
                {
                    "delta": {
                        "content": self.content,
                        "tool_calls": self.tool_calls,
                    },
                    "role": self.role,
                }
            ]
        elif self.object == "chat.completion":
            result["choices"] = [
                {
                    "message": {
                        "role": self.role,
                        "content": self.content,
                        "finish_reason": self.finish_reason,
                        "tool_calls": self.tool_calls,
                    }
                }
            ]
        return result

    def model_dump_json(self):
        return json.dumps(self.model_dump(), ensure_ascii=False)


class OpenAIChatOutput(OpenAIBaseOutput):
    ...
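
A minimal usage sketch of OpenAIChatOutput as defined above (illustrative only; the id, model name, and content are made up, and the import path assumes the repo layout shown in this commit):

    from src.mindpilot.app.api.api_schemas import OpenAIChatOutput

    chunk = OpenAIChatOutput(
        id="chatcmpl-0",        # hypothetical id
        model="gpt-3.5-turbo",  # hypothetical model name
        content="Hello",
        object="chat.completion.chunk",
    )
    # model_dump() nests content under choices[0]["delta"] for chunk objects,
    # and under choices[0]["message"] for full completions
    print(chunk.model_dump_json())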

+ 1  - 1   src/mindpilot/app/api/chat_routes.py

@@ -137,7 +137,7 @@ chat_router.post(
# else None
# )
#
-# chat_model_config = {}  # TODO: allow configuring models from the frontend
+# chat_model_config = {}
# tool_names = [x["function"]["name"] for x in body.tools]
# tool_config = {name: get_tool_config(name) for name in tool_names}
# result = await chat(


+ 202  - 0   src/mindpilot/app/callback_handler/agent_callback_handler.py

@@ -0,0 +1,202 @@
from __future__ import annotations

import asyncio
import json
from typing import Any, Dict, List, Optional
from uuid import UUID

from langchain.callbacks import AsyncIteratorCallbackHandler
from langchain.schema import AgentAction, AgentFinish
from langchain_core.outputs import LLMResult


def dumps(obj: Dict) -> str:
    return json.dumps(obj, ensure_ascii=False)


class AgentStatus:
    llm_start: int = 1
    llm_new_token: int = 2
    llm_end: int = 3
    agent_action: int = 4
    agent_finish: int = 5
    tool_start: int = 6
    tool_end: int = 7
    error: int = 8


class AgentExecutorAsyncIteratorCallbackHandler(AsyncIteratorCallbackHandler):
    def __init__(self):
        super().__init__()
        self.queue = asyncio.Queue()
        self.done = asyncio.Event()
        self.out = True

    async def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        data = {
            "status": AgentStatus.llm_start,
            "text": "",
        }
        self.done.clear()
        self.queue.put_nowait(dumps(data))

    async def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        special_tokens = ["\nAction:", "\nObservation:", "<|observation|>"]
        for stoken in special_tokens:
            if stoken in token:
                before_action = token.split(stoken)[0]
                data = {
                    "status": AgentStatus.llm_new_token,
                    "text": before_action + "\n",
                }
                self.queue.put_nowait(dumps(data))
                self.out = False
                break

        if token is not None and token != "" and self.out:
            data = {
                "status": AgentStatus.llm_new_token,
                "text": token,
            }
            self.queue.put_nowait(dumps(data))

    async def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        data = {
            "status": AgentStatus.llm_start,
            "text": "",
        }
        self.done.clear()
        self.queue.put_nowait(dumps(data))

    async def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        data = {
            "status": AgentStatus.llm_end,
            "text": response.generations[0][0].message.content,
        }
        self.queue.put_nowait(dumps(data))

    async def on_llm_error(
        self, error: Exception | KeyboardInterrupt, **kwargs: Any
    ) -> None:
        data = {
            "status": AgentStatus.error,
            "text": str(error),
        }
        self.queue.put_nowait(dumps(data))

    async def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        data = {
            "run_id": str(run_id),
            "status": AgentStatus.tool_start,
            "tool": serialized["name"],
            "tool_input": input_str,
        }
        self.queue.put_nowait(dumps(data))

    async def on_tool_end(
        self,
        output: str,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        """Run when tool ends running."""
        data = {
            "run_id": str(run_id),
            "status": AgentStatus.tool_end,
            "tool_output": output,
        }
        # self.done.clear()
        self.queue.put_nowait(dumps(data))

    async def on_tool_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        """Run when tool errors."""
        data = {
            "run_id": str(run_id),
            "status": AgentStatus.tool_end,
            "tool_output": str(error),
            "is_error": True,
        }
        # self.done.clear()
        self.queue.put_nowait(dumps(data))

    async def on_agent_action(
        self,
        action: AgentAction,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        data = {
            "status": AgentStatus.agent_action,
            "tool_name": action.tool,
            "tool_input": action.tool_input,
            "text": action.log,
        }
        self.queue.put_nowait(dumps(data))

    async def on_agent_finish(
        self,
        finish: AgentFinish,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        if "Thought:" in finish.return_values["output"]:
            finish.return_values["output"] = finish.return_values["output"].replace(
                "Thought:", ""
            )

        data = {
            "status": AgentStatus.agent_finish,
            "text": finish.return_values["output"],
        }
        self.queue.put_nowait(dumps(data))

    async def on_chain_end(
        self,
        outputs: Dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        tags: List[str] | None = None,
        **kwargs: Any,
    ) -> None:
        self.done.set()
        self.out = True
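
A rough consumer sketch for this handler, assuming the aiter() loop inherited from AsyncIteratorCallbackHandler (the agent invocation that feeds the queue is elided, and the import path assumes this commit's layout):

    import json

    from src.mindpilot.app.callback_handler.agent_callback_handler import (
        AgentExecutorAsyncIteratorCallbackHandler,
        AgentStatus,
    )

    async def stream_agent_events(handler: AgentExecutorAsyncIteratorCallbackHandler):
        # aiter() yields the JSON strings queued by the on_* hooks until on_chain_end sets done
        async for chunk in handler.aiter():
            event = json.loads(chunk)
            if event["status"] == AgentStatus.tool_start:
                print(f"calling {event['tool']} with {event['tool_input']}")
            elif event["status"] == AgentStatus.agent_finish:
                print("final answer:", event["text"])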

+ 33  - 0   src/mindpilot/app/callback_handler/conversation_callback_handler.py

@@ -0,0 +1,33 @@
from typing import Any, Dict, List

from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import LLMResult

# from chatchat.server.db.repository import update_message


class ConversationCallbackHandler(BaseCallbackHandler):
    raise_error: bool = True

    def __init__(
        self, conversation_id: str, message_id: str, chat_type: str, query: str
    ):
        self.conversation_id = conversation_id
        self.message_id = message_id
        self.chat_type = chat_type
        self.query = query
        self.start_at = None

    @property
    def always_verbose(self) -> bool:
        """Whether to call verbose callbacks even if verbose is False."""
        return True

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        pass

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        answer = response.generations[0][0].text
        # update_message(self.message_id, answer)

+ 13  - 13   src/mindpilot/app/chat/chat.py

@@ -13,16 +13,16 @@ from langchain_core.output_parsers import StrOutputParser
from sse_starlette.sse import EventSourceResponse

from ..agent.agents_registry import agents_registry
-from chatchat.server.api_server.api_schemas import OpenAIChatOutput
-from chatchat.server.callback_handler.agent_callback_handler import (
+from ..api.api_schemas import OpenAIChatOutput
+from ..callback_handler.agent_callback_handler import (
    AgentExecutorAsyncIteratorCallbackHandler,
    AgentStatus,
)
-from chatchat.server.chat.utils import History
-from chatchat.server.memory.conversation_db_buffer_memory import (
-    ConversationBufferDBMemory,
-)
-from chatchat.server.utils import (
+from ..chat.utils import History
+# from chatchat.server.memory.conversation_db_buffer_memory import (
+#     ConversationBufferDBMemory,
+# )
+from ..utils import (
    MsgType,
    get_ChatOpenAI,
    get_prompt_template,
@@ -43,7 +43,6 @@ def create_models_from_config(configs, callbacks, stream):
        max_tokens=params.get("max_tokens", 1000),
        callbacks=callbacks,
        streaming=stream,
-        local_wrap=True,
    )
    models[model_type] = model_instance
    prompt_name = params.get("prompt_name", "default")
@@ -67,11 +66,12 @@ def create_models_chains(
            [i.to_msg_template() for i in history] + [input_msg]
        )
    elif conversation_id and history_len > 0:
-        memory = ConversationBufferDBMemory(
-            conversation_id=conversation_id,
-            llm=models["llm_model"],
-            message_limit=history_len,
-        )
+        # memory = ConversationBufferDBMemory(
+        #     conversation_id=conversation_id,
+        #     llm=models["llm_model"],
+        #     message_limit=history_len,
+        # )
+        pass
    else:
        input_msg = History(role="user", content=prompts["llm_model"]).to_msg_template(
            False


+ 51  - 0   src/mindpilot/app/chat/utils.py

@@ -0,0 +1,51 @@
import logging
from functools import lru_cache
from typing import Dict, List, Tuple, Union

from langchain.prompts.chat import ChatMessagePromptTemplate

from ..pydantic_v2 import BaseModel, Field

logger = logging.getLogger()


class History(BaseModel):
    """
    Conversation history.
    Can be built from a dict, e.g.
        h = History(**{"role": "user", "content": "你好"})
    and converted to a tuple, e.g.
        h.to_msg_tuple() == ("human", "你好")
    """

    role: str = Field(...)
    content: str = Field(...)

    def to_msg_tuple(self):
        return "ai" if self.role == "assistant" else "human", self.content

    def to_msg_template(self, is_raw=True) -> ChatMessagePromptTemplate:
        role_maps = {
            "ai": "assistant",
            "human": "user",
        }
        role = role_maps.get(self.role, self.role)
        if is_raw:  # by default, history messages are plain text without input_variables
            content = "{% raw %}" + self.content + "{% endraw %}"
        else:
            content = self.content

        return ChatMessagePromptTemplate.from_template(
            content,
            "jinja2",
            role=role,
        )

    @classmethod
    def from_data(cls, h: Union[List, Tuple, Dict]) -> "History":
        if isinstance(h, (list, tuple)) and len(h) >= 2:
            h = cls(role=h[0], content=h[1])
        elif isinstance(h, dict):
            h = cls(**h)

        return h
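
A quick illustration of the conversions History supports (the values are arbitrary):

    h = History.from_data(("assistant", "你好"))
    assert h.to_msg_tuple() == ("ai", "你好")

    # is_raw=True wraps the content in {% raw %} so jinja2 treats history text literally
    tmpl = History(role="user", content="1 + 1 = ?").to_msg_template()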

+ 0  - 0   src/mindpilot/app/configs/__init__.py


+ 122  - 0   src/mindpilot/app/configs/prompt_config.py

@@ -0,0 +1,122 @@
# Agent templates use f-string; all other templates use the jinja2 format.

PROMPT_TEMPLATES = {
    "preprocess_model": {
        "default": "你只要回复0 和 1 ,代表不需要使用工具。以下几种问题不需要使用工具:"
        "1. 需要联网查询的内容\n"
        "2. 需要计算的内容\n"
        "3. 需要查询实时性的内容\n"
        "如果我的输入满足这几种情况,返回1。其他输入,请你回复0,你只要返回一个数字\n"
        "这是我的问题:"
    },
    "llm_model": {
        "default": "{{input}}",
        "with_history": "The following is a friendly conversation between a human and an AI. "
        "The AI is talkative and provides lots of specific details from its context. "
        "If the AI does not know the answer to a question, it truthfully says it does not know.\n\n"
        "Current conversation:\n"
        "{{history}}\n"
        "Human: {{input}}\n"
        "AI:",
        "rag": "【指令】根据已知信息,简洁和专业的来回答问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题”,不允许在答案中添加编造成分,答案请使用中文。\n\n"
        "【已知信息】{{context}}\n\n"
        "【问题】{{question}}\n",
        "rag_default": "{{question}}",
    },
    "action_model": {
        "GPT-4": "Answer the following questions as best you can. You have access to the following tools:\n"
        "The way you use the tools is by specifying a json blob.\n"
        "Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n"
        'The only values that should be in the "action" field are: {tool_names}\n'
        "The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\n"
        "```\n\n"
        "{{{{\n"
        ' "action": $TOOL_NAME,\n'
        ' "action_input": $INPUT\n'
        "}}}}\n"
        "```\n\n"
        "ALWAYS use the following format:\n"
        "Question: the input question you must answer\n"
        "Thought: you should always think about what to do\n"
        "Action:\n"
        "```\n\n"
        "$JSON_BLOB"
        "```\n\n"
        "Observation: the result of the action\n"
        "... (this Thought/Action/Observation can repeat N times)\n"
        "Thought: I now know the final answer\n"
        "Final Answer: the final answer to the original input question\n"
        "Begin! Reminder to always use the exact characters `Final Answer` when responding.\n"
        "Question:{input}\n"
        "Thought:{agent_scratchpad}\n",
        "ChatGLM3": "You can answer using the tools. Respond to the human as helpfully and accurately as possible.\n"
        "You have access to the following tools:\n"
        "{tools}\n"
        "Use a json blob to specify a tool by providing an action key (tool name)\n"
        "and an action_input key (tool input).\n"
        'Valid "action" values: "Final Answer" or [{tool_names}]\n'
        "Provide only ONE action per $JSON_BLOB, as shown:\n\n"
        "```\n"
        "{{{{\n"
        ' "action": $TOOL_NAME,\n'
        ' "action_input": $INPUT\n'
        "}}}}\n"
        "```\n\n"
        "Follow this format:\n\n"
        "Question: input question to answer\n"
        "Thought: consider previous and subsequent steps\n"
        "Action:\n"
        "```\n"
        "$JSON_BLOB\n"
        "```\n"
        "Observation: action result\n"
        "... (repeat Thought/Action/Observation N times)\n"
        "Thought: I know what to respond\n"
        "Action:\n"
        "```\n"
        "{{{{\n"
        ' "action": "Final Answer",\n'
        ' "action_input": "Final response to human"\n'
        "}}}}\n"
        "Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary.\n"
        "Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.\n"
        "Question: {input}\n\n"
        "{agent_scratchpad}\n",
        "qwen": "Answer the following questions as best you can. You have access to the following APIs:\n\n"
        "{tools}\n\n"
        "Use the following format:\n\n"
        "Question: the input question you must answer\n"
        "Thought: you should always think about what to do\n"
        "Action: the action to take, should be one of [{tool_names}]\n"
        "Action Input: the input to the action\n"
        "Observation: the result of the action\n"
        "... (this Thought/Action/Action Input/Observation can be repeated zero or more times)\n"
        "Thought: I now know the final answer\n"
        "Final Answer: the final answer to the original input question\n\n"
        "Format the Action Input as a JSON object.\n\n"
        "Begin!\n\n"
        "Question: {input}\n\n"
        "{agent_scratchpad}\n\n",
        "structured-chat-agent": "Respond to the human as helpfully and accurately as possible. You have access to the following tools:\n\n"
        "{tools}\n\n"
        "Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).\n\n"
        'Valid "action" values: "Final Answer" or {tool_names}\n\n'
        "Provide only ONE action per $JSON_BLOB, as shown:\n\n"
        '```\n{{\n  "action": $TOOL_NAME,\n  "action_input": $INPUT\n}}\n```\n\n'
        "Follow this format:\n\n"
        "Question: input question to answer\n"
        "Thought: consider previous and subsequent steps\n"
        "Action:\n```\n$JSON_BLOB\n```\n"
        "Observation: action result\n"
        "... (repeat Thought/Action/Observation N times)\n"
        "Thought: I know what to respond\n"
        'Action:\n```\n{{\n  "action": "Final Answer",\n  "action_input": "Final response to human"\n}}\n\n'
        "Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation\n"
        "{input}\n\n"
        "{agent_scratchpad}\n\n",
        # '(reminder to respond in a JSON blob no matter what)'
    },
    "postprocess_model": {
        "default": "{{input}}",
    },
}
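
The templates are plain strings keyed by model type and template name, so a lookup is just a nested dict access (get_prompt_template in app/utils.py wraps exactly this):

    from src.mindpilot.app.configs.prompt_config import PROMPT_TEMPLATES

    # jinja2-style template used when conversation history is present
    tmpl = PROMPT_TEMPLATES["llm_model"]["with_history"]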

+ 78  - 0   src/mindpilot/app/memory/conversation_db_buffer_memory.py

@@ -0,0 +1,78 @@
import logging
from typing import Any, Dict, List

from langchain.memory.chat_memory import BaseChatMemory
from langchain.schema import AIMessage, BaseMessage, HumanMessage, get_buffer_string
from langchain.schema.language_model import BaseLanguageModel

from chatchat.server.db.models.message_model import MessageModel
from chatchat.server.db.repository.message_repository import filter_message


class ConversationBufferDBMemory(BaseChatMemory):
    conversation_id: str
    human_prefix: str = "Human"
    ai_prefix: str = "Assistant"
    llm: BaseLanguageModel
    memory_key: str = "history"
    max_token_limit: int = 2000
    message_limit: int = 10

    @property
    def buffer(self) -> List[BaseMessage]:
        """String buffer of memory."""
        # fetch a limited number of messages in descending order, then reverse them

        messages = filter_message(
            conversation_id=self.conversation_id, limit=self.message_limit
        )
        # records come back in reverse chronological order; restore chronological order
        messages = list(reversed(messages))
        chat_messages: List[BaseMessage] = []
        for message in messages:
            chat_messages.append(HumanMessage(content=message["query"]))
            chat_messages.append(AIMessage(content=message["response"]))

        if not chat_messages:
            return []

        # prune the chat messages if they exceed the max token limit
        curr_buffer_length = self.llm.get_num_tokens(get_buffer_string(chat_messages))
        if curr_buffer_length > self.max_token_limit:
            pruned_memory = []
            while curr_buffer_length > self.max_token_limit and chat_messages:
                pruned_memory.append(chat_messages.pop(0))
                curr_buffer_length = self.llm.get_num_tokens(
                    get_buffer_string(chat_messages)
                )

        return chat_messages

    @property
    def memory_variables(self) -> List[str]:
        """Will always return list of memory variables.

        :meta private:
        """
        return [self.memory_key]

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Return history buffer."""
        buffer: Any = self.buffer
        if self.return_messages:
            final_buffer: Any = buffer
        else:
            final_buffer = get_buffer_string(
                buffer,
                human_prefix=self.human_prefix,
                ai_prefix=self.ai_prefix,
            )
        return {self.memory_key: final_buffer}

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Nothing should be saved or changed."""
        pass

    def clear(self) -> None:
        """Nothing to clear; got a memory like a vault."""
        pass
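
A sketch of how a chain might consume this memory once the chatchat DB imports are wired up (the conversation id and llm below are placeholders):

    memory = ConversationBufferDBMemory(
        conversation_id="42",  # hypothetical conversation id
        llm=llm,               # any BaseLanguageModel, used only for token counting
        message_limit=10,
    )
    # returns {"history": "Human: ...\nAssistant: ..."} pruned to max_token_limit
    variables = memory.load_memory_variables({})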

+ 4  - 0   src/mindpilot/app/pydantic_v1.py

@@ -0,0 +1,4 @@
from langchain_core.pydantic_v1 import *
from pydantic.v1.fields import FieldInfo
from pydantic.v1.schema import model_schema
from pydantic.v1.typing import typing

+ 3  - 0   src/mindpilot/app/pydantic_v2.py

@@ -0,0 +1,3 @@
from pydantic import *
from pydantic import typing
from pydantic.fields import FieldInfo

+ 12  - 0   src/mindpilot/app/tools/__init__.py

@@ -0,0 +1,12 @@
# from .aqa_processor import aqa_processor
from .arxiv import arxiv
from .calculate import calculate
from .search_internet import search_internet
# from .search_local_knowledgebase import search_local_knowledgebase
# from .search_youtube import search_youtube
from .shell import shell
# from .text2image import text2images
# from .text2sql import text2sql
# from .vqa_processor import vqa_processor
from .weather_check import weather_check
from .wolfram import wolfram

+ 13  - 0   src/mindpilot/app/tools/arxiv.py

@@ -0,0 +1,13 @@
# LangChain's ArxivQueryRun tool
from ..pydantic_v1 import Field

from .tools_registry import BaseToolOutput, regist_tool


@regist_tool(title="ARXIV论文")
def arxiv(query: str = Field(description="The search query title")):
    """A wrapper around Arxiv.org for searching and retrieving scientific articles in various fields."""
    from langchain.tools.arxiv.tool import ArxivQueryRun

    tool = ArxivQueryRun()
    return BaseToolOutput(tool.run(tool_input=query))

+ 19  - 0   src/mindpilot/app/tools/calculate.py

@@ -0,0 +1,19 @@
from ..pydantic_v1 import Field

from .tools_registry import BaseToolOutput, regist_tool


@regist_tool(title="数学计算器")
def calculate(text: str = Field(description="a math expression")) -> float:
    """
    Useful to answer questions about simple calculations.
    Translate the user question to a math expression that can be evaluated by numexpr.
    """
    import numexpr

    try:
        ret = str(numexpr.evaluate(text))
    except Exception as e:
        ret = f"wrong: {e}"

    return BaseToolOutput(ret)

+ 142  - 0   src/mindpilot/app/tools/search_internet.py

@@ -0,0 +1,142 @@
from typing import Dict, List

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.utilities.bing_search import BingSearchAPIWrapper
from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from markdownify import markdownify
from strsimpy.normalized_levenshtein import NormalizedLevenshtein

from ..pydantic_v1 import Field
# from chatchat.server.utils import get_tool_config

from .tools_registry import BaseToolOutput, regist_tool


def bing_search(text, config):
    search = BingSearchAPIWrapper(
        bing_subscription_key=config["bing_key"],
        bing_search_url=config["bing_search_url"],
    )
    return search.results(text, config["result_len"])


def duckduckgo_search(text, config):
    search = DuckDuckGoSearchAPIWrapper()
    return search.results(text, config["result_len"])


def metaphor_search(
    text: str,
    config: dict,
) -> List[Dict]:
    from metaphor_python import Metaphor

    client = Metaphor(config["metaphor_api_key"])
    search = client.search(text, num_results=config["result_len"], use_autoprompt=True)
    contents = search.get_contents().contents
    for x in contents:
        x.extract = markdownify(x.extract)
    if config["split_result"]:
        docs = [
            Document(page_content=x.extract, metadata={"link": x.url, "title": x.title})
            for x in contents
        ]
        text_splitter = RecursiveCharacterTextSplitter(
            ["\n\n", "\n", ".", " "],
            chunk_size=config["chunk_size"],
            chunk_overlap=config["chunk_overlap"],
        )
        splitted_docs = text_splitter.split_documents(docs)
        if len(splitted_docs) > config["result_len"]:
            normal = NormalizedLevenshtein()
            for x in splitted_docs:
                x.metadata["score"] = normal.similarity(text, x.page_content)
            splitted_docs.sort(key=lambda x: x.metadata["score"], reverse=True)
            splitted_docs = splitted_docs[: config["result_len"]]

        docs = [
            {
                "snippet": x.page_content,
                "link": x.metadata["link"],
                "title": x.metadata["title"],
            }
            for x in splitted_docs
        ]
    else:
        docs = [
            {"snippet": x.extract, "link": x.url, "title": x.title} for x in contents
        ]

    return docs


SEARCH_ENGINES = {
    "bing": bing_search,
    "duckduckgo": duckduckgo_search,
    "metaphor": metaphor_search,
}


def search_result2docs(search_results):
    docs = []
    for result in search_results:
        doc = Document(
            page_content=result["snippet"] if "snippet" in result.keys() else "",
            metadata={
                "source": result["link"] if "link" in result.keys() else "",
                "filename": result["title"] if "title" in result.keys() else "",
            },
        )
        docs.append(doc)
    return docs


def search_engine(query: str, config: dict):
    search_engine_use = SEARCH_ENGINES[config["search_engine_name"]]
    results = search_engine_use(
        text=query, config=config["search_engine_config"][config["search_engine_name"]]
    )
    docs = search_result2docs(results)
    context = ""
    docs = [
        f"""出处 [{inum + 1}] [{doc.metadata["source"]}]({doc.metadata["source"]}) \n\n{doc.page_content}\n\n"""
        for inum, doc in enumerate(docs)
    ]

    for doc in docs:
        context += doc + "\n"
    return context


@regist_tool(title="互联网搜索")
def search_internet(query: str = Field(description="query for Internet search")):
    """Use this tool to use bing search engine to search the internet and get information."""
    # TODO: design a config file
    tool_config = {
        "use": False,
        "search_engine_name": "bing",
        "search_engine_config": {
            "bing": {
                "result_len": 3,
                "bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
                "bing_key": "0f42b09dce16474a81c01562ded071dc",
            },
            "metaphor": {
                "result_len": 3,
                "metaphor_api_key": "",
                "split_result": False,
                "chunk_size": 500,
                "chunk_overlap": 0,
            },
            "duckduckgo": {"result_len": 3},
        },
        "top_k": 10,
        "verbose": "Origin",
        "conclude_prompt": "<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有条理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。 "
        "</指令>\n<已知信息>{{ context }}</已知信息>\n"
        "<问题>\n"
        "{{ question }}\n"
        "</问题>\n",
    }
    return BaseToolOutput(search_engine(query=query, config=tool_config))
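
The engine dispatch can also be exercised directly; a hedged sketch using the keyless duckduckgo path and the config keys defined above (import path assumes this commit's layout):

    from src.mindpilot.app.tools.search_internet import search_engine

    config = {
        "search_engine_name": "duckduckgo",
        "search_engine_config": {"duckduckgo": {"result_len": 3}},
    }
    # returns one string with numbered sources followed by their snippets
    print(search_engine("LangChain agents", config))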

+ 13  - 0   src/mindpilot/app/tools/shell.py

@@ -0,0 +1,13 @@
# LangChain's Shell tool
from langchain_community.tools import ShellTool

from ..pydantic_v1 import Field

from .tools_registry import BaseToolOutput, regist_tool


@regist_tool(title="系统命令")
def shell(query: str = Field(description="The command to execute")):
    """Use Shell to execute system shell commands."""
    tool = ShellTool()
    return BaseToolOutput(tool.run(tool_input=query))

+ 146  - 0   src/mindpilot/app/tools/tools_registry.py

@@ -0,0 +1,146 @@
import json
import re
from typing import Any, Callable, Dict, Optional, Tuple, Type, Union

from langchain.agents import tool
from langchain_core.tools import BaseTool

from ..pydantic_v1 import BaseModel, Extra

__all__ = ["regist_tool", "BaseToolOutput"]


_TOOLS_REGISTRY = {}


# patch BaseTool to support extra fields, e.g. a title
BaseTool.Config.extra = Extra.allow

# patch BaseTool to support tool parameters defined using pydantic Field


def _new_parse_input(
    self,
    tool_input: Union[str, Dict],
) -> Union[str, Dict[str, Any]]:
    """Convert tool input to pydantic model."""
    input_args = self.args_schema
    if isinstance(tool_input, str):
        if input_args is not None:
            key_ = next(iter(input_args.__fields__.keys()))
            input_args.validate({key_: tool_input})
        return tool_input
    else:
        if input_args is not None:
            result = input_args.parse_obj(tool_input)
            return result.dict()


def _new_to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]:
    # For backwards compatibility, if run_input is a string,
    # pass as a positional argument.
    if isinstance(tool_input, str):
        return (tool_input,), {}
    else:
        # for a tool defined with `*args` parameters,
        # the args_schema has a field named `args`;
        # it should be expanded to actual *args
        # e.g.: test_tools
        #     .test_named_tool_decorator_return_direct
        #     .search_api
        if "args" in tool_input:
            args = tool_input["args"]
            if args is None:
                tool_input.pop("args")
                return (), tool_input
            elif isinstance(args, tuple):
                tool_input.pop("args")
                return args, tool_input
        return (), tool_input


BaseTool._parse_input = _new_parse_input
BaseTool._to_args_and_kwargs = _new_to_args_and_kwargs
###############################


def regist_tool(
    *args: Any,
    title: str = "",
    description: str = "",
    return_direct: bool = False,
    args_schema: Optional[Type[BaseModel]] = None,
    infer_schema: bool = True,
) -> Union[Callable, BaseTool]:
    """
    Wrapper around the langchain tool decorator that
    adds the tool to the registry automatically.
    """

    def _parse_tool(t: BaseTool):
        nonlocal description, title

        _TOOLS_REGISTRY[t.name] = t

        # change the default description
        if not description:
            if t.func is not None:
                description = t.func.__doc__
            elif t.coroutine is not None:
                description = t.coroutine.__doc__
        t.description = " ".join(re.split(r"\n+\s*", description))
        # set a default human-readable title
        if not title:
            title = "".join([x.capitalize() for x in t.name.split("_")])
        t.title = title

    def wrapper(def_func: Callable) -> BaseTool:
        partial_ = tool(
            *args,
            return_direct=return_direct,
            args_schema=args_schema,
            infer_schema=infer_schema,
        )
        t = partial_(def_func)
        _parse_tool(t)
        return t

    if len(args) == 0:
        return wrapper
    else:
        t = tool(
            *args,
            return_direct=return_direct,
            args_schema=args_schema,
            infer_schema=infer_schema,
        )
        _parse_tool(t)
        return t


class BaseToolOutput:
    """
    LLMs require a Tool's output to be a string, but when a Tool is used
    elsewhere it may be expected to return structured data. Wrapping the
    Tool's return value in this class satisfies both needs: the base class
    simply stringifies the value, or converts it to JSON when format="json"
    is given. Users can also subclass it to define their own conversion.
    """

    def __init__(
        self,
        data: Any,
        format: str = "",
        data_alias: str = "",
        **extras: Any,
    ) -> None:
        self.data = data
        self.format = format
        self.extras = extras
        if data_alias:
            setattr(self, data_alias, property(lambda obj: obj.data))

    def __str__(self) -> str:
        if self.format == "json":
            return json.dumps(self.data, ensure_ascii=False, indent=2)
        else:
            return str(self.data)
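
To see the decorator in action, a toy registration might look like this (everything below is illustrative and not part of the commit; import paths assume this commit's layout):

    from src.mindpilot.app.pydantic_v1 import Field
    from src.mindpilot.app.tools.tools_registry import (
        _TOOLS_REGISTRY,
        BaseToolOutput,
        regist_tool,
    )

    @regist_tool(title="Echo")
    def echo(text: str = Field(description="text to echo back")):
        """Echo the input back to the caller."""
        return BaseToolOutput({"echo": text}, format="json")

    t = _TOOLS_REGISTRY["echo"]
    print(t.title)         # "Echo", stored via the patched extra fields on BaseTool
    print(t.run("hello"))  # the str() of BaseToolOutput, i.e. the data rendered as JSON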

+ 33  - 0   src/mindpilot/app/tools/weather_check.py

@@ -0,0 +1,33 @@
"""
A simple single-parameter tool for checking the current weather.
"""
import requests

from ..pydantic_v1 import Field
# from chatchat.server.utils import get_tool_config

from .tools_registry import BaseToolOutput, regist_tool


@regist_tool(title="天气查询")
def weather_check(
    city: str = Field(description="City name, including city and county, like '厦门'"),
):
    """Use this tool to check the weather at a specific city."""

    tool_config = {
        "use": False,
        "api_key": "S8vrB4U_-c5mvAMiK",
    }
    api_key = tool_config.get("api_key")
    url = f"https://api.seniverse.com/v3/weather/now.json?key={api_key}&location={city}&language=zh-Hans&unit=c"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        weather = {
            "temperature": data["results"][0]["now"]["temperature"],
            "description": data["results"][0]["now"]["text"],
        }
        return BaseToolOutput(weather)
    else:
        raise Exception(f"Failed to retrieve weather: {response.status_code}")

+ 19  - 0   src/mindpilot/app/tools/wolfram.py

@@ -0,0 +1,19 @@
# LangChain's built-in Wolfram Alpha API wrapper

from ..pydantic_v1 import Field
# from chatchat.server.utils import get_tool_config

from .tools_registry import BaseToolOutput, regist_tool


@regist_tool
def wolfram(query: str = Field(description="The formula to be calculated")):
    """Useful for when you need to calculate difficult formulas."""

    from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper

    wolfram = WolframAlphaAPIWrapper(
        wolfram_alpha_appid="PWKVLW-6ETR93QX6Q"
    )
    ans = wolfram.run(query)
    return BaseToolOutput(ans)

+ 127  - 4   src/mindpilot/app/utils.py

@@ -19,11 +19,31 @@ from typing import (
    Union,
)

import httpx
import openai
from fastapi import FastAPI
from langchain.tools import BaseTool
from langchain_core.embeddings import Embeddings
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.llms import OpenAI

# from chatchat.configs import (
#     DEFAULT_EMBEDDING_MODEL,
#     DEFAULT_LLM_MODEL,
#     HTTPX_DEFAULT_TIMEOUT,
#     MODEL_PLATFORMS,
#     TEMPERATURE,
#     log_verbose,
# )
# from chatchat.server.pydantic_v2 import BaseModel, Field

logger = logging.getLogger()


def set_httpx_config(
    timeout: float = 300,  # TODO: needs a config file so this timeout is configurable
    proxy: Union[str, Dict] = None,
    unused_proxies: List[str] = [],
):
    """
    Set the default httpx timeout. The httpx default of 5 seconds is not enough when waiting for an LLM answer.
@@ -73,4 +93,107 @@

    import urllib.request

    urllib.request.getproxies = _get_proxies


class MsgType:
    TEXT = 1
    IMAGE = 2
    AUDIO = 3
    VIDEO = 4


DEFAULT_LLM_MODEL = None  # TODO: move into a config file
TEMPERATURE = 0.8


def get_ChatOpenAI(
    model_name: str = DEFAULT_LLM_MODEL,
    temperature: float = TEMPERATURE,
    max_tokens: int = None,
    streaming: bool = True,
    callbacks: List[Callable] = [],
    verbose: bool = True,
    local_wrap: bool = False,  # use the locally wrapped api
    **kwargs: Any,
) -> ChatOpenAI:
    # model_info = get_model_info(model_name)
    params = dict(
        streaming=streaming,
        verbose=verbose,
        callbacks=callbacks,
        model_name=model_name,
        temperature=temperature,
        max_tokens=max_tokens,
        **kwargs,
    )
    try:
        # if local_wrap:
        #     params.update(
        #         openai_api_base=f"{api_address()}/v1",
        #         openai_api_key="EMPTY",
        #     )
        # else:
        #     params.update(
        #         # openai_api_base=model_info.get("api_base_url"),
        #         # openai_api_key=model_info.get("api_key"),
        #         # openai_proxy=model_info.get("api_proxy"),
        #         openai_api_base="",
        #         openai_api_key="",
        #         openai_proxy="",
        #     )
        params.update(
            openai_api_base="",
            openai_api_key="",
            openai_proxy="",
        )
        model = ChatOpenAI(**params)
    except Exception as e:
        logger.error(
            f"failed to create ChatOpenAI for model: {model_name}.", exc_info=True
        )
        model = None
    return model


def get_prompt_template(type: str, name: str) -> Optional[str]:
    """
    Load template content from prompt_config.
    type: one of "llm_chat", "knowledge_base_chat", "search_engine_chat";
    new features should be added here.
    """

    from .configs.prompt_config import PROMPT_TEMPLATES

    return PROMPT_TEMPLATES.get(type, {}).get(name)


def get_tool(name: str = None) -> Union[BaseTool, Dict[str, BaseTool]]:
    import importlib

    from ..app import tools

    importlib.reload(tools)

    from ..app.tools import tools_registry

    # update_search_local_knowledgebase_tool()

    if name is None:
        return tools_registry._TOOLS_REGISTRY
    else:
        return tools_registry._TOOLS_REGISTRY.get(name)


async def wrap_done(fn: Awaitable, event: asyncio.Event):
    """Wrap an awaitable with an event to signal when it's done or an exception is raised."""
    try:
        await fn
    except Exception as e:
        logging.exception(e)
        msg = f"Caught exception: {e}"
        logger.error(
            f"{e.__class__.__name__}: {msg}", exc_info=e
        )
    finally:
        # Signal the aiter to stop.
        event.set()
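
wrap_done pairs naturally with the agent callback handler above: the chain runs as a background task while the caller drains the token stream. A sketch (chain and handler construction elided; the ainvoke argument is a placeholder):

    import asyncio

    async def stream_chain(chain, handler):
        # run the chain in the background; wrap_done sets handler.done when it finishes
        task = asyncio.create_task(wrap_done(chain.ainvoke({"input": "hi"}), handler.done))
        async for token in handler.aiter():
            yield token
        await task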
