diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py
index 7eb90f3fc..88e302ad7 100644
--- a/astrbot/core/agent/runners/tool_loop_agent_runner.py
+++ b/astrbot/core/agent/runners/tool_loop_agent_runner.py
@@ -77,10 +77,11 @@ async def reset(
async def _iter_llm_responses(self) -> T.AsyncGenerator[LLMResponse, None]:
"""Yields chunks *and* a final LLMResponse."""
payload = {
- "contexts": self.run_context.messages,
+ "contexts": self.run_context.messages, # list[Message]
"func_tool": self.req.func_tool,
"model": self.req.model, # NOTE: in fact, this arg is None in most cases
"session_id": self.req.session_id,
+ "extra_user_content_parts": self.req.extra_user_content_parts, # list[ContentPart]
}
if self.streaming:
diff --git a/astrbot/core/provider/entities.py b/astrbot/core/provider/entities.py
index d13e9b56a..8f1bc442e 100644
--- a/astrbot/core/provider/entities.py
+++ b/astrbot/core/provider/entities.py
@@ -14,6 +14,7 @@
from astrbot import logger
from astrbot.core.agent.message import (
AssistantMessageSegment,
+ ContentPart,
ToolCall,
ToolCallMessageSegment,
)
@@ -92,6 +93,8 @@ class ProviderRequest:
"""会话 ID"""
image_urls: list[str] = field(default_factory=list)
"""图片 URL 列表"""
+ extra_user_content_parts: list[ContentPart] = field(default_factory=list)
+ """Extra content parts placed after the user's own text in the assembled user message (e.g. system reminders, instructions). Each element must be a ContentPart instance; plain dicts are not supported because consumers call ContentPart methods on every element."""
func_tool: ToolSet | None = None
"""可用的函数工具"""
contexts: list[dict] = field(default_factory=list)
@@ -166,13 +169,23 @@ def _print_friendly_context(self):
async def assemble_context(self) -> dict:
"""将请求(prompt 和 image_urls)包装成 OpenAI 的消息格式。"""
+ # Build the list of content blocks.
+ content_blocks = []
+
+ # 1. The user's own text goes first.
+ if self.prompt and self.prompt.strip():
+ content_blocks.append({"type": "text", "text": self.prompt})
+ elif self.image_urls:
+ # No usable text but there are images: add placeholder text.
+ content_blocks.append({"type": "text", "text": "[图片]"})
+
+ # 2. Extra content parts (system reminders, instructions, ...), serialized via model_dump().
+ if self.extra_user_content_parts:
+ for part in self.extra_user_content_parts:
+ content_blocks.append(part.model_dump())
+
+ # 3. Images from image_urls.
if self.image_urls:
- user_content = {
- "role": "user",
- "content": [
- {"type": "text", "text": self.prompt if self.prompt else "[图片]"},
- ],
- }
for image_url in self.image_urls:
if image_url.startswith("http"):
image_path = await download_image_by_url(image_url)
@@ -185,11 +198,17 @@ async def assemble_context(self) -> dict:
if not image_data:
logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
continue
- user_content["content"].append(
+ content_blocks.append(
{"type": "image_url", "image_url": {"url": image_data}},
)
- return user_content
- return {"role": "user", "content": self.prompt}
+
+ # Legacy path: with no extra parts and no images, return the prompt as a plain string exactly as
+ # before, even when it is empty (requiring a text block here would emit an empty content list).
+ if not self.extra_user_content_parts and not self.image_urls:
+ return {"role": "user", "content": self.prompt}
+
+ # Otherwise return the multi-part list format.
+ return {"role": "user", "content": content_blocks}
async def _encode_image_bs64(self, image_url: str) -> str:
"""将图片转换为 base64"""
diff --git a/astrbot/core/provider/provider.py b/astrbot/core/provider/provider.py
index 7f21a2ee1..6fb6d8953 100644
--- a/astrbot/core/provider/provider.py
+++ b/astrbot/core/provider/provider.py
@@ -4,7 +4,7 @@
from collections.abc import AsyncGenerator
from typing import TypeAlias, Union
-from astrbot.core.agent.message import Message
+from astrbot.core.agent.message import ContentPart, Message
from astrbot.core.agent.tool import ToolSet
from astrbot.core.provider.entities import (
LLMResponse,
@@ -103,6 +103,7 @@ async def text_chat(
system_prompt: str | None = None,
tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None,
model: str | None = None,
+ extra_user_content_parts: list[ContentPart] | None = None,
**kwargs,
) -> LLMResponse:
"""获得 LLM 的文本对话结果。会使用当前的模型进行对话。
@@ -114,6 +115,7 @@ async def text_chat(
tools: tool set
contexts: 上下文,和 prompt 二选一使用
tool_calls_result: 回传给 LLM 的工具调用结果。参考: https://platform.openai.com/docs/guides/function-calling
+ extra_user_content_parts: 额外的内容块列表,用于在用户消息后添加额外的文本块(如系统提醒、指令等)
kwargs: 其他参数
Notes:
diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py
index 0ff61e393..d86b8393e 100644
--- a/astrbot/core/provider/sources/anthropic_source.py
+++ b/astrbot/core/provider/sources/anthropic_source.py
@@ -11,6 +11,7 @@
from astrbot import logger
from astrbot.api.provider import Provider
+from astrbot.core.agent.message import ContentPart, ImageURLPart, TextPart
from astrbot.core.provider.entities import LLMResponse, TokenUsage
from astrbot.core.provider.func_tool_manager import ToolSet
from astrbot.core.utils.io import download_image_by_url
@@ -296,13 +297,16 @@ async def text_chat(
system_prompt=None,
tool_calls_result=None,
model=None,
+ extra_user_content_parts=None,
**kwargs,
) -> LLMResponse:
if contexts is None:
contexts = []
new_record = None
if prompt is not None:
- new_record = await self.assemble_context(prompt, image_urls)
+ new_record = await self.assemble_context(
+ prompt, image_urls, extra_user_content_parts
+ )
context_query = self._ensure_message_to_dicts(contexts)
if new_record:
context_query.append(new_record)
@@ -350,13 +354,16 @@ async def text_chat_stream(
system_prompt=None,
tool_calls_result=None,
model=None,
+ extra_user_content_parts=None,
**kwargs,
):
if contexts is None:
contexts = []
new_record = None
if prompt is not None:
- new_record = await self.assemble_context(prompt, image_urls)
+ new_record = await self.assemble_context(
+ prompt, image_urls, extra_user_content_parts
+ )
context_query = self._ensure_message_to_dicts(contexts)
if new_record:
context_query.append(new_record)
@@ -388,15 +395,15 @@ async def text_chat_stream(
async for llm_response in self._query_stream(payloads, func_tool):
yield llm_response
- async def assemble_context(self, text: str, image_urls: list[str] | None = None):
+ async def assemble_context(
+ self,
+ text: str,
+ image_urls: list[str] | None = None,
+ extra_user_content_parts: list[ContentPart] | None = None,
+ ):
"""组装上下文,支持文本和图片"""
- if not image_urls:
- return {"role": "user", "content": text}
-
- content = []
- content.append({"type": "text", "text": text})
- for image_url in image_urls:
+ async def resolve_image_url(image_url: str) -> dict | None:
if image_url.startswith("http"):
image_path = await download_image_by_url(image_url)
image_data = await self.encode_image_bs64(image_path)
@@ -408,28 +415,62 @@ async def assemble_context(self, text: str, image_urls: list[str] | None = None)
if not image_data:
logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
- continue
+ return None
# Get mime type for the image
mime_type, _ = guess_type(image_url)
if not mime_type:
mime_type = "image/jpeg" # Default to JPEG if can't determine
- content.append(
- {
- "type": "image",
- "source": {
- "type": "base64",
- "media_type": mime_type,
- "data": (
- image_data.split("base64,")[1]
- if "base64," in image_data
- else image_data
- ),
- },
+ return {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": mime_type,
+ "data": (
+ image_data.split("base64,")[1]
+ if "base64," in image_data
+ else image_data
+ ),
},
- )
+ }
+
+ content = []
+ # 1. The user's own text goes first. No whitespace-only placeholder is added for the
+ # extras-only case: the Anthropic API rejects text blocks without non-whitespace text,
+ # and the extra parts below already supply the message content.
+ if text:
+ content.append({"type": "text", "text": text})
+ elif image_urls:
+ # No text but there are images: add placeholder text.
+ content.append({"type": "text", "text": "[图片]"})
+
+ # 2. Extra content parts (system reminders, instructions, ...).
+ if extra_user_content_parts:
+ for block in extra_user_content_parts:
+ if isinstance(block, TextPart):
+ content.append({"type": "text", "text": block.text})
+ elif isinstance(block, ImageURLPart):
+ image_dict = await resolve_image_url(block.image_url.url)
+ if image_dict:
+ content.append(image_dict)
+ else:
+ raise ValueError(f"不支持的额外内容块类型: {type(block)}")
+
+ # 3. Images from image_urls.
+ if image_urls:
+ for image_url in image_urls:
+ image_dict = await resolve_image_url(image_url)
+ if image_dict:
+ content.append(image_dict)
+
+ # Legacy path: with no extra parts and no images, return the plain-string form exactly as
+ # before, even for empty text (gating on text would emit an empty content list instead).
+ if not extra_user_content_parts and not image_urls:
+ return {"role": "user", "content": text}
+
+ # Otherwise return the multi-part list format.
return {"role": "user", "content": content}
async def encode_image_bs64(self, image_url: str) -> str:
diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py
index 7f3700643..46358ac26 100644
--- a/astrbot/core/provider/sources/gemini_source.py
+++ b/astrbot/core/provider/sources/gemini_source.py
@@ -13,6 +13,7 @@
import astrbot.core.message.components as Comp
from astrbot import logger
from astrbot.api.provider import Provider
+from astrbot.core.agent.message import ContentPart, ImageURLPart, TextPart
from astrbot.core.message.message_event_result import MessageChain
from astrbot.core.provider.entities import LLMResponse, TokenUsage
from astrbot.core.provider.func_tool_manager import ToolSet
@@ -680,13 +681,16 @@ async def text_chat(
system_prompt=None,
tool_calls_result=None,
model=None,
+ extra_user_content_parts=None,
**kwargs,
) -> LLMResponse:
if contexts is None:
contexts = []
new_record = None
if prompt is not None:
- new_record = await self.assemble_context(prompt, image_urls)
+ new_record = await self.assemble_context(
+ prompt, image_urls, extra_user_content_parts
+ )
context_query = self._ensure_message_to_dicts(contexts)
if new_record:
context_query.append(new_record)
@@ -732,13 +736,16 @@ async def text_chat_stream(
system_prompt=None,
tool_calls_result=None,
model=None,
+ extra_user_content_parts=None,
**kwargs,
) -> AsyncGenerator[LLMResponse, None]:
if contexts is None:
contexts = []
new_record = None
if prompt is not None:
- new_record = await self.assemble_context(prompt, image_urls)
+ new_record = await self.assemble_context(
+ prompt, image_urls, extra_user_content_parts
+ )
context_query = self._ensure_message_to_dicts(contexts)
if new_record:
context_query.append(new_record)
@@ -797,33 +804,71 @@ def set_key(self, key):
self.chosen_api_key = key
self._init_client()
- async def assemble_context(self, text: str, image_urls: list[str] | None = None):
+ async def assemble_context(
+ self,
+ text: str,
+ image_urls: list[str] | None = None,
+ extra_user_content_parts: list[ContentPart] | None = None,
+ ):
"""组装上下文。"""
- if image_urls:
- user_content = {
- "role": "user",
- "content": [{"type": "text", "text": text if text else "[图片]"}],
+
+ # Encode one image reference (http URL, file:/// URI, or anything else handed to encode_image_bs64) as an image_url block; returns None when encoding yields nothing.
+ async def resolve_image_part(image_url: str) -> dict | None:
+ if image_url.startswith("http"):
+ image_path = await download_image_by_url(image_url)
+ image_data = await self.encode_image_bs64(image_path)
+ elif image_url.startswith("file:///"):
+ image_path = image_url.replace("file:///", "")
+ image_data = await self.encode_image_bs64(image_path)
+ else:
+ image_data = await self.encode_image_bs64(image_url)
+ if not image_data:
+ logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
+ return None
+ return {
+ "type": "image_url",
+ "image_url": {"url": image_data},
}
- for image_url in image_urls:
- if image_url.startswith("http"):
- image_path = await download_image_by_url(image_url)
- image_data = await self.encode_image_bs64(image_path)
- elif image_url.startswith("file:///"):
- image_path = image_url.replace("file:///", "")
- image_data = await self.encode_image_bs64(image_path)
+
+ # Build the list of content blocks.
+ content_blocks = []
+
+ # 1. The user's own text goes first.
+ if text:
+ content_blocks.append({"type": "text", "text": text})
+ elif image_urls:
+ # No text but there are images: add placeholder text.
+ content_blocks.append({"type": "text", "text": "[图片]"})
+ elif extra_user_content_parts:
+ # Only extra parts were supplied: still lead with a placeholder text block.
+ content_blocks.append({"type": "text", "text": " "})
+
+ # 2. Extra content parts (system reminders, instructions, ...).
+ if extra_user_content_parts:
+ for part in extra_user_content_parts:
+ if isinstance(part, TextPart):
+ content_blocks.append({"type": "text", "text": part.text})
+ elif isinstance(part, ImageURLPart):
+ image_part = await resolve_image_part(part.image_url.url)
+ if image_part:
+ content_blocks.append(image_part)
else:
- image_data = await self.encode_image_bs64(image_url)
- if not image_data:
- logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
- continue
- user_content["content"].append(
- {
- "type": "image_url",
- "image_url": {"url": image_data},
- },
- )
- return user_content
- return {"role": "user", "content": text}
+ raise ValueError(f"不支持的额外内容块类型: {type(part)}")
+
+ # 3. Images from image_urls.
+ if image_urls:
+ for image_url in image_urls:
+ image_part = await resolve_image_part(image_url)
+ if image_part:
+ content_blocks.append(image_part)
+
+ # Legacy path: with no extra parts and no images, return the plain-string form exactly as
+ # before, even for empty text (gating on text would emit an empty content list instead).
+ if not extra_user_content_parts and not image_urls:
+ return {"role": "user", "content": text}
+
+ # Otherwise return the multi-part list format.
+ return {"role": "user", "content": content_blocks}
async def encode_image_bs64(self, image_url: str) -> str:
"""将图片转换为 base64"""
diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py
index a716d0a5a..1212e8b00 100644
--- a/astrbot/core/provider/sources/openai_source.py
+++ b/astrbot/core/provider/sources/openai_source.py
@@ -17,7 +17,7 @@
import astrbot.core.message.components as Comp
from astrbot import logger
from astrbot.api.provider import Provider
-from astrbot.core.agent.message import Message
+from astrbot.core.agent.message import ContentPart, ImageURLPart, Message, TextPart
from astrbot.core.agent.tool import ToolSet
from astrbot.core.message.message_event_result import MessageChain
from astrbot.core.provider.entities import LLMResponse, TokenUsage, ToolCallsResult
@@ -348,6 +348,7 @@ async def _prepare_chat_payload(
system_prompt: str | None = None,
tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None,
model: str | None = None,
+ extra_user_content_parts: list[ContentPart] | None = None,
**kwargs,
) -> tuple:
"""准备聊天所需的有效载荷和上下文"""
@@ -355,7 +356,9 @@ async def _prepare_chat_payload(
contexts = []
new_record = None
if prompt is not None:
- new_record = await self.assemble_context(prompt, image_urls)
+ new_record = await self.assemble_context(
+ prompt, image_urls, extra_user_content_parts
+ )
context_query = self._ensure_message_to_dicts(contexts)
if new_record:
context_query.append(new_record)
@@ -476,6 +479,7 @@ async def text_chat(
system_prompt=None,
tool_calls_result=None,
model=None,
+ extra_user_content_parts=None,
**kwargs,
) -> LLMResponse:
payloads, context_query = await self._prepare_chat_payload(
@@ -485,6 +489,7 @@ async def text_chat(
system_prompt,
tool_calls_result,
model=model,
+ extra_user_content_parts=extra_user_content_parts,
**kwargs,
)
@@ -624,33 +629,67 @@ async def assemble_context(
self,
text: str,
image_urls: list[str] | None = None,
+ extra_user_content_parts: list[ContentPart] | None = None,
) -> dict:
"""组装成符合 OpenAI 格式的 role 为 user 的消息段"""
- if image_urls:
- user_content = {
- "role": "user",
- "content": [{"type": "text", "text": text if text else "[图片]"}],
+
+ # Encode one image reference (http URL, file:/// URI, or anything else handed to encode_image_bs64) as an image_url block; returns None when encoding yields nothing.
+ async def resolve_image_part(image_url: str) -> dict | None:
+ if image_url.startswith("http"):
+ image_path = await download_image_by_url(image_url)
+ image_data = await self.encode_image_bs64(image_path)
+ elif image_url.startswith("file:///"):
+ image_path = image_url.replace("file:///", "")
+ image_data = await self.encode_image_bs64(image_path)
+ else:
+ image_data = await self.encode_image_bs64(image_url)
+ if not image_data:
+ logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
+ return None
+ return {
+ "type": "image_url",
+ "image_url": {"url": image_data},
}
- for image_url in image_urls:
- if image_url.startswith("http"):
- image_path = await download_image_by_url(image_url)
- image_data = await self.encode_image_bs64(image_path)
- elif image_url.startswith("file:///"):
- image_path = image_url.replace("file:///", "")
- image_data = await self.encode_image_bs64(image_path)
+
+ # Build the list of content blocks.
+ content_blocks = []
+
+ # 1. The user's own text goes first.
+ if text:
+ content_blocks.append({"type": "text", "text": text})
+ elif image_urls:
+ # No text but there are images: add placeholder text.
+ content_blocks.append({"type": "text", "text": "[图片]"})
+ elif extra_user_content_parts:
+ # Only extra parts were supplied: still lead with a placeholder text block.
+ content_blocks.append({"type": "text", "text": " "})
+
+ # 2. Extra content parts (system reminders, instructions, ...).
+ if extra_user_content_parts:
+ for part in extra_user_content_parts:
+ if isinstance(part, TextPart):
+ content_blocks.append({"type": "text", "text": part.text})
+ elif isinstance(part, ImageURLPart):
+ image_part = await resolve_image_part(part.image_url.url)
+ if image_part:
+ content_blocks.append(image_part)
else:
- image_data = await self.encode_image_bs64(image_url)
- if not image_data:
- logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
- continue
- user_content["content"].append(
- {
- "type": "image_url",
- "image_url": {"url": image_data},
- },
- )
- return user_content
- return {"role": "user", "content": text}
+ raise ValueError(f"不支持的额外内容块类型: {type(part)}")
+
+ # 3. Images from image_urls.
+ if image_urls:
+ for image_url in image_urls:
+ image_part = await resolve_image_part(image_url)
+ if image_part:
+ content_blocks.append(image_part)
+
+ # Legacy path: with no extra parts and no images, return the plain-string form exactly as
+ # before, even for empty text (gating on text would emit an empty content list instead).
+ if not extra_user_content_parts and not image_urls:
+ return {"role": "user", "content": text}
+
+ # Otherwise return the multi-part list format.
+ return {"role": "user", "content": content_blocks}
async def encode_image_bs64(self, image_url: str) -> str:
"""将图片转换为 base64"""
diff --git a/packages/astrbot/process_llm_request.py b/packages/astrbot/process_llm_request.py
index 89a4df3a2..28d0a34f4 100644
--- a/packages/astrbot/process_llm_request.py
+++ b/packages/astrbot/process_llm_request.py
@@ -7,6 +7,7 @@
from astrbot.api.event import AstrMessageEvent
from astrbot.api.message_components import Image, Reply
from astrbot.api.provider import Provider, ProviderRequest
+from astrbot.core.agent.message import TextPart
from astrbot.core.provider.func_tool_manager import ToolSet
@@ -85,7 +86,9 @@ async def _ensure_img_caption(
req.image_urls,
)
if caption:
- req.prompt = f"(Image Caption: {caption})\n\n{req.prompt}"
+ # Keep the "Image Caption" label so the model can tell the caption apart from the user's own words.
+ caption_part = TextPart(text=f"(Image Caption: {caption})")
+ req.extra_user_content_parts.append(caption_part)
req.image_urls = []
except Exception as e:
logger.error(f"处理图片描述失败: {e}")
@@ -129,13 +132,14 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques
else:
req.prompt = prefix + req.prompt
+ # Collect system-reminder lines here; they are appended to req.extra_user_content_parts at the end of this function. NOTE(review): an early return before that point would drop them — confirm.
+ system_parts = []
+
# user identifier
if cfg.get("identifier"):
user_id = event.message_obj.sender.user_id
user_nickname = event.message_obj.sender.nickname
- req.prompt = (
- f"\n[User ID: {user_id}, Nickname: {user_nickname}]\n{req.prompt}"
- )
+ system_parts.append(f"User ID: {user_id}, Nickname: {user_nickname}")
# group name identifier
if cfg.get("group_name_display") and event.message_obj.group_id:
@@ -146,7 +150,7 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques
return
group_name = event.message_obj.group.group_name
if group_name:
- req.system_prompt += f"\nGroup name: {group_name}\n"
+ system_parts.append(f"Group name: {group_name}")
# time info
if cfg.get("datetime_system_prompt"):
@@ -162,7 +166,7 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques
current_time = (
datetime.datetime.now().astimezone().strftime("%Y-%m-%d %H:%M (%Z)")
)
- req.system_prompt += f"\nCurrent datetime: {current_time}\n"
+ system_parts.append(f"Current datetime: {current_time}")
img_cap_prov_id: str = cfg.get("default_image_caption_provider_id") or ""
if req.conversation:
@@ -225,10 +229,17 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques
except BaseException as e:
logger.error(f"处理引用图片失败: {e}")
- # 3. 将所有部分组合成文本并直接注入到当前消息中
+ # 3. 将所有部分组合成文本并添加到 extra_user_content_parts 中
# 确保引用内容被正确的标签包裹
quoted_content = "\n".join(content_parts)
# 确保所有内容都在标签内
quoted_text = f"\n{quoted_content}\n"
- req.prompt = f"{quoted_text}\n\n{req.prompt}"
+ req.extra_user_content_parts.append(TextPart(text=quoted_text))
+
+ # Wrap all collected system reminders in one tagged block so they stay distinguishable from the user's own text.
+ if system_parts:
+ system_content = (
+ "<system_reminder>" + "\n".join(system_parts) + "</system_reminder>"
+ )
+ req.extra_user_content_parts.append(TextPart(text=system_content))