AstrBotDevs · Soulter · Dec 26, 2025 · Dec 24, 2025 · Dec 25, 2025 · Dec 26, 2025
diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py
@@ -77,10 +77,11 @@ async def reset(
     async def _iter_llm_responses(self) -> T.AsyncGenerator[LLMResponse, None]:
         """Yields chunks *and* a final LLMResponse."""
         payload = {
-            "contexts": self.run_context.messages,
+            "contexts": self.run_context.messages,  # list[Message]
             "func_tool": self.req.func_tool,
             "model": self.req.model,  # NOTE: in fact, this arg is None in most cases
             "session_id": self.req.session_id,
+            "extra_user_content_parts": self.req.extra_user_content_parts,  # list[ContentPart]
         }
 
         if self.streaming:

diff --git a/astrbot/core/provider/entities.py b/astrbot/core/provider/entities.py
@@ -14,6 +14,7 @@
 from astrbot import logger
 from astrbot.core.agent.message import (
     AssistantMessageSegment,
+    ContentPart,
     ToolCall,
     ToolCallMessageSegment,
 )
@@ -92,6 +93,8 @@ class ProviderRequest:
     """会话 ID"""
     image_urls: list[str] = field(default_factory=list)
     """图片 URL 列表"""
+    extra_user_content_parts: list[ContentPart] = field(default_factory=list)
+    """Extra content parts appended after the user's text in the user message (e.g. system reminders, instructions). Items must be ContentPart objects: assemble_context() calls model_dump() on each one, so plain dicts are not accepted."""
     func_tool: ToolSet | None = None
     """可用的函数工具"""
     contexts: list[dict] = field(default_factory=list)
@@ -166,13 +169,23 @@ def _print_friendly_context(self):
 
     async def assemble_context(self) -> dict:
         """将请求(prompt 和 image_urls)包装成 OpenAI 的消息格式。"""
+        # Build the list of content blocks
+        content_blocks = []
+
+        # 1. The user's own text (OpenAI recommends putting user text first)
+        if self.prompt and self.prompt.strip():
+            content_blocks.append({"type": "text", "text": self.prompt})
+        elif self.image_urls:
+            # No text but there are images: add a placeholder text block
+            content_blocks.append({"type": "text", "text": "[图片]"})
+
+        # 2. Extra content parts (system reminders, instructions, etc.)
+        if self.extra_user_content_parts:
+            for part in self.extra_user_content_parts:
+                content_blocks.append(part.model_dump())
+
+        # 3. Image content
         if self.image_urls:
-            user_content = {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": self.prompt if self.prompt else "[图片]"},
-                ],
-            }
             for image_url in self.image_urls:
                 if image_url.startswith("http"):
                     image_path = await download_image_by_url(image_url)
@@ -185,11 +198,21 @@ async def assemble_context(self) -> dict:
                 if not image_data:
                     logger.warning(f"图片 {image_url} 得到的结果为空，将忽略。")
                     continue
-                user_content["content"].append(
+                content_blocks.append(
                     {"type": "image_url", "image_url": {"url": image_data}},
                 )
-            return user_content
-        return {"role": "user", "content": self.prompt}
+
+        # Backward compatibility: with no extra parts and no images, return the
+        # plain-string form used before extra parts existed. Reading self.prompt
+        # directly also covers a blank or whitespace-only prompt, for which no
+        # text block is built above and content_blocks would be an empty list.
+        if not self.extra_user_content_parts and not self.image_urls:
+            return {"role": "user", "content": self.prompt}
+
+        # Otherwise return the multi-part format: a list of content blocks
+        # holding the prompt text (or image placeholder), then the extra
+        # parts, then the images.
+        return {"role": "user", "content": content_blocks}
 
     async def _encode_image_bs64(self, image_url: str) -> str:
         """将图片转换为 base64"""

diff --git a/astrbot/core/provider/provider.py b/astrbot/core/provider/provider.py
@@ -4,7 +4,7 @@
 from collections.abc import AsyncGenerator
 from typing import TypeAlias, Union
 
-from astrbot.core.agent.message import Message
+from astrbot.core.agent.message import ContentPart, Message
 from astrbot.core.agent.tool import ToolSet
 from astrbot.core.provider.entities import (
     LLMResponse,
@@ -103,6 +103,7 @@ async def text_chat(
         system_prompt: str | None = None,
         tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None,
         model: str | None = None,
+        extra_user_content_parts: list[ContentPart] | None = None,
         **kwargs,
     ) -> LLMResponse:
         """获得 LLM 的文本对话结果。会使用当前的模型进行对话。
@@ -114,6 +115,7 @@ async def text_chat(
             tools: tool set
             contexts: 上下文，和 prompt 二选一使用
             tool_calls_result: 回传给 LLM 的工具调用结果。参考: https://platform.openai.com/docs/guides/function-calling
+            extra_user_content_parts: 额外的内容块列表，用于在用户消息后添加额外的文本块（如系统提醒、指令等）
             kwargs: 其他参数
 
         Notes:

diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py
@@ -11,6 +11,7 @@
 
 from astrbot import logger
 from astrbot.api.provider import Provider
+from astrbot.core.agent.message import ContentPart, ImageURLPart, TextPart
 from astrbot.core.provider.entities import LLMResponse, TokenUsage
 from astrbot.core.provider.func_tool_manager import ToolSet
 from astrbot.core.utils.io import download_image_by_url
@@ -296,13 +297,16 @@ async def text_chat(
         system_prompt=None,
         tool_calls_result=None,
         model=None,
+        extra_user_content_parts=None,
         **kwargs,
     ) -> LLMResponse:
         if contexts is None:
             contexts = []
         new_record = None
         if prompt is not None:
-            new_record = await self.assemble_context(prompt, image_urls)
+            new_record = await self.assemble_context(
+                prompt, image_urls, extra_user_content_parts
+            )
         context_query = self._ensure_message_to_dicts(contexts)
         if new_record:
             context_query.append(new_record)
@@ -350,13 +354,16 @@ async def text_chat_stream(
         system_prompt=None,
         tool_calls_result=None,
         model=None,
+        extra_user_content_parts=None,
         **kwargs,
     ):
         if contexts is None:
             contexts = []
         new_record = None
         if prompt is not None:
-            new_record = await self.assemble_context(prompt, image_urls)
+            new_record = await self.assemble_context(
+                prompt, image_urls, extra_user_content_parts
+            )
         context_query = self._ensure_message_to_dicts(contexts)
         if new_record:
             context_query.append(new_record)
@@ -388,15 +395,15 @@ async def text_chat_stream(
         async for llm_response in self._query_stream(payloads, func_tool):
             yield llm_response
 
-    async def assemble_context(self, text: str, image_urls: list[str] | None = None):
+    async def assemble_context(
+        self,
+        text: str,
+        image_urls: list[str] | None = None,
+        extra_user_content_parts: list[ContentPart] | None = None,
+    ):
         """组装上下文，支持文本和图片"""
-        if not image_urls:
-            return {"role": "user", "content": text}
-
-        content = []
-        content.append({"type": "text", "text": text})
 
-        for image_url in image_urls:
+        async def resolve_image_url(image_url: str) -> dict | None:
             if image_url.startswith("http"):
                 image_path = await download_image_by_url(image_url)
                 image_data = await self.encode_image_bs64(image_path)
@@ -408,28 +415,68 @@ async def assemble_context(self, text: str, image_urls: list[str] | None = None)
 
             if not image_data:
                 logger.warning(f"图片 {image_url} 得到的结果为空，将忽略。")
-                continue
+                return None
 
             # Get mime type for the image
             mime_type, _ = guess_type(image_url)
             if not mime_type:
                 mime_type = "image/jpeg"  # Default to JPEG if can't determine
 
-            content.append(
-                {
-                    "type": "image",
-                    "source": {
-                        "type": "base64",
-                        "media_type": mime_type,
-                        "data": (
-                            image_data.split("base64,")[1]
-                            if "base64," in image_data
-                            else image_data
-                        ),
-                    },
+            return {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": mime_type,
+                    "data": (
+                        image_data.split("base64,")[1]
+                        if "base64," in image_data
+                        else image_data
+                    ),
                 },
-            )
+            }
+
+        content = []
 
+        # 1. The user's own text goes first (OpenAI recommends user text first).
+        if text:
+            content.append({"type": "text", "text": text})
+        elif image_urls:
+            # No text but there are images: add a placeholder text block.
+            content.append({"type": "text", "text": "[图片]"})
+        # No " " placeholder is added when only extra parts are present: the
+        # Anthropic API rejects whitespace-only text blocks, and the parts
+        # appended below already give the message its content.
+
+        # 2. Extra content parts (system reminders, instructions, etc.).
+        if extra_user_content_parts:
+            for block in extra_user_content_parts:
+                if isinstance(block, TextPart):
+                    content.append({"type": "text", "text": block.text})
+                elif isinstance(block, ImageURLPart):
+                    image_dict = await resolve_image_url(block.image_url.url)
+                    if image_dict:
+                        content.append(image_dict)
+                else:
+                    raise ValueError(f"不支持的额外内容块类型: {type(block)}")
+
+        # 3. Images passed via image_urls.
+        if image_urls:
+            for image_url in image_urls:
+                image_dict = await resolve_image_url(image_url)
+                if image_dict:
+                    content.append(image_dict)
+
+        # Backward compatibility: with no extra parts and no images, return
+        # the plain-string form used before extra parts were supported. `text`
+        # is returned as given, so an empty text no longer turns into an empty
+        # content list on this path.
+        if not extra_user_content_parts and not image_urls:
+            return {"role": "user", "content": text}
+
+        # Otherwise return the multi-part format: the user's text (or the
+        # image placeholder), then the extra parts, then the images, each as
+        # a content block.
+        # The list may be shorter than the inputs: unresolved images are skipped.
         return {"role": "user", "content": content}
 
     async def encode_image_bs64(self, image_url: str) -> str: