From c5a2827def0e69b6c924a199442deef035bcebf6 Mon Sep 17 00:00:00 2001 From: kawayiYokami <289104862@qq.com> Date: Thu, 25 Dec 2025 03:54:05 +0800 Subject: [PATCH 1/8] =?UTF-8?q?feat:=20=E5=A4=9A=E6=96=87=E6=9C=AC?= =?UTF-8?q?=E5=9D=97=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- astrbot/core/provider/entities.py | 38 +++++-- .../core/provider/sources/anthropic_source.py | 99 ++++++++++++------- .../core/provider/sources/gemini_source.py | 37 +++++-- .../core/provider/sources/openai_source.py | 31 ++++-- packages/astrbot/process_llm_request.py | 29 ++++-- 5 files changed, 164 insertions(+), 70 deletions(-) diff --git a/astrbot/core/provider/entities.py b/astrbot/core/provider/entities.py index d13e9b56a..5f794442e 100644 --- a/astrbot/core/provider/entities.py +++ b/astrbot/core/provider/entities.py @@ -92,6 +92,8 @@ class ProviderRequest: """会话 ID""" image_urls: list[str] = field(default_factory=list) """图片 URL 列表""" + extra_content_blocks: list[dict] = field(default_factory=list) + """额外的内容块列表,用于在用户消息后添加额外的文本块(如系统提醒、指令等)""" func_tool: ToolSet | None = None """可用的函数工具""" contexts: list[dict] = field(default_factory=list) @@ -166,13 +168,21 @@ def _print_friendly_context(self): async def assemble_context(self) -> dict: """将请求(prompt 和 image_urls)包装成 OpenAI 的消息格式。""" + # 构建内容块列表 + content_blocks = [] + + # 1. 用户原始发言(OpenAI 建议:用户发言在前) + if self.prompt and self.prompt.strip(): + content_blocks.append({"type": "text", "text": self.prompt}) + elif self.image_urls: + # 如果没有文本但有图片,添加占位文本 + content_blocks.append({"type": "text", "text": "[图片]"}) + + # 2. 额外的内容块(系统提醒、指令等) + content_blocks.extend(self.extra_content_blocks) + + # 3. 图片内容 if self.image_urls: - user_content = { - "role": "user", - "content": [ - {"type": "text", "text": self.prompt if self.prompt else "[图片]"}, - ], - } for image_url in self.image_urls: if image_url.startswith("http"): image_path = await download_image_by_url(image_url) @@ -185,11 +195,21 @@ async def assemble_context(self) -> dict: if not image_data: logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") continue - user_content["content"].append( + content_blocks.append( {"type": "image_url", "image_url": {"url": image_data}}, ) - return user_content - return {"role": "user", "content": self.prompt} + + # 只有当只有一个来自 prompt 的文本块且没有额外内容块时,才降级为简单格式以保持向后兼容 + if ( + len(content_blocks) == 1 + and content_blocks[0]["type"] == "text" + and not self.extra_content_blocks + and not self.image_urls + ): + return {"role": "user", "content": content_blocks[0]["text"]} + + # 否则返回多模态格式 + return {"role": "user", "content": content_blocks} async def _encode_image_bs64(self, image_url: str) -> str: """将图片转换为 base64""" diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py index 0ff61e393..d982af2e4 100644 --- a/astrbot/core/provider/sources/anthropic_source.py +++ b/astrbot/core/provider/sources/anthropic_source.py @@ -388,48 +388,71 @@ async def text_chat_stream( async for llm_response in self._query_stream(payloads, func_tool): yield llm_response - async def assemble_context(self, text: str, image_urls: list[str] | None = None): + async def assemble_context( + self, + text: str, + image_urls: list[str] | None = None, + extra_content_blocks: list[dict] | None = None, + ): """组装上下文,支持文本和图片""" - if not image_urls: - return {"role": "user", "content": text} - content = [] - content.append({"type": "text", "text": text}) - - for image_url in image_urls: - if image_url.startswith("http"): - image_path = await download_image_by_url(image_url) - image_data = await self.encode_image_bs64(image_path) - elif image_url.startswith("file:///"): - image_path = image_url.replace("file:///", "") - image_data = await self.encode_image_bs64(image_path) - else: - image_data = await self.encode_image_bs64(image_url) - - if not image_data: - logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") - continue - - # Get mime type for the image - mime_type, _ = guess_type(image_url) - if not mime_type: - mime_type = "image/jpeg" # Default to JPEG if can't determine - - content.append( - { - "type": "image", - "source": { - "type": "base64", - "media_type": mime_type, - "data": ( - image_data.split("base64,")[1] - if "base64," in image_data - else image_data - ), + + # 1. 用户原始发言(OpenAI 建议:用户发言在前) + if text: + content.append({"type": "text", "text": text}) + elif image_urls: + # 如果没有文本但有图片,添加占位文本 + content.append({"type": "text", "text": "[图片]"}) + + # 2. 额外的内容块(系统提醒、指令等) + if extra_content_blocks: + # 过滤出文本块,因为 Anthropic 主要支持文本和图片 + text_blocks = [ + block for block in extra_content_blocks if block.get("type") == "text" + ] + content.extend(text_blocks) + + # 3. 图片内容 + if image_urls: + for image_url in image_urls: + if image_url.startswith("http"): + image_path = await download_image_by_url(image_url) + image_data = await self.encode_image_bs64(image_path) + elif image_url.startswith("file:///"): + image_path = image_url.replace("file:///", "") + image_data = await self.encode_image_bs64(image_path) + else: + image_data = await self.encode_image_bs64(image_url) + + if not image_data: + logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") + continue + + # Get mime type for the image + mime_type, _ = guess_type(image_url) + if not mime_type: + mime_type = "image/jpeg" # Default to JPEG if can't determine + + content.append( + { + "type": "image", + "source": { + "type": "base64", + "media_type": mime_type, + "data": ( + image_data.split("base64,")[1] + if "base64," in image_data + else image_data + ), + }, }, - }, - ) + ) + + # 如果只有一个文本块且没有图片,返回简单格式以保持向后兼容 + if len(content) == 1 and content[0]["type"] == "text": + return {"role": "user", "content": content[0]["text"]} + # 否则返回多模态格式 return {"role": "user", "content": content} async def encode_image_bs64(self, image_url: str) -> str: diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py index 7f3700643..487acd431 100644 --- a/astrbot/core/provider/sources/gemini_source.py +++ b/astrbot/core/provider/sources/gemini_source.py @@ -797,13 +797,29 @@ def set_key(self, key): self.chosen_api_key = key self._init_client() - async def assemble_context(self, text: str, image_urls: list[str] | None = None): + async def assemble_context( + self, + text: str, + image_urls: list[str] | None = None, + extra_content_blocks: list[dict] | None = None, + ): """组装上下文。""" + # 构建内容块列表 + content_blocks = [] + + # 1. 用户原始发言(OpenAI 建议:用户发言在前) + if text: + content_blocks.append({"type": "text", "text": text}) + elif image_urls: + # 如果没有文本但有图片,添加占位文本 + content_blocks.append({"type": "text", "text": "[图片]"}) + + # 2. 额外的内容块(系统提醒、指令等) + if extra_content_blocks: + content_blocks.extend(extra_content_blocks) + + # 3. 图片内容 if image_urls: - user_content = { - "role": "user", - "content": [{"type": "text", "text": text if text else "[图片]"}], - } for image_url in image_urls: if image_url.startswith("http"): image_path = await download_image_by_url(image_url) @@ -816,14 +832,19 @@ async def assemble_context(self, text: str, image_urls: list[str] | None = None) if not image_data: logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") continue - user_content["content"].append( + content_blocks.append( { "type": "image_url", "image_url": {"url": image_data}, }, ) - return user_content - return {"role": "user", "content": text} + + # 如果只有文本且没有额外内容块,返回简单格式以保持向后兼容 + if len(content_blocks) == 1 and content_blocks[0]["type"] == "text": + return {"role": "user", "content": content_blocks[0]["text"]} + + # 否则返回多模态格式 + return {"role": "user", "content": content_blocks} async def encode_image_bs64(self, image_url: str) -> str: """将图片转换为 base64""" diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index a716d0a5a..97bb992e7 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -624,13 +624,25 @@ async def assemble_context( self, text: str, image_urls: list[str] | None = None, + extra_content_blocks: list[dict] | None = None, ) -> dict: """组装成符合 OpenAI 格式的 role 为 user 的消息段""" + # 构建内容块列表 + content_blocks = [] + + # 1. 用户原始发言(OpenAI 建议:用户发言在前) + if text: + content_blocks.append({"type": "text", "text": text}) + elif image_urls: + # 如果没有文本但有图片,添加占位文本 + content_blocks.append({"type": "text", "text": "[图片]"}) + + # 2. 额外的内容块(系统提醒、指令等) + if extra_content_blocks: + content_blocks.extend(extra_content_blocks) + + # 3. 图片内容 if image_urls: - user_content = { - "role": "user", - "content": [{"type": "text", "text": text if text else "[图片]"}], - } for image_url in image_urls: if image_url.startswith("http"): image_path = await download_image_by_url(image_url) @@ -643,14 +655,19 @@ async def assemble_context( if not image_data: logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") continue - user_content["content"].append( + content_blocks.append( { "type": "image_url", "image_url": {"url": image_data}, }, ) - return user_content - return {"role": "user", "content": text} + + # 如果只有文本且没有额外内容块,返回简单格式以保持向后兼容 + if len(content_blocks) == 1 and content_blocks[0]["type"] == "text": + return {"role": "user", "content": content_blocks[0]["text"]} + + # 否则返回多模态格式 + return {"role": "user", "content": content_blocks} async def encode_image_bs64(self, image_url: str) -> str: """将图片转换为 base64""" diff --git a/packages/astrbot/process_llm_request.py b/packages/astrbot/process_llm_request.py index 89a4df3a2..532aac219 100644 --- a/packages/astrbot/process_llm_request.py +++ b/packages/astrbot/process_llm_request.py @@ -85,7 +85,12 @@ async def _ensure_img_caption( req.image_urls, ) if caption: - req.prompt = f"(Image Caption: {caption})\n\n{req.prompt}" + req.extra_content_blocks.append( + { + "type": "text", + "text": f"{caption}", + } + ) req.image_urls = [] except Exception as e: logger.error(f"处理图片描述失败: {e}") @@ -129,13 +134,14 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques else: req.prompt = prefix + req.prompt + # 收集系统提醒信息 + system_parts = [] + # user identifier if cfg.get("identifier"): user_id = event.message_obj.sender.user_id user_nickname = event.message_obj.sender.nickname - req.prompt = ( - f"\n[User ID: {user_id}, Nickname: {user_nickname}]\n{req.prompt}" - ) + system_parts.append(f"User ID: {user_id}, Nickname: {user_nickname}") # group name identifier if cfg.get("group_name_display") and event.message_obj.group_id: @@ -146,7 +152,7 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques return group_name = event.message_obj.group.group_name if group_name: - req.system_prompt += f"\nGroup name: {group_name}\n" + system_parts.append(f"Group name: {group_name}") # time info if cfg.get("datetime_system_prompt"): @@ -162,7 +168,7 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques current_time = ( datetime.datetime.now().astimezone().strftime("%Y-%m-%d %H:%M (%Z)") ) - req.system_prompt += f"\nCurrent datetime: {current_time}\n" + system_parts.append(f"Current datetime: {current_time}") img_cap_prov_id: str = cfg.get("default_image_caption_provider_id") or "" if req.conversation: @@ -225,10 +231,17 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques except BaseException as e: logger.error(f"处理引用图片失败: {e}") - # 3. 将所有部分组合成文本并直接注入到当前消息中 + # 3. 将所有部分组合成文本并添加到 extra_content_blocks 中 # 确保引用内容被正确的标签包裹 quoted_content = "\n".join(content_parts) # 确保所有内容都在标签内 quoted_text = f"\n{quoted_content}\n" - req.prompt = f"{quoted_text}\n\n{req.prompt}" + req.extra_content_blocks.append({"type": "text", "text": quoted_text}) + + # 统一包裹所有系统提醒 + if system_parts: + system_content = ( + "" + "".join(system_parts) + "" + ) + req.extra_content_blocks.append({"type": "text", "text": system_content}) From 9449ff668b8ca2cded027a99183d73aa1044a985 Mon Sep 17 00:00:00 2001 From: kawayiYokami <289104862@qq.com> Date: Thu, 25 Dec 2025 13:33:40 +0800 Subject: [PATCH 2/8] FIX --- astrbot/core/provider/sources/anthropic_source.py | 10 ++++++++-- astrbot/core/provider/sources/gemini_source.py | 10 ++++++++-- astrbot/core/provider/sources/openai_source.py | 10 ++++++++-- packages/astrbot/process_llm_request.py | 2 +- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py index d982af2e4..a55a3a0a1 100644 --- a/astrbot/core/provider/sources/anthropic_source.py +++ b/astrbot/core/provider/sources/anthropic_source.py @@ -448,8 +448,14 @@ async def assemble_context( }, ) - # 如果只有一个文本块且没有图片,返回简单格式以保持向后兼容 - if len(content) == 1 and content[0]["type"] == "text": + # 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容 + if ( + text + and not extra_content_blocks + and not image_urls + and len(content) == 1 + and content[0]["type"] == "text" + ): return {"role": "user", "content": content[0]["text"]} # 否则返回多模态格式 diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py index 487acd431..614c83aad 100644 --- a/astrbot/core/provider/sources/gemini_source.py +++ b/astrbot/core/provider/sources/gemini_source.py @@ -839,8 +839,14 @@ async def assemble_context( }, ) - # 如果只有文本且没有额外内容块,返回简单格式以保持向后兼容 - if len(content_blocks) == 1 and content_blocks[0]["type"] == "text": + # 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容 + if ( + text + and not extra_content_blocks + and not image_urls + and len(content_blocks) == 1 + and content_blocks[0]["type"] == "text" + ): return {"role": "user", "content": content_blocks[0]["text"]} # 否则返回多模态格式 diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index 97bb992e7..fcd2e0e32 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -662,8 +662,14 @@ async def assemble_context( }, ) - # 如果只有文本且没有额外内容块,返回简单格式以保持向后兼容 - if len(content_blocks) == 1 and content_blocks[0]["type"] == "text": + # 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容 + if ( + text + and not extra_content_blocks + and not image_urls + and len(content_blocks) == 1 + and content_blocks[0]["type"] == "text" + ): return {"role": "user", "content": content_blocks[0]["text"]} # 否则返回多模态格式 diff --git a/packages/astrbot/process_llm_request.py b/packages/astrbot/process_llm_request.py index 532aac219..f787970c7 100644 --- a/packages/astrbot/process_llm_request.py +++ b/packages/astrbot/process_llm_request.py @@ -242,6 +242,6 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques # 统一包裹所有系统提醒 if system_parts: system_content = ( - "" + "".join(system_parts) + "" + "" + "\n".join(system_parts) + "" ) req.extra_content_blocks.append({"type": "text", "text": system_content}) From 17b52ab5ddb171f8161357872d7e4ecb0122d992 Mon Sep 17 00:00:00 2001 From: kawayiYokami <289104862@qq.com> Date: Fri, 26 Dec 2025 18:57:51 +0800 Subject: [PATCH 3/8] =?UTF-8?q?=E4=BC=A0=E9=80=92=E9=93=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/agent/runners/tool_loop_agent_runner.py | 1 + astrbot/core/provider/provider.py | 2 ++ astrbot/core/provider/sources/anthropic_source.py | 13 +++++++++++-- astrbot/core/provider/sources/gemini_source.py | 13 +++++++++++-- astrbot/core/provider/sources/openai_source.py | 10 +++++++++- 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py index 7eb90f3fc..1e04001e4 100644 --- a/astrbot/core/agent/runners/tool_loop_agent_runner.py +++ b/astrbot/core/agent/runners/tool_loop_agent_runner.py @@ -81,6 +81,7 @@ async def _iter_llm_responses(self) -> T.AsyncGenerator[LLMResponse, None]: "func_tool": self.req.func_tool, "model": self.req.model, # NOTE: in fact, this arg is None in most cases "session_id": self.req.session_id, + "extra_content_blocks": self.req.extra_content_blocks, } if self.streaming: diff --git a/astrbot/core/provider/provider.py b/astrbot/core/provider/provider.py index 7f21a2ee1..b81e24da6 100644 --- a/astrbot/core/provider/provider.py +++ b/astrbot/core/provider/provider.py @@ -103,6 +103,7 @@ async def text_chat( system_prompt: str | None = None, tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None, model: str | None = None, + extra_content_blocks: list[dict] | None = None, **kwargs, ) -> LLMResponse: """获得 LLM 的文本对话结果。会使用当前的模型进行对话。 @@ -114,6 +115,7 @@ async def text_chat( tools: tool set contexts: 上下文,和 prompt 二选一使用 tool_calls_result: 回传给 LLM 的工具调用结果。参考: https://platform.openai.com/docs/guides/function-calling + extra_content_blocks: 额外的内容块列表,用于在用户消息后添加额外的文本块(如系统提醒、指令等) kwargs: 其他参数 Notes: diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py index a55a3a0a1..788047375 100644 --- a/astrbot/core/provider/sources/anthropic_source.py +++ b/astrbot/core/provider/sources/anthropic_source.py @@ -296,13 +296,16 @@ async def text_chat( system_prompt=None, tool_calls_result=None, model=None, + extra_content_blocks=None, **kwargs, ) -> LLMResponse: if contexts is None: contexts = [] new_record = None if prompt is not None: - new_record = await self.assemble_context(prompt, image_urls) + new_record = await self.assemble_context( + prompt, image_urls, extra_content_blocks + ) context_query = self._ensure_message_to_dicts(contexts) if new_record: context_query.append(new_record) @@ -350,13 +353,16 @@ async def text_chat_stream( system_prompt=None, tool_calls_result=None, model=None, + extra_content_blocks=None, **kwargs, ): if contexts is None: contexts = [] new_record = None if prompt is not None: - new_record = await self.assemble_context(prompt, image_urls) + new_record = await self.assemble_context( + prompt, image_urls, extra_content_blocks + ) context_query = self._ensure_message_to_dicts(contexts) if new_record: context_query.append(new_record) @@ -403,6 +409,9 @@ async def assemble_context( elif image_urls: # 如果没有文本但有图片,添加占位文本 content.append({"type": "text", "text": "[图片]"}) + elif extra_content_blocks: + # 如果只有额外内容块,也需要添加占位文本 + content.append({"type": "text", "text": " "}) # 2. 额外的内容块(系统提醒、指令等) if extra_content_blocks: diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py index 614c83aad..918bb1f87 100644 --- a/astrbot/core/provider/sources/gemini_source.py +++ b/astrbot/core/provider/sources/gemini_source.py @@ -680,13 +680,16 @@ async def text_chat( system_prompt=None, tool_calls_result=None, model=None, + extra_content_blocks=None, **kwargs, ) -> LLMResponse: if contexts is None: contexts = [] new_record = None if prompt is not None: - new_record = await self.assemble_context(prompt, image_urls) + new_record = await self.assemble_context( + prompt, image_urls, extra_content_blocks + ) context_query = self._ensure_message_to_dicts(contexts) if new_record: context_query.append(new_record) @@ -732,13 +735,16 @@ async def text_chat_stream( system_prompt=None, tool_calls_result=None, model=None, + extra_content_blocks=None, **kwargs, ) -> AsyncGenerator[LLMResponse, None]: if contexts is None: contexts = [] new_record = None if prompt is not None: - new_record = await self.assemble_context(prompt, image_urls) + new_record = await self.assemble_context( + prompt, image_urls, extra_content_blocks + ) context_query = self._ensure_message_to_dicts(contexts) if new_record: context_query.append(new_record) @@ -813,6 +819,9 @@ async def assemble_context( elif image_urls: # 如果没有文本但有图片,添加占位文本 content_blocks.append({"type": "text", "text": "[图片]"}) + elif extra_content_blocks: + # 如果只有额外内容块,也需要添加占位文本 + content_blocks.append({"type": "text", "text": " "}) # 2. 额外的内容块(系统提醒、指令等) if extra_content_blocks: diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index fcd2e0e32..8a9346cef 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -348,6 +348,7 @@ async def _prepare_chat_payload( system_prompt: str | None = None, tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None, model: str | None = None, + extra_content_blocks: list[dict] | None = None, **kwargs, ) -> tuple: """准备聊天所需的有效载荷和上下文""" @@ -355,7 +356,9 @@ async def _prepare_chat_payload( contexts = [] new_record = None if prompt is not None: - new_record = await self.assemble_context(prompt, image_urls) + new_record = await self.assemble_context( + prompt, image_urls, extra_content_blocks + ) context_query = self._ensure_message_to_dicts(contexts) if new_record: context_query.append(new_record) @@ -476,6 +479,7 @@ async def text_chat( system_prompt=None, tool_calls_result=None, model=None, + extra_content_blocks=None, **kwargs, ) -> LLMResponse: payloads, context_query = await self._prepare_chat_payload( @@ -485,6 +489,7 @@ async def text_chat( system_prompt, tool_calls_result, model=model, + extra_content_blocks=extra_content_blocks, **kwargs, ) @@ -636,6 +641,9 @@ async def assemble_context( elif image_urls: # 如果没有文本但有图片,添加占位文本 content_blocks.append({"type": "text", "text": "[图片]"}) + elif extra_content_blocks: + # 如果只有额外内容块,也需要添加占位文本 + content_blocks.append({"type": "text", "text": " "}) # 2. 额外的内容块(系统提醒、指令等) if extra_content_blocks: From 05012af627f988a614263a37a7b303883482ea48 Mon Sep 17 00:00:00 2001 From: kawayiYokami <289104862@qq.com> Date: Fri, 26 Dec 2025 20:54:38 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E9=87=8D=E5=91=BD=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../agent/runners/tool_loop_agent_runner.py | 2 +- astrbot/core/provider/entities.py | 18 +++++++++++++---- astrbot/core/provider/provider.py | 4 ++-- .../core/provider/sources/anthropic_source.py | 20 ++++++++++--------- .../core/provider/sources/gemini_source.py | 18 ++++++++--------- .../core/provider/sources/openai_source.py | 18 ++++++++--------- packages/astrbot/process_llm_request.py | 14 ++++++------- 7 files changed, 52 insertions(+), 42 deletions(-) diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py index 1e04001e4..82ab11b05 100644 --- a/astrbot/core/agent/runners/tool_loop_agent_runner.py +++ b/astrbot/core/agent/runners/tool_loop_agent_runner.py @@ -81,7 +81,7 @@ async def _iter_llm_responses(self) -> T.AsyncGenerator[LLMResponse, None]: "func_tool": self.req.func_tool, "model": self.req.model, # NOTE: in fact, this arg is None in most cases "session_id": self.req.session_id, - "extra_content_blocks": self.req.extra_content_blocks, + "extra_user_content_parts": self.req.extra_user_content_parts, } if self.streaming: diff --git a/astrbot/core/provider/entities.py b/astrbot/core/provider/entities.py index 5f794442e..5ddca16c9 100644 --- a/astrbot/core/provider/entities.py +++ b/astrbot/core/provider/entities.py @@ -14,6 +14,7 @@ from astrbot import logger from astrbot.core.agent.message import ( AssistantMessageSegment, + ContentPart, ToolCall, ToolCallMessageSegment, ) @@ -92,8 +93,10 @@ class ProviderRequest: """会话 ID""" image_urls: list[str] = field(default_factory=list) """图片 URL 列表""" - extra_content_blocks: list[dict] = field(default_factory=list) - """额外的内容块列表,用于在用户消息后添加额外的文本块(如系统提醒、指令等)""" + extra_user_content_parts: list[dict] | list[ContentPart] = field( + default_factory=list + ) + """额外的用户消息内容部分列表,用于在用户消息后添加额外的内容块(如系统提醒、指令等)。支持 dict 或 ContentPart 对象""" func_tool: ToolSet | None = None """可用的函数工具""" contexts: list[dict] = field(default_factory=list) @@ -179,7 +182,14 @@ async def assemble_context(self) -> dict: content_blocks.append({"type": "text", "text": "[图片]"}) # 2. 额外的内容块(系统提醒、指令等) - content_blocks.extend(self.extra_content_blocks) + if self.extra_user_content_parts: + for part in self.extra_user_content_parts: + if hasattr(part, "model_dump"): + # ContentPart 对象,需要 model_dump + content_blocks.append(part.model_dump()) + else: + # 已经是 dict + content_blocks.append(part) # 3. 图片内容 if self.image_urls: @@ -203,7 +213,7 @@ async def assemble_context(self) -> dict: if ( len(content_blocks) == 1 and content_blocks[0]["type"] == "text" - and not self.extra_content_blocks + and not self.extra_user_content_parts and not self.image_urls ): return {"role": "user", "content": content_blocks[0]["text"]} diff --git a/astrbot/core/provider/provider.py b/astrbot/core/provider/provider.py index b81e24da6..fdcc6f238 100644 --- a/astrbot/core/provider/provider.py +++ b/astrbot/core/provider/provider.py @@ -103,7 +103,7 @@ async def text_chat( system_prompt: str | None = None, tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None, model: str | None = None, - extra_content_blocks: list[dict] | None = None, + extra_user_content_parts: list[dict] | None = None, **kwargs, ) -> LLMResponse: """获得 LLM 的文本对话结果。会使用当前的模型进行对话。 @@ -115,7 +115,7 @@ async def text_chat( tools: tool set contexts: 上下文,和 prompt 二选一使用 tool_calls_result: 回传给 LLM 的工具调用结果。参考: https://platform.openai.com/docs/guides/function-calling - extra_content_blocks: 额外的内容块列表,用于在用户消息后添加额外的文本块(如系统提醒、指令等) + extra_user_content_parts: 额外的内容块列表,用于在用户消息后添加额外的文本块(如系统提醒、指令等) kwargs: 其他参数 Notes: diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py index 788047375..a4ad9e832 100644 --- a/astrbot/core/provider/sources/anthropic_source.py +++ b/astrbot/core/provider/sources/anthropic_source.py @@ -296,7 +296,7 @@ async def text_chat( system_prompt=None, tool_calls_result=None, model=None, - extra_content_blocks=None, + extra_user_content_parts=None, **kwargs, ) -> LLMResponse: if contexts is None: @@ -304,7 +304,7 @@ async def text_chat( new_record = None if prompt is not None: new_record = await self.assemble_context( - prompt, image_urls, extra_content_blocks + prompt, image_urls, extra_user_content_parts ) context_query = self._ensure_message_to_dicts(contexts) if new_record: @@ -353,7 +353,7 @@ async def text_chat_stream( system_prompt=None, tool_calls_result=None, model=None, - extra_content_blocks=None, + extra_user_content_parts=None, **kwargs, ): if contexts is None: @@ -361,7 +361,7 @@ async def text_chat_stream( new_record = None if prompt is not None: new_record = await self.assemble_context( - prompt, image_urls, extra_content_blocks + prompt, image_urls, extra_user_content_parts ) context_query = self._ensure_message_to_dicts(contexts) if new_record: @@ -398,7 +398,7 @@ async def assemble_context( self, text: str, image_urls: list[str] | None = None, - extra_content_blocks: list[dict] | None = None, + extra_user_content_parts: list[dict] | None = None, ): """组装上下文,支持文本和图片""" content = [] @@ -409,15 +409,17 @@ async def assemble_context( elif image_urls: # 如果没有文本但有图片,添加占位文本 content.append({"type": "text", "text": "[图片]"}) - elif extra_content_blocks: + elif extra_user_content_parts: # 如果只有额外内容块,也需要添加占位文本 content.append({"type": "text", "text": " "}) # 2. 额外的内容块(系统提醒、指令等) - if extra_content_blocks: + if extra_user_content_parts: # 过滤出文本块,因为 Anthropic 主要支持文本和图片 text_blocks = [ - block for block in extra_content_blocks if block.get("type") == "text" + block + for block in extra_user_content_parts + if block.get("type") == "text" ] content.extend(text_blocks) @@ -460,7 +462,7 @@ async def assemble_context( # 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容 if ( text - and not extra_content_blocks + and not extra_user_content_parts and not image_urls and len(content) == 1 and content[0]["type"] == "text" diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py index 918bb1f87..0dfe048de 100644 --- a/astrbot/core/provider/sources/gemini_source.py +++ b/astrbot/core/provider/sources/gemini_source.py @@ -680,7 +680,7 @@ async def text_chat( system_prompt=None, tool_calls_result=None, model=None, - extra_content_blocks=None, + extra_user_content_parts=None, **kwargs, ) -> LLMResponse: if contexts is None: @@ -688,7 +688,7 @@ async def text_chat( new_record = None if prompt is not None: new_record = await self.assemble_context( - prompt, image_urls, extra_content_blocks + prompt, image_urls, extra_user_content_parts ) context_query = self._ensure_message_to_dicts(contexts) if new_record: @@ -735,7 +735,7 @@ async def text_chat_stream( system_prompt=None, tool_calls_result=None, model=None, - extra_content_blocks=None, + extra_user_content_parts=None, **kwargs, ) -> AsyncGenerator[LLMResponse, None]: if contexts is None: @@ -743,7 +743,7 @@ async def text_chat_stream( new_record = None if prompt is not None: new_record = await self.assemble_context( - prompt, image_urls, extra_content_blocks + prompt, image_urls, extra_user_content_parts ) context_query = self._ensure_message_to_dicts(contexts) if new_record: @@ -807,7 +807,7 @@ async def assemble_context( self, text: str, image_urls: list[str] | None = None, - extra_content_blocks: list[dict] | None = None, + extra_user_content_parts: list[dict] | None = None, ): """组装上下文。""" # 构建内容块列表 @@ -819,13 +819,13 @@ async def assemble_context( elif image_urls: # 如果没有文本但有图片,添加占位文本 content_blocks.append({"type": "text", "text": "[图片]"}) - elif extra_content_blocks: + elif extra_user_content_parts: # 如果只有额外内容块,也需要添加占位文本 content_blocks.append({"type": "text", "text": " "}) # 2. 额外的内容块(系统提醒、指令等) - if extra_content_blocks: - content_blocks.extend(extra_content_blocks) + if extra_user_content_parts: + content_blocks.extend(extra_user_content_parts) # 3. 图片内容 if image_urls: @@ -851,7 +851,7 @@ async def assemble_context( # 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容 if ( text - and not extra_content_blocks + and not extra_user_content_parts and not image_urls and len(content_blocks) == 1 and content_blocks[0]["type"] == "text" diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index 8a9346cef..5a7baf53f 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -348,7 +348,7 @@ async def _prepare_chat_payload( system_prompt: str | None = None, tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None, model: str | None = None, - extra_content_blocks: list[dict] | None = None, + extra_user_content_parts: list[dict] | None = None, **kwargs, ) -> tuple: """准备聊天所需的有效载荷和上下文""" @@ -357,7 +357,7 @@ async def _prepare_chat_payload( new_record = None if prompt is not None: new_record = await self.assemble_context( - prompt, image_urls, extra_content_blocks + prompt, image_urls, extra_user_content_parts ) context_query = self._ensure_message_to_dicts(contexts) if new_record: @@ -479,7 +479,7 @@ async def text_chat( system_prompt=None, tool_calls_result=None, model=None, - extra_content_blocks=None, + extra_user_content_parts=None, **kwargs, ) -> LLMResponse: payloads, context_query = await self._prepare_chat_payload( @@ -489,7 +489,7 @@ async def text_chat( system_prompt, tool_calls_result, model=model, - extra_content_blocks=extra_content_blocks, + extra_user_content_parts=extra_user_content_parts, **kwargs, ) @@ -629,7 +629,7 @@ async def assemble_context( self, text: str, image_urls: list[str] | None = None, - extra_content_blocks: list[dict] | None = None, + extra_user_content_parts: list[dict] | None = None, ) -> dict: """组装成符合 OpenAI 格式的 role 为 user 的消息段""" # 构建内容块列表 @@ -641,13 +641,13 @@ async def assemble_context( elif image_urls: # 如果没有文本但有图片,添加占位文本 content_blocks.append({"type": "text", "text": "[图片]"}) - elif extra_content_blocks: + elif extra_user_content_parts: # 如果只有额外内容块,也需要添加占位文本 content_blocks.append({"type": "text", "text": " "}) # 2. 额外的内容块(系统提醒、指令等) - if extra_content_blocks: - content_blocks.extend(extra_content_blocks) + if extra_user_content_parts: + content_blocks.extend(extra_user_content_parts) # 3. 图片内容 if image_urls: @@ -673,7 +673,7 @@ async def assemble_context( # 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容 if ( text - and not extra_content_blocks + and not extra_user_content_parts and not image_urls and len(content_blocks) == 1 and content_blocks[0]["type"] == "text" diff --git a/packages/astrbot/process_llm_request.py b/packages/astrbot/process_llm_request.py index f787970c7..28d0a34f4 100644 --- a/packages/astrbot/process_llm_request.py +++ b/packages/astrbot/process_llm_request.py @@ -7,6 +7,7 @@ from astrbot.api.event import AstrMessageEvent from astrbot.api.message_components import Image, Reply from astrbot.api.provider import Provider, ProviderRequest +from astrbot.core.agent.message import TextPart from astrbot.core.provider.func_tool_manager import ToolSet @@ -85,11 +86,8 @@ async def _ensure_img_caption( req.image_urls, ) if caption: - req.extra_content_blocks.append( - { - "type": "text", - "text": f"{caption}", - } + req.extra_user_content_parts.append( + TextPart(text=f"{caption}") ) req.image_urls = [] except Exception as e: @@ -231,17 +229,17 @@ async def process_llm_request(self, event: AstrMessageEvent, req: ProviderReques except BaseException as e: logger.error(f"处理引用图片失败: {e}") - # 3. 将所有部分组合成文本并添加到 extra_content_blocks 中 + # 3. 将所有部分组合成文本并添加到 extra_user_content_parts 中 # 确保引用内容被正确的标签包裹 quoted_content = "\n".join(content_parts) # 确保所有内容都在标签内 quoted_text = f"\n{quoted_content}\n" - req.extra_content_blocks.append({"type": "text", "text": quoted_text}) + req.extra_user_content_parts.append(TextPart(text=quoted_text)) # 统一包裹所有系统提醒 if system_parts: system_content = ( "" + "\n".join(system_parts) + "" ) - req.extra_content_blocks.append({"type": "text", "text": system_content}) + req.extra_user_content_parts.append(TextPart(text=system_content)) From 7c1dbecea584095e9d9d5f0fc84bf12356d7349b Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 26 Dec 2025 21:47:02 +0800 Subject: [PATCH 5/8] refactor: unify extra_user_content_parts type to ContentPart across providers and update related handling --- astrbot/core/agent/runners/tool_loop_agent_runner.py | 4 ++-- astrbot/core/provider/entities.py | 11 ++--------- astrbot/core/provider/provider.py | 4 ++-- astrbot/core/provider/sources/anthropic_source.py | 7 +++---- astrbot/core/provider/sources/gemini_source.py | 6 ++++-- astrbot/core/provider/sources/openai_source.py | 9 +++++---- 6 files changed, 18 insertions(+), 23 deletions(-) diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py index 82ab11b05..88e302ad7 100644 --- a/astrbot/core/agent/runners/tool_loop_agent_runner.py +++ b/astrbot/core/agent/runners/tool_loop_agent_runner.py @@ -77,11 +77,11 @@ async def reset( async def _iter_llm_responses(self) -> T.AsyncGenerator[LLMResponse, None]: """Yields chunks *and* a final LLMResponse.""" payload = { - "contexts": self.run_context.messages, + "contexts": self.run_context.messages, # list[Message] "func_tool": self.req.func_tool, "model": self.req.model, # NOTE: in fact, this arg is None in most cases "session_id": self.req.session_id, - "extra_user_content_parts": self.req.extra_user_content_parts, + "extra_user_content_parts": self.req.extra_user_content_parts, # list[ContentPart] } if self.streaming: diff --git a/astrbot/core/provider/entities.py b/astrbot/core/provider/entities.py index 5ddca16c9..8f1bc442e 100644 --- a/astrbot/core/provider/entities.py +++ b/astrbot/core/provider/entities.py @@ -93,9 +93,7 @@ class ProviderRequest: """会话 ID""" image_urls: list[str] = field(default_factory=list) """图片 URL 列表""" - extra_user_content_parts: list[dict] | list[ContentPart] = field( - default_factory=list - ) + extra_user_content_parts: list[ContentPart] = field(default_factory=list) """额外的用户消息内容部分列表,用于在用户消息后添加额外的内容块(如系统提醒、指令等)。支持 dict 或 ContentPart 对象""" func_tool: ToolSet | None = None """可用的函数工具""" @@ -184,12 +182,7 @@ async def assemble_context(self) -> dict: # 2. 额外的内容块(系统提醒、指令等) if self.extra_user_content_parts: for part in self.extra_user_content_parts: - if hasattr(part, "model_dump"): - # ContentPart 对象,需要 model_dump - content_blocks.append(part.model_dump()) - else: - # 已经是 dict - content_blocks.append(part) + content_blocks.append(part.model_dump()) # 3. 图片内容 if self.image_urls: diff --git a/astrbot/core/provider/provider.py b/astrbot/core/provider/provider.py index fdcc6f238..6fb6d8953 100644 --- a/astrbot/core/provider/provider.py +++ b/astrbot/core/provider/provider.py @@ -4,7 +4,7 @@ from collections.abc import AsyncGenerator from typing import TypeAlias, Union -from astrbot.core.agent.message import Message +from astrbot.core.agent.message import ContentPart, Message from astrbot.core.agent.tool import ToolSet from astrbot.core.provider.entities import ( LLMResponse, @@ -103,7 +103,7 @@ async def text_chat( system_prompt: str | None = None, tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None, model: str | None = None, - extra_user_content_parts: list[dict] | None = None, + extra_user_content_parts: list[ContentPart] | None = None, **kwargs, ) -> LLMResponse: """获得 LLM 的文本对话结果。会使用当前的模型进行对话。 diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py index a4ad9e832..2552736d3 100644 --- a/astrbot/core/provider/sources/anthropic_source.py +++ b/astrbot/core/provider/sources/anthropic_source.py @@ -11,6 +11,7 @@ from astrbot import logger from astrbot.api.provider import Provider +from astrbot.core.agent.message import ContentPart from astrbot.core.provider.entities import LLMResponse, TokenUsage from astrbot.core.provider.func_tool_manager import ToolSet from astrbot.core.utils.io import download_image_by_url @@ -398,7 +399,7 @@ async def assemble_context( self, text: str, image_urls: list[str] | None = None, - extra_user_content_parts: list[dict] | None = None, + extra_user_content_parts: list[ContentPart] | None = None, ): """组装上下文,支持文本和图片""" content = [] @@ -417,9 +418,7 @@ async def assemble_context( if extra_user_content_parts: # 过滤出文本块,因为 Anthropic 主要支持文本和图片 text_blocks = [ - block - for block in extra_user_content_parts - if block.get("type") == "text" + block for block in extra_user_content_parts if block.type == "text" ] content.extend(text_blocks) diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py index 0dfe048de..282e1289e 100644 --- a/astrbot/core/provider/sources/gemini_source.py +++ b/astrbot/core/provider/sources/gemini_source.py @@ -13,6 +13,7 @@ import astrbot.core.message.components as Comp from astrbot import logger from astrbot.api.provider import Provider +from astrbot.core.agent.message import ContentPart from astrbot.core.message.message_event_result import MessageChain from astrbot.core.provider.entities import LLMResponse, TokenUsage from astrbot.core.provider.func_tool_manager import ToolSet @@ -807,7 +808,7 @@ async def assemble_context( self, text: str, image_urls: list[str] | None = None, - extra_user_content_parts: list[dict] | None = None, + extra_user_content_parts: list[ContentPart] | None = None, ): """组装上下文。""" # 构建内容块列表 @@ -825,7 +826,8 @@ async def assemble_context( # 2. 额外的内容块(系统提醒、指令等) if extra_user_content_parts: - content_blocks.extend(extra_user_content_parts) + for part in extra_user_content_parts: + content_blocks.append(part.model_dump()) # 3. 图片内容 if image_urls: diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index 5a7baf53f..df17b64b5 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -17,7 +17,7 @@ import astrbot.core.message.components as Comp from astrbot import logger from astrbot.api.provider import Provider -from astrbot.core.agent.message import Message +from astrbot.core.agent.message import ContentPart, Message from astrbot.core.agent.tool import ToolSet from astrbot.core.message.message_event_result import MessageChain from astrbot.core.provider.entities import LLMResponse, TokenUsage, ToolCallsResult @@ -348,7 +348,7 @@ async def _prepare_chat_payload( system_prompt: str | None = None, tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None, model: str | None = None, - extra_user_content_parts: list[dict] | None = None, + extra_user_content_parts: list[ContentPart] | None = None, **kwargs, ) -> tuple: """准备聊天所需的有效载荷和上下文""" @@ -629,7 +629,7 @@ async def assemble_context( self, text: str, image_urls: list[str] | None = None, - extra_user_content_parts: list[dict] | None = None, + extra_user_content_parts: list[ContentPart] | None = None, ) -> dict: """组装成符合 OpenAI 格式的 role 为 user 的消息段""" # 构建内容块列表 @@ -647,7 +647,8 @@ async def assemble_context( # 2. 额外的内容块(系统提醒、指令等) if extra_user_content_parts: - content_blocks.extend(extra_user_content_parts) + for part in extra_user_content_parts: + content_blocks.append(part.model_dump()) # 3. 图片内容 if image_urls: From aeb21f719efbe06bdb074ee37b84dec9fc9c666f Mon Sep 17 00:00:00 2001 From: kawayiYokami <289104862@qq.com> Date: Fri, 26 Dec 2025 21:54:01 +0800 Subject: [PATCH 6/8] =?UTF-8?q?claude=E9=A2=9D=E5=A4=96=E5=9D=97=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E5=9B=BE=E7=89=87=E6=A8=A1=E6=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/provider/sources/anthropic_source.py | 46 +++++++++++++++++-- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py index 2552736d3..7ff8b0df5 100644 --- a/astrbot/core/provider/sources/anthropic_source.py +++ b/astrbot/core/provider/sources/anthropic_source.py @@ -416,11 +416,47 @@ async def assemble_context( # 2. 额外的内容块(系统提醒、指令等) if extra_user_content_parts: - # 过滤出文本块,因为 Anthropic 主要支持文本和图片 - text_blocks = [ - block for block in extra_user_content_parts if block.type == "text" - ] - content.extend(text_blocks) + for block in extra_user_content_parts: + block_type = block.get("type") + + if block_type == "text": + # 文本直接添加 + content.append(block) + + elif block_type == "image_url": + # 转换 OpenAI 格式的图片为 Anthropic 格式 + image_url_data = block.get("image_url", {}) + if isinstance(image_url_data, dict): + url = image_url_data.get("url", "") + else: + # 兼容直接传 URL 字符串的情况 + url = str(image_url_data) + + if url and url.startswith("data:"): + try: + # 提取 MIME 类型和 base64 数据 + mime_type = url.split(":")[1].split(";")[0] + base64_data = ( + url.split("base64,")[1] if "base64," in url else url + ) + content.append( + { + "type": "image", + "source": { + "type": "base64", + "media_type": mime_type, + "data": base64_data, + }, + } + ) + except Exception as e: + logger.warning(f"转换 image_url 到 Anthropic 格式失败: {e}") + else: + logger.warning(f"image_url 不是有效的 data URI: {url[:50]}...") + + else: + # 其他类型(如 audio_url)Anthropic 不支持,记录警告 + logger.debug(f"Anthropic 不支持的内容类型 '{block_type}',已忽略") # 3. 图片内容 if image_urls: From 088659358c2bb3a086ee50501bc23b103007ec03 Mon Sep 17 00:00:00 2001 From: kawayiYokami <289104862@qq.com> Date: Fri, 26 Dec 2025 21:59:22 +0800 Subject: [PATCH 7/8] =?UTF-8?q?=E5=B7=B2=E7=BB=8F=E5=A4=84=E7=90=86?= =?UTF-8?q?=E8=BF=87=E4=BA=86=E4=B8=8D=E7=94=A8=E5=86=8D=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/provider/sources/anthropic_source.py | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py index 7ff8b0df5..cd74c785c 100644 --- a/astrbot/core/provider/sources/anthropic_source.py +++ b/astrbot/core/provider/sources/anthropic_source.py @@ -432,13 +432,33 @@ async def assemble_context( # 兼容直接传 URL 字符串的情况 url = str(image_url_data) - if url and url.startswith("data:"): + # 处理不同格式的 URL + if url: try: - # 提取 MIME 类型和 base64 数据 - mime_type = url.split(":")[1].split(";")[0] - base64_data = ( - url.split("base64,")[1] if "base64," in url else url - ) + if url.startswith("data:"): + # 已经是 data URI,直接提取 + mime_type = url.split(":")[1].split(";")[0] + base64_data = ( + url.split("base64,")[1] if "base64," in url else url + ) + elif url.startswith("http"): + # HTTP URL,需要下载并转换 + image_path = await download_image_by_url(url) + image_data = await self.encode_image_bs64(image_path) + mime_type = image_data.split(":")[1].split(";")[0] + base64_data = image_data.split("base64,")[1] + elif url.startswith("file:///"): + # 文件路径,需要读取并转换 + image_path = url.replace("file:///", "") + image_data = await self.encode_image_bs64(image_path) + mime_type = image_data.split(":")[1].split(";")[0] + base64_data = image_data.split("base64,")[1] + else: + # 假设是本地文件路径 + image_data = await self.encode_image_bs64(url) + mime_type = image_data.split(":")[1].split(";")[0] + base64_data = image_data.split("base64,")[1] + content.append( { "type": "image", @@ -450,9 +470,9 @@ async def assemble_context( } ) except Exception as e: - logger.warning(f"转换 image_url 到 Anthropic 格式失败: {e}") - else: - logger.warning(f"image_url 不是有效的 data URI: {url[:50]}...") + logger.warning( + f"转换 image_url 到 Anthropic 格式失败: {e}, url={url[:50]}..." + ) else: # 其他类型(如 audio_url)Anthropic 不支持,记录警告 From 4c5cc62baf1bec2f8341bec492fe0adc6c425cbf Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 26 Dec 2025 22:07:19 +0800 Subject: [PATCH 8/8] feat: enhance image handling in extra content blocks for multiple providers --- .../core/provider/sources/anthropic_source.py | 136 ++++++------------ .../core/provider/sources/gemini_source.py | 49 ++++--- .../core/provider/sources/openai_source.py | 49 ++++--- 3 files changed, 104 insertions(+), 130 deletions(-) diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py index cd74c785c..d86b8393e 100644 --- a/astrbot/core/provider/sources/anthropic_source.py +++ b/astrbot/core/provider/sources/anthropic_source.py @@ -11,7 +11,7 @@ from astrbot import logger from astrbot.api.provider import Provider -from astrbot.core.agent.message import ContentPart +from astrbot.core.agent.message import ContentPart, ImageURLPart, TextPart from astrbot.core.provider.entities import LLMResponse, TokenUsage from astrbot.core.provider.func_tool_manager import ToolSet from astrbot.core.utils.io import download_image_by_url @@ -402,6 +402,39 @@ async def assemble_context( extra_user_content_parts: list[ContentPart] | None = None, ): """组装上下文,支持文本和图片""" + + async def resolve_image_url(image_url: str) -> dict | None: + if image_url.startswith("http"): + image_path = await download_image_by_url(image_url) + image_data = await self.encode_image_bs64(image_path) + elif image_url.startswith("file:///"): + image_path = image_url.replace("file:///", "") + image_data = await self.encode_image_bs64(image_path) + else: + image_data = await self.encode_image_bs64(image_url) + + if not image_data: + logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") + return None + + # Get mime type for the image + mime_type, _ = guess_type(image_url) + if not mime_type: + mime_type = "image/jpeg" # Default to JPEG if can't determine + + return { + "type": "image", + "source": { + "type": "base64", + "media_type": mime_type, + "data": ( + image_data.split("base64,")[1] + if "base64," in image_data + else image_data + ), + }, + } + content = [] # 1. 用户原始发言(OpenAI 建议:用户发言在前) @@ -417,102 +450,21 @@ async def assemble_context( # 2. 额外的内容块(系统提醒、指令等) if extra_user_content_parts: for block in extra_user_content_parts: - block_type = block.get("type") - - if block_type == "text": - # 文本直接添加 - content.append(block) - - elif block_type == "image_url": - # 转换 OpenAI 格式的图片为 Anthropic 格式 - image_url_data = block.get("image_url", {}) - if isinstance(image_url_data, dict): - url = image_url_data.get("url", "") - else: - # 兼容直接传 URL 字符串的情况 - url = str(image_url_data) - - # 处理不同格式的 URL - if url: - try: - if url.startswith("data:"): - # 已经是 data URI,直接提取 - mime_type = url.split(":")[1].split(";")[0] - base64_data = ( - url.split("base64,")[1] if "base64," in url else url - ) - elif url.startswith("http"): - # HTTP URL,需要下载并转换 - image_path = await download_image_by_url(url) - image_data = await self.encode_image_bs64(image_path) - mime_type = image_data.split(":")[1].split(";")[0] - base64_data = image_data.split("base64,")[1] - elif url.startswith("file:///"): - # 文件路径,需要读取并转换 - image_path = url.replace("file:///", "") - image_data = await self.encode_image_bs64(image_path) - mime_type = image_data.split(":")[1].split(";")[0] - base64_data = image_data.split("base64,")[1] - else: - # 假设是本地文件路径 - image_data = await self.encode_image_bs64(url) - mime_type = image_data.split(":")[1].split(";")[0] - base64_data = image_data.split("base64,")[1] - - content.append( - { - "type": "image", - "source": { - "type": "base64", - "media_type": mime_type, - "data": base64_data, - }, - } - ) - except Exception as e: - logger.warning( - f"转换 image_url 到 Anthropic 格式失败: {e}, url={url[:50]}..." - ) - + if isinstance(block, TextPart): + content.append({"type": "text", "text": block.text}) + elif isinstance(block, ImageURLPart): + image_dict = await resolve_image_url(block.image_url.url) + if image_dict: + content.append(image_dict) else: - # 其他类型(如 audio_url)Anthropic 不支持,记录警告 - logger.debug(f"Anthropic 不支持的内容类型 '{block_type}',已忽略") + raise ValueError(f"不支持的额外内容块类型: {type(block)}") # 3. 图片内容 if image_urls: for image_url in image_urls: - if image_url.startswith("http"): - image_path = await download_image_by_url(image_url) - image_data = await self.encode_image_bs64(image_path) - elif image_url.startswith("file:///"): - image_path = image_url.replace("file:///", "") - image_data = await self.encode_image_bs64(image_path) - else: - image_data = await self.encode_image_bs64(image_url) - - if not image_data: - logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") - continue - - # Get mime type for the image - mime_type, _ = guess_type(image_url) - if not mime_type: - mime_type = "image/jpeg" # Default to JPEG if can't determine - - content.append( - { - "type": "image", - "source": { - "type": "base64", - "media_type": mime_type, - "data": ( - image_data.split("base64,")[1] - if "base64," in image_data - else image_data - ), - }, - }, - ) + image_dict = await resolve_image_url(image_url) + if image_dict: + content.append(image_dict) # 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容 if ( diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py index 282e1289e..46358ac26 100644 --- a/astrbot/core/provider/sources/gemini_source.py +++ b/astrbot/core/provider/sources/gemini_source.py @@ -13,7 +13,7 @@ import astrbot.core.message.components as Comp from astrbot import logger from astrbot.api.provider import Provider -from astrbot.core.agent.message import ContentPart +from astrbot.core.agent.message import ContentPart, ImageURLPart, TextPart from astrbot.core.message.message_event_result import MessageChain from astrbot.core.provider.entities import LLMResponse, TokenUsage from astrbot.core.provider.func_tool_manager import ToolSet @@ -811,6 +811,24 @@ async def assemble_context( extra_user_content_parts: list[ContentPart] | None = None, ): """组装上下文。""" + + async def resolve_image_part(image_url: str) -> dict | None: + if image_url.startswith("http"): + image_path = await download_image_by_url(image_url) + image_data = await self.encode_image_bs64(image_path) + elif image_url.startswith("file:///"): + image_path = image_url.replace("file:///", "") + image_data = await self.encode_image_bs64(image_path) + else: + image_data = await self.encode_image_bs64(image_url) + if not image_data: + logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") + return None + return { + "type": "image_url", + "image_url": {"url": image_data}, + } + # 构建内容块列表 content_blocks = [] @@ -827,28 +845,21 @@ async def assemble_context( # 2. 额外的内容块(系统提醒、指令等) if extra_user_content_parts: for part in extra_user_content_parts: - content_blocks.append(part.model_dump()) + if isinstance(part, TextPart): + content_blocks.append({"type": "text", "text": part.text}) + elif isinstance(part, ImageURLPart): + image_part = await resolve_image_part(part.image_url.url) + if image_part: + content_blocks.append(image_part) + else: + raise ValueError(f"不支持的额外内容块类型: {type(part)}") # 3. 图片内容 if image_urls: for image_url in image_urls: - if image_url.startswith("http"): - image_path = await download_image_by_url(image_url) - image_data = await self.encode_image_bs64(image_path) - elif image_url.startswith("file:///"): - image_path = image_url.replace("file:///", "") - image_data = await self.encode_image_bs64(image_path) - else: - image_data = await self.encode_image_bs64(image_url) - if not image_data: - logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") - continue - content_blocks.append( - { - "type": "image_url", - "image_url": {"url": image_data}, - }, - ) + image_part = await resolve_image_part(image_url) + if image_part: + content_blocks.append(image_part) # 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容 if ( diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index df17b64b5..1212e8b00 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -17,7 +17,7 @@ import astrbot.core.message.components as Comp from astrbot import logger from astrbot.api.provider import Provider -from astrbot.core.agent.message import ContentPart, Message +from astrbot.core.agent.message import ContentPart, ImageURLPart, Message, TextPart from astrbot.core.agent.tool import ToolSet from astrbot.core.message.message_event_result import MessageChain from astrbot.core.provider.entities import LLMResponse, TokenUsage, ToolCallsResult @@ -632,6 +632,24 @@ async def assemble_context( extra_user_content_parts: list[ContentPart] | None = None, ) -> dict: """组装成符合 OpenAI 格式的 role 为 user 的消息段""" + + async def resolve_image_part(image_url: str) -> dict | None: + if image_url.startswith("http"): + image_path = await download_image_by_url(image_url) + image_data = await self.encode_image_bs64(image_path) + elif image_url.startswith("file:///"): + image_path = image_url.replace("file:///", "") + image_data = await self.encode_image_bs64(image_path) + else: + image_data = await self.encode_image_bs64(image_url) + if not image_data: + logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") + return None + return { + "type": "image_url", + "image_url": {"url": image_data}, + } + # 构建内容块列表 content_blocks = [] @@ -648,28 +666,21 @@ async def assemble_context( # 2. 额外的内容块(系统提醒、指令等) if extra_user_content_parts: for part in extra_user_content_parts: - content_blocks.append(part.model_dump()) + if isinstance(part, TextPart): + content_blocks.append({"type": "text", "text": part.text}) + elif isinstance(part, ImageURLPart): + image_part = await resolve_image_part(part.image_url.url) + if image_part: + content_blocks.append(image_part) + else: + raise ValueError(f"不支持的额外内容块类型: {type(part)}") # 3. 图片内容 if image_urls: for image_url in image_urls: - if image_url.startswith("http"): - image_path = await download_image_by_url(image_url) - image_data = await self.encode_image_bs64(image_path) - elif image_url.startswith("file:///"): - image_path = image_url.replace("file:///", "") - image_data = await self.encode_image_bs64(image_path) - else: - image_data = await self.encode_image_bs64(image_url) - if not image_data: - logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") - continue - content_blocks.append( - { - "type": "image_url", - "image_url": {"url": image_data}, - }, - ) + image_part = await resolve_image_part(image_url) + if image_part: + content_blocks.append(image_part) # 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容 if (