Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion astrbot/core/agent/runners/tool_loop_agent_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,11 @@ async def reset(
async def _iter_llm_responses(self) -> T.AsyncGenerator[LLMResponse, None]:
"""Yields chunks *and* a final LLMResponse."""
payload = {
"contexts": self.run_context.messages,
"contexts": self.run_context.messages, # list[Message]
"func_tool": self.req.func_tool,
"model": self.req.model, # NOTE: in fact, this arg is None in most cases
"session_id": self.req.session_id,
"extra_user_content_parts": self.req.extra_user_content_parts, # list[ContentPart]
}

if self.streaming:
Expand Down
41 changes: 32 additions & 9 deletions astrbot/core/provider/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from astrbot import logger
from astrbot.core.agent.message import (
AssistantMessageSegment,
ContentPart,
ToolCall,
ToolCallMessageSegment,
)
Expand Down Expand Up @@ -92,6 +93,8 @@ class ProviderRequest:
"""会话 ID"""
image_urls: list[str] = field(default_factory=list)
"""图片 URL 列表"""
extra_user_content_parts: list[ContentPart] = field(default_factory=list)
"""额外的用户消息内容部分列表,用于在用户消息后添加额外的内容块(如系统提醒、指令等)。支持 dict 或 ContentPart 对象"""
func_tool: ToolSet | None = None
"""可用的函数工具"""
contexts: list[dict] = field(default_factory=list)
Expand Down Expand Up @@ -166,13 +169,23 @@ def _print_friendly_context(self):

async def assemble_context(self) -> dict:
"""将请求(prompt 和 image_urls)包装成 OpenAI 的消息格式。"""
# 构建内容块列表
content_blocks = []

# 1. 用户原始发言(OpenAI 建议:用户发言在前)
if self.prompt and self.prompt.strip():
content_blocks.append({"type": "text", "text": self.prompt})
elif self.image_urls:
# 如果没有文本但有图片,添加占位文本
content_blocks.append({"type": "text", "text": "[图片]"})

# 2. 额外的内容块(系统提醒、指令等)
if self.extra_user_content_parts:
for part in self.extra_user_content_parts:
content_blocks.append(part.model_dump())

# 3. 图片内容
if self.image_urls:
user_content = {
"role": "user",
"content": [
{"type": "text", "text": self.prompt if self.prompt else "[图片]"},
],
}
for image_url in self.image_urls:
if image_url.startswith("http"):
image_path = await download_image_by_url(image_url)
Expand All @@ -185,11 +198,21 @@ async def assemble_context(self) -> dict:
if not image_data:
logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
continue
user_content["content"].append(
content_blocks.append(
{"type": "image_url", "image_url": {"url": image_data}},
)
return user_content
return {"role": "user", "content": self.prompt}

# 只有当只有一个来自 prompt 的文本块且没有额外内容块时,才降级为简单格式以保持向后兼容
if (
len(content_blocks) == 1
and content_blocks[0]["type"] == "text"
and not self.extra_user_content_parts
and not self.image_urls
):
return {"role": "user", "content": content_blocks[0]["text"]}

# 否则返回多模态格式
return {"role": "user", "content": content_blocks}

async def _encode_image_bs64(self, image_url: str) -> str:
"""将图片转换为 base64"""
Expand Down
4 changes: 3 additions & 1 deletion astrbot/core/provider/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections.abc import AsyncGenerator
from typing import TypeAlias, Union

from astrbot.core.agent.message import Message
from astrbot.core.agent.message import ContentPart, Message
from astrbot.core.agent.tool import ToolSet
from astrbot.core.provider.entities import (
LLMResponse,
Expand Down Expand Up @@ -103,6 +103,7 @@ async def text_chat(
system_prompt: str | None = None,
tool_calls_result: ToolCallsResult | list[ToolCallsResult] | None = None,
model: str | None = None,
extra_user_content_parts: list[ContentPart] | None = None,
**kwargs,
) -> LLMResponse:
"""获得 LLM 的文本对话结果。会使用当前的模型进行对话。
Expand All @@ -114,6 +115,7 @@ async def text_chat(
tools: tool set
contexts: 上下文,和 prompt 二选一使用
tool_calls_result: 回传给 LLM 的工具调用结果。参考: https://platform.openai.com/docs/guides/function-calling
extra_user_content_parts: 额外的内容块列表,用于在用户消息后添加额外的文本块(如系统提醒、指令等)
kwargs: 其他参数

Notes:
Expand Down
93 changes: 70 additions & 23 deletions astrbot/core/provider/sources/anthropic_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from astrbot import logger
from astrbot.api.provider import Provider
from astrbot.core.agent.message import ContentPart, ImageURLPart, TextPart
from astrbot.core.provider.entities import LLMResponse, TokenUsage
from astrbot.core.provider.func_tool_manager import ToolSet
from astrbot.core.utils.io import download_image_by_url
Expand Down Expand Up @@ -296,13 +297,16 @@ async def text_chat(
system_prompt=None,
tool_calls_result=None,
model=None,
extra_user_content_parts=None,
**kwargs,
) -> LLMResponse:
if contexts is None:
contexts = []
new_record = None
if prompt is not None:
new_record = await self.assemble_context(prompt, image_urls)
new_record = await self.assemble_context(
prompt, image_urls, extra_user_content_parts
)
context_query = self._ensure_message_to_dicts(contexts)
if new_record:
context_query.append(new_record)
Expand Down Expand Up @@ -350,13 +354,16 @@ async def text_chat_stream(
system_prompt=None,
tool_calls_result=None,
model=None,
extra_user_content_parts=None,
**kwargs,
):
if contexts is None:
contexts = []
new_record = None
if prompt is not None:
new_record = await self.assemble_context(prompt, image_urls)
new_record = await self.assemble_context(
prompt, image_urls, extra_user_content_parts
)
context_query = self._ensure_message_to_dicts(contexts)
if new_record:
context_query.append(new_record)
Expand Down Expand Up @@ -388,15 +395,15 @@ async def text_chat_stream(
async for llm_response in self._query_stream(payloads, func_tool):
yield llm_response

async def assemble_context(self, text: str, image_urls: list[str] | None = None):
async def assemble_context(
self,
text: str,
image_urls: list[str] | None = None,
extra_user_content_parts: list[ContentPart] | None = None,
):
"""组装上下文,支持文本和图片"""
if not image_urls:
return {"role": "user", "content": text}

content = []
content.append({"type": "text", "text": text})

for image_url in image_urls:
async def resolve_image_url(image_url: str) -> dict | None:
if image_url.startswith("http"):
image_path = await download_image_by_url(image_url)
image_data = await self.encode_image_bs64(image_path)
Expand All @@ -408,28 +415,68 @@ async def assemble_context(self, text: str, image_urls: list[str] | None = None)

if not image_data:
logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
continue
return None

# Get mime type for the image
mime_type, _ = guess_type(image_url)
if not mime_type:
mime_type = "image/jpeg" # Default to JPEG if can't determine

content.append(
{
"type": "image",
"source": {
"type": "base64",
"media_type": mime_type,
"data": (
image_data.split("base64,")[1]
if "base64," in image_data
else image_data
),
},
return {
"type": "image",
"source": {
"type": "base64",
"media_type": mime_type,
"data": (
image_data.split("base64,")[1]
if "base64," in image_data
else image_data
),
},
)
}

content = []

# 1. 用户原始发言(OpenAI 建议:用户发言在前)
if text:
content.append({"type": "text", "text": text})
elif image_urls:
# 如果没有文本但有图片,添加占位文本
content.append({"type": "text", "text": "[图片]"})
elif extra_user_content_parts:
# 如果只有额外内容块,也需要添加占位文本
content.append({"type": "text", "text": " "})

# 2. 额外的内容块(系统提醒、指令等)
if extra_user_content_parts:
for block in extra_user_content_parts:
if isinstance(block, TextPart):
content.append({"type": "text", "text": block.text})
elif isinstance(block, ImageURLPart):
image_dict = await resolve_image_url(block.image_url.url)
if image_dict:
content.append(image_dict)
else:
raise ValueError(f"不支持的额外内容块类型: {type(block)}")

# 3. 图片内容
if image_urls:
for image_url in image_urls:
image_dict = await resolve_image_url(image_url)
if image_dict:
content.append(image_dict)

# 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容
if (
text
and not extra_user_content_parts
and not image_urls
and len(content) == 1
and content[0]["type"] == "text"
):
return {"role": "user", "content": content[0]["text"]}

# 否则返回多模态格式
return {"role": "user", "content": content}

async def encode_image_bs64(self, image_url: str) -> str:
Expand Down
Loading