Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
f57a919
Enhance goal application to support dynamic system messages based on …
recursix Jun 20, 2025
3a21dc7
Merge branch 'main' into allac/next-agent
recursix Jun 20, 2025
0b9a985
Modify API for parallel tool calls add parallel tool support for anth…
amanjaiswal73892 Jun 20, 2025
89376ab
enable screenshot tagging in xray for multi-actions
amanjaiswal73892 Jun 20, 2025
81b1a88
add tool call and tool response markdown in xray
amanjaiswal73892 Jun 20, 2025
c244db3
update tests for parallel tool calls in claude for responses_api.py
amanjaiswal73892 Jun 23, 2025
e7613d6
Add support for openai CUA for tool use agent
amanjaiswal73892 Jun 26, 2025
b4c283f
update openai cache tracking usage to support Chat Completion and Res…
amanjaiswal73892 Jun 26, 2025
c15331a
Merge remote-tracking branch 'origin' into allac/next-agent
amanjaiswal73892 Jun 27, 2025
00da78a
fix: join action_list into a single string with new-lines
amanjaiswal73892 Jun 27, 2025
1d7060c
Refactor OpenAiResponsesModel to isolate env specific functionality a…
amanjaiswal73892 Jun 28, 2025
997cc7b
Change func names to use 'env' instead of 'bgym'
amanjaiswal73892 Jun 30, 2025
b3d409f
Add TODO's and WIP config classes
amanjaiswal73892 Jun 30, 2025
f5443b6
Refactor OpenAICUAModel to streamline action handling and improve cod…
amanjaiswal73892 Jun 30, 2025
ddc8f56
fix typo in tracking
amanjaiswal73892 Jul 12, 2025
a375a35
Refactor TrackAPIPricingMixin to remove init inside mixin class
amanjaiswal73892 Jul 13, 2025
8ce9885
Multiaction support and cleaner API usage with ToolCalls.
amanjaiswal73892 Jul 13, 2025
df3bc6d
Use APIPayload obj to call llms in tool use agent.
amanjaiswal73892 Jul 13, 2025
0ec59f8
remove is_env_action method from ToolCalls.
amanjaiswal73892 Jul 13, 2025
e716b8b
remove stale method and black formatting
amanjaiswal73892 Jul 13, 2025
2fc5b5f
remove OAI cua reference from tool use agent
amanjaiswal73892 Jul 14, 2025
160fb93
remove openai cua from this PR
amanjaiswal73892 Jul 14, 2025
d31f49f
Refactor APIPayload validation
amanjaiswal73892 Jul 14, 2025
d4368fe
Update test for the new API
amanjaiswal73892 Jul 14, 2025
1305892
Add tests to check parallel tool calling ability of APIs and models.
amanjaiswal73892 Jul 14, 2025
8f2ee28
add tool_call_to_python_code in response_api.py
amanjaiswal73892 Jul 14, 2025
dff3e33
fix quotes in response API test
amanjaiswal73892 Jul 14, 2025
1fb1b0a
Merge with main
amanjaiswal73892 Jul 14, 2025
fe37bb8
Make gaia test conditional on successful import
amanjaiswal73892 Jul 14, 2025
72feab7
add tool calls to message for pretty xray output
amanjaiswal73892 Jul 14, 2025
f8a5a3c
use tool_call_to_python_code in to_markdown for responded tool calls
amanjaiswal73892 Jul 14, 2025
12b6d50
black formatting.
amanjaiswal73892 Jul 14, 2025
42aa680
make darglint compliant
amanjaiswal73892 Jul 14, 2025
ddf672b
black formatting
amanjaiswal73892 Jul 14, 2025
119a43d
Log warning if effective cost is negative
amanjaiswal73892 Jul 15, 2025
8f3105f
update test mock object to include cache info.
amanjaiswal73892 Jul 15, 2025
16d334c
fix nested mock object in test.
amanjaiswal73892 Jul 15, 2025
f60b314
Fix Responses API mock object specification in test_responses_api.py
amanjaiswal73892 Jul 15, 2025
22f9385
fix obs indentation and remove unnecessary code.
amanjaiswal73892 Jul 15, 2025
a6f5349
improve type hints
amanjaiswal73892 Jul 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 80 additions & 47 deletions src/agentlab/agents/tool_use_agent/tool_use_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@
from agentlab.agents.agent_args import AgentArgs
from agentlab.llm.llm_utils import image_to_png_base64_url
from agentlab.llm.response_api import (
APIPayload,
ClaudeResponseModelArgs,
LLMOutput,
MessageBuilder,
OpenAIChatModelArgs,
OpenAIResponseModelArgs,
OpenRouterModelArgs,
ToolCalls,
)
from agentlab.llm.tracking import cost_tracker_decorator

Expand Down Expand Up @@ -98,7 +101,8 @@ def flatten(self) -> list[MessageBuilder]:
messages.extend(group.messages)
# Mark all summarized messages for caching
if i == len(self.groups) - keep_last_n_obs:
messages[i].mark_all_previous_msg_for_caching()
if not isinstance(messages[i], ToolCalls):
messages[i].mark_all_previous_msg_for_caching()
return messages

def set_last_summary(self, summary: MessageBuilder):
Expand Down Expand Up @@ -163,18 +167,15 @@ class Obs(Block):
use_dom: bool = False
use_som: bool = False
use_tabs: bool = False
add_mouse_pointer: bool = False
# add_mouse_pointer: bool = False
use_zoomed_webpage: bool = False

def apply(
self, llm, discussion: StructuredDiscussion, obs: dict, last_llm_output: LLMOutput
) -> dict:

if last_llm_output.tool_calls is None:
obs_msg = llm.msg.user() # type: MessageBuilder
else:
obs_msg = llm.msg.tool(last_llm_output.raw_response) # type: MessageBuilder

obs_msg = llm.msg.user()
tool_calls = last_llm_output.tool_calls
if self.use_last_error:
if obs["last_action_error"] != "":
obs_msg.add_text(f"Last action error:\n{obs['last_action_error']}")
Expand All @@ -186,13 +187,12 @@ def apply(
else:
screenshot = obs["screenshot"]

if self.add_mouse_pointer:
# TODO this mouse pointer should be added at the browsergym level
screenshot = np.array(
agent_utils.add_mouse_pointer_from_action(
Image.fromarray(obs["screenshot"]), obs["last_action"]
)
)
# if self.add_mouse_pointer:
# screenshot = np.array(
# agent_utils.add_mouse_pointer_from_action(
# Image.fromarray(obs["screenshot"]), obs["last_action"]
# )
# )

obs_msg.add_image(image_to_png_base64_url(screenshot))
if self.use_axtree:
Expand All @@ -203,6 +203,13 @@ def apply(
obs_msg.add_text(_format_tabs(obs))

discussion.append(obs_msg)

if tool_calls:
for call in tool_calls:
call.response_text("See Observation")
tool_response = llm.msg.add_responded_tool_calls(tool_calls)
discussion.append(tool_response)

return obs_msg


Expand Down Expand Up @@ -254,8 +261,8 @@ def apply(self, llm, discussion: StructuredDiscussion) -> dict:
msg = llm.msg.user().add_text("""Summarize\n""")

discussion.append(msg)
# TODO need to make sure we don't force tool use here
summary_response = llm(messages=discussion.flatten(), tool_choice="none")

summary_response = llm(APIPayload(messages=discussion.flatten()))

summary_msg = llm.msg.assistant().add_text(summary_response.think)
discussion.append(summary_msg)
Expand Down Expand Up @@ -320,25 +327,6 @@ def apply(self, llm, discussion: StructuredDiscussion, task_name: str) -> dict:
discussion.append(msg)


class ToolCall(Block):
    """Block that asks the LLM for a tool call, runs it, and records both turns.

    The block holds a ``tool_server`` whose ``call_tool`` method is invoked
    with the LLM response. The conversation itself is not stored on the
    block; it is passed to ``apply`` and extended in place.
    """

    def __init__(self, tool_server):
        # Object exposing `call_tool(response)`; used to execute the model's tool call.
        self.tool_server = tool_server

    def apply(self, llm, messages: list[MessageBuilder], obs: dict) -> dict:
        """Query ``llm`` with ``messages`` and append the call and its result.

        Args:
            llm: Callable model accepting ``messages=...`` and exposing
                ``llm.msg.tool()`` to build a tool message.
            messages: Conversation so far. Mutated in place: the assistant
                message and the tool answer message are appended.
            obs: Current observation. Not read by this block.

        Returns:
            Nothing is returned explicitly (``None``), despite the ``dict``
            annotation kept for interface compatibility.
        """
        # Use the `messages` argument: this class never sets a `messages`
        # attribute, so reading `self.messages` here would not see the
        # conversation that is appended to below.
        response: LLMOutput = llm(messages=messages)

        messages.append(response.assistant_message)  # this is tool call

        tool_answer = self.tool_server.call_tool(response)
        tool_msg = llm.msg.tool()  # type: MessageBuilder
        tool_msg.add_tool_id(response.last_computer_call_id)
        tool_msg.update_last_raw_response(response)
        tool_msg.add_text(str(tool_answer))
        messages.append(tool_msg)


@dataclass
class PromptConfig:
tag_screenshot: bool = True # Whether to tag the screenshot with the last action.
Expand Down Expand Up @@ -394,7 +382,7 @@ def __init__(

self.call_ids = []

self.llm = model_args.make_model(extra_kwargs={"tools": self.tools})
self.llm = model_args.make_model()
self.msg_builder = model_args.get_message_builder()
self.llm.msg = self.msg_builder

Expand Down Expand Up @@ -462,21 +450,23 @@ def get_action(self, obs: Any) -> float:

messages = self.discussion.flatten()
response: LLMOutput = self.llm(
messages=messages,
tool_choice="any",
cache_tool_definition=True,
cache_complete_prompt=False,
use_cache_breakpoints=True,
APIPayload(
messages=messages,
tools=self.tools, # You can update tools available tools now.
tool_choice="any",
cache_tool_definition=True,
cache_complete_prompt=False,
use_cache_breakpoints=True,
)
)

action = response.action
think = response.think
last_summary = self.discussion.get_last_summary()
if last_summary is not None:
think = last_summary.content[0]["text"] + "\n" + think

self.discussion.new_group()
self.discussion.append(response.tool_calls)
# self.discussion.append(response.tool_calls) # No need to append tool calls anymore.

self.last_response = response
self._responses.append(response) # may be useful for debugging
Expand All @@ -486,8 +476,11 @@ def get_action(self, obs: Any) -> float:
tools_msg = MessageBuilder("tool_description").add_text(tools_str)

# Adding these extra messages to visualize in gradio
messages.insert(0, tools_msg) # insert at the beginning of the messages
messages.append(response.tool_calls)
messages.insert(0, tools_msg) # insert at the beginning of the message
# This avoids the assertion error with self.llm.user().add_responded_tool_calls(tool_calls)
msg = self.llm.msg("tool")
msg.responded_tool_calls = response.tool_calls
messages.append(msg)

agent_info = bgym.AgentInfo(
think=think,
Expand Down Expand Up @@ -533,6 +526,31 @@ def get_action(self, obs: Any) -> float:
vision_support=True,
)

# o3 snapshot "o3-2025-04-16" configured through OpenAIResponseModelArgs.
O3_RESPONSE_MODEL = OpenAIResponseModelArgs(
model_name="o3-2025-04-16",
max_total_tokens=200_000,
max_input_tokens=200_000,
max_new_tokens=2_000,
temperature=None, # O3 does not support temperature
vision_support=True,
)
# Same o3 snapshot and token limits as O3_RESPONSE_MODEL, but configured
# through OpenAIChatModelArgs.
O3_CHATAPI_MODEL = OpenAIChatModelArgs(
model_name="o3-2025-04-16",
max_total_tokens=200_000,
max_input_tokens=200_000,
max_new_tokens=2_000,
temperature=None, # O3 does not support temperature
vision_support=True,
)

# "openai/gpt-4.1" configured through OpenRouterModelArgs, with the same token
# limits as the o3 configs above.
GPT4_1_OPENROUTER_MODEL = OpenRouterModelArgs(
model_name="openai/gpt-4.1",
max_total_tokens=200_000,
max_input_tokens=200_000,
max_new_tokens=2_000,
temperature=None, # NOTE(review): unset like the o3 configs, but this model is gpt-4.1, not o3 - confirm this is intended
vision_support=True,
)

DEFAULT_PROMPT_CONFIG = PromptConfig(
tag_screenshot=True,
Expand All @@ -548,8 +566,8 @@ def get_action(self, obs: Any) -> float:
summarizer=Summarizer(do_summary=True),
general_hints=GeneralHints(use_hints=False),
task_hint=TaskHint(use_task_hint=True),
keep_last_n_obs=None, # keep only the last observation in the discussion
multiaction=False, # whether to use multi-action or not
keep_last_n_obs=None,
multiaction=True, # whether to use multi-action or not
# action_subsets=("bid",),
action_subsets=("coord"),
# action_subsets=("coord", "bid"),
Expand All @@ -559,3 +577,18 @@ def get_action(self, obs: Any) -> float:
model_args=CLAUDE_MODEL_CONFIG,
config=DEFAULT_PROMPT_CONFIG,
)

# Ready-made agent configurations; every one shares DEFAULT_PROMPT_CONFIG and
# differs only in the model arguments it is built with.

# Agent built on the GPT_4_1 model args.
OAI_AGENT = ToolUseAgentArgs(model_args=GPT_4_1, config=DEFAULT_PROMPT_CONFIG)

# Agent built on the O3_CHATAPI_MODEL model args.
OAI_CHATAPI_AGENT = ToolUseAgentArgs(model_args=O3_CHATAPI_MODEL, config=DEFAULT_PROMPT_CONFIG)

# Agent built on the GPT4_1_OPENROUTER_MODEL model args.
OAI_OPENROUTER_AGENT = ToolUseAgentArgs(
    config=DEFAULT_PROMPT_CONFIG,
    model_args=GPT4_1_OPENROUTER_MODEL,
)
4 changes: 4 additions & 0 deletions src/agentlab/analyze/agent_xray.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from agentlab.llm.llm_utils import BaseMessage as AgentLabBaseMessage
from agentlab.llm.llm_utils import Discussion
from agentlab.llm.response_api import MessageBuilder
from agentlab.llm.response_api import ToolCalls

select_dir_instructions = "Select Experiment Directory"
AGENT_NAME_KEY = "agent.agent_name"
Expand Down Expand Up @@ -673,6 +674,9 @@ def dict_to_markdown(d: dict):
str: A markdown-formatted string representation of the dictionary.
"""
if not isinstance(d, dict):
if isinstance(d, ToolCalls):
# ToolCalls rendered by to_markdown method.
return ""
warning(f"Expected dict, got {type(d)}")
return repr(d)
if not d:
Expand Down
Loading