Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
c9852ec
rename trace-recorder to hilt_agent
amanjaiswal73892 Aug 29, 2025
ed0f1bd
add timeout error for hilt agent.
amanjaiswal73892 Sep 2, 2025
b2c1ac8
darglint and black
amanjaiswal73892 Sep 2, 2025
3b07fe9
correct spelling hilt -> hitl
amanjaiswal73892 Sep 2, 2025
9633275
Move the overlay_action to utils
amanjaiswal73892 Sep 2, 2025
51cacdb
Increase timeout
amanjaiswal73892 Sep 2, 2025
958430c
add docstring for functions and black
amanjaiswal73892 Sep 2, 2025
4453a00
Improve UI and step hint handling for multiple hints
amanjaiswal73892 Sep 2, 2025
97f3904
add snapshots navigation to see history of interactions.
amanjaiswal73892 Sep 2, 2025
88d1d8d
View human added hints in xray agent_info.
amanjaiswal73892 Sep 2, 2025
6b78e8e
revert change to ipynb
amanjaiswal73892 Sep 2, 2025
79cde90
add agent-mentor launcher
amanjaiswal73892 Sep 2, 2025
517aaf5
adding headless as args in agentlab-mentor
amanjaiswal73892 Sep 2, 2025
9ed3376
improve entry point args for agentlab-mentor to allow multiple seeds
amanjaiswal73892 Sep 3, 2025
4f50293
update error-handling for agentlab-mentor
amanjaiswal73892 Sep 3, 2025
dbc332f
update default window size (revert to playwright default)
patricebechard Sep 3, 2025
7d988a8
hack to fix bbox issue
patricebechard Sep 3, 2025
8addffb
simplify CLI args and add ability to download hints using CLI.
amanjaiswal73892 Sep 3, 2025
5921777
black
amanjaiswal73892 Sep 3, 2025
92f9a74
formatting
amanjaiswal73892 Sep 3, 2025
02347f8
add flag to select llm config
patricebechard Sep 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,4 @@ hint = [
[project.scripts]
agentlab-assistant = "agentlab.ui_assistant:main"
agentlab-xray = "agentlab.analyze.agent_xray:main"
agentlab-mentor = "agentlab.agents.hitl_agent.launch_hint_ui:main"
26 changes: 26 additions & 0 deletions src/agentlab/agents/agent_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import copy

from PIL import Image, ImageDraw
from playwright.sync_api import Page

from agentlab.analyze import overlay_utils
from agentlab.llm.llm_utils import img_to_base_64


def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
"""
Expand Down Expand Up @@ -128,3 +133,24 @@ def zoom_webpage(page: Page, zoom_factor: float = 1.5):

page.evaluate(f"document.documentElement.style.zoom='{zoom_factor*100}%'")
return page


def overlay_action(obs, action):
    """Draw ``action`` on a copy of the observation's screenshot.

    Both the screenshot and the element properties are deep-copied before
    use, so the entries of ``obs`` read here are not modified.

    Args:
        obs: Observation mapping. ``obs["screenshot"]`` is handed to
            ``Image.fromarray`` and ``obs["extra_element_properties"]`` is
            forwarded to ``overlay_utils.annotate_action`` as ``properties``.
        action: The action to visualize, forwarded unchanged to
            ``overlay_utils.annotate_action``.

    Returns:
        The result of ``img_to_base_64`` applied to the annotated image.
    """
    import os

    act_img = Image.fromarray(copy.deepcopy(obs["screenshot"]))
    new_obs_properties = copy.deepcopy(obs["extra_element_properties"])

    # NOTE: any non-empty value of the variable (even "0") enables this path.
    if os.getenv("AGENTLAB_USE_RETINA"):
        # HACK: divide everything by 2 in the obs
        # TODO: make this more robust by changing logic in annotate_action directly
        # (or maybe in the obs section?)
        for value in new_obs_properties.values():
            try:
                value["bbox"] = [elem / 2 for elem in value["bbox"]]
            except (KeyError, TypeError):
                # Best effort: entries without a usable bbox (missing key,
                # None, or a non-mapping entry) are left untouched.
                continue

    # annotate_action is called for its effect on act_img; its return value
    # is not used.
    overlay_utils.annotate_action(act_img, action, properties=new_obs_properties)
    return img_to_base_64(act_img)
50 changes: 50 additions & 0 deletions src/agentlab/agents/hitl_agent/base_multi_candidate_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing_extensions import Protocol

from agentlab.agents.agent_args import AgentArgs


class MultiCandidateAgent(Protocol):
"""
Protocol for agents that generate multiple candidates for get_action.

This protocol defines the contract for agents that can generate
multiple candidate actions and allow selection of one of them for execution.
"""

def get_candidate_generations(
self, obs: dict, hint: list[str] | None = None, n_candidates: int = 3
) -> "list[dict]":
"""
Generate multiple candidate actions for the given observation.

You can pass extra info in agent_info to update internal state of the
agent based on the selected candidate. Your internal state management
should be robust to multiple calls to the get_candidate_generations method
in a single step.

Args:
obs: The current observation dictionary containing environment state
hint: Optional list of hint strings to guide candidate generation
n_candidates: Number of candidate actions to generate
"""
...

def update_agent_state_from_selected_candidate(self, output: dict):
"""
Update the agent's internal state based on the selected candidate.
This can include any memory or planning updates.

Args:
output: The selected candidate action dictionary
"""
pass


class MultiCandidateAgentArgs(AgentArgs):
    """Agent args for multi-candidate agents; tags a non-empty agent_name with 'MC-'."""

    def make_agent(self) -> MultiCandidateAgent: ...

    def __post_init__(self):
        """Run the parent hook, then prepend 'MC-' to agent_name when it is set."""
        super().__post_init__()
        current_name = getattr(self, "agent_name", None)
        if not current_name:
            # Attribute missing or empty: nothing to prefix.
            return
        self.agent_name = "MC-" + current_name
Loading
Loading