From 90c24685fca1afbd24623998bd545340f8491e2e Mon Sep 17 00:00:00 2001 From: Patrice Bechard Date: Thu, 28 Aug 2025 00:44:53 -0400 Subject: [PATCH 1/4] add hint labeling ui support in loop.py --- src/agentlab/experiments/loop.py | 99 ++++++++++++++++++++++++++++---- 1 file changed, 89 insertions(+), 10 deletions(-) diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py index de4b976a..be34b815 100644 --- a/src/agentlab/experiments/loop.py +++ b/src/agentlab/experiments/loop.py @@ -1,3 +1,4 @@ +import base64 import gzip import importlib.metadata import json @@ -13,12 +14,15 @@ from collections import defaultdict from dataclasses import asdict, dataclass, field, is_dataclass from datetime import datetime +from io import BytesIO from pathlib import Path from typing import Optional import gymnasium as gym import numpy as np +import PIL.Image from browsergym.core.chat import Chat +from browsergym.core.hint_labeling import HintLabeling, HintLabelingInputs from browsergym.experiments.agent import Agent from browsergym.experiments.utils import count_tokens from dataclasses_json import DataClassJsonMixin @@ -404,7 +408,7 @@ def _make_dir(self, exp_root): def run(self): """Run the experiment and save the results""" # start writing logs to run logfile - self._set_logger() + # self._set_logger() # log python environment info save_package_versions(Path(self.exp_dir)) @@ -443,15 +447,18 @@ def run(self): # will end the episode after saving the step info. step_info.truncated = True - step_info.save_step_info( - self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som - ) - logger.debug("Step info saved.") + # step_info.save_step_info( + # self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som + # ) + # logger.debug("Step info saved.") if hasattr(env.unwrapped, "chat") and isinstance(env.unwrapped.chat, Chat): _send_chat_info(env.unwrapped.chat, action, step_info.agent_info) logger.debug("Chat info sent.") + if hasattr(env.unwrapped, "hint_labeling") and isinstance(env.unwrapped.hint_labeling, HintLabeling): + _update_hint_labeling(env.unwrapped.hint_labeling, action, agent, step_info) + if action is None: logger.debug("Agent returned None action. Ending episode.") break @@ -481,10 +488,11 @@ def run(self): finally: try: - if step_info is not None: - step_info.save_step_info( - self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som - ) + pass + # if step_info is not None: + # step_info.save_step_info( + # self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som + # ) except Exception as e: logger.error(f"Error while saving step info in the finally block: {e}") try: @@ -508,7 +516,8 @@ def run(self): except Exception as e: logger.exception(f"Error while closing the environment: {e}") try: - self._unset_logger() # stop writing logs to run logfile + # self._unset_logger() # stop writing logs to run logfile + pass except Exception as e: logger.exception(f"Error while unsetting the logger: {e}") @@ -942,6 +951,76 @@ def _send_chat_info(chat: Chat, action: str, agent_info: dict): logger.info(msg) chat.add_message(role="info", msg=msg) +def _convert_np_array_to_base64(np_array: np.ndarray): + im = PIL.Image.fromarray(np_array) + buffered = BytesIO() + im.save(buffered, format="PNG") + img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8") + return img_b64 + +def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent, step_info: StepInfo): + """Update the hint labeling with the action and agent info.""" + context = HintLabelingInputs( + goal=step_info.obs.get("goal", ""), # TODO: is this goal deprecated? + error_feedback=step_info.obs.get("last_action_error", ""), + screenshot = _convert_np_array_to_base64(step_info.obs["screenshot"]), + axtree = step_info.obs["axtree_txt"], + history = [], # TODO: add history + hint = "", + suggestions = [ + { + "id": "1", + "action": action, + "think": step_info.agent_info.think, + }, + { + "id": "2", + "action": "test", + "think": "test", + } + ] + ) + while True: + # update hint labeling ui context + logger.info("Updating Hint Labeling UI context...") + hint_labeling.update_context(context) + + # wait for hint labeling response + logger.info("Waiting for Hint Labeling UI response...") + response = hint_labeling.wait_for_response() + + # if payload is for reprompt, we ask for 5 suggestions and we combine everything + if response["type"] == "reprompt": + # reprompt model 5 times + hint = response["payload"]["hint"] + agent.flags.extra_instructions = hint + suggestions = [] + for i in tqdm(range(5)): + # TODO: make this more optimal + action = step_info.from_action(agent) + think = step_info.agent_info.think + suggestions.append({"id": str(i+1), "action": action, "think": think}) + + # update context + context = HintLabelingInputs( + goal="blablabli", + error_feedback=context.error_feedback, + screenshot = context.screenshot, + axtree = context.axtree, + history = context.history, + hint = hint, + suggestions = suggestions + ) + continue + + # otherwise, if payload is for action, we return the updated action and save the hint + elif response["type"] == "step": + step_info.agent_info.think = response["payload"]["think"] + action = response["payload"]["action"] + return action + else: + raise ValueError(f"Unknown response type: {response['type']}") + def _flatten_dict(d, parent_key="", sep="."): """Recursively flatten a nested dictionary.""" From f3027441187f8c377c8f45680e9883a89c6a86e9 Mon Sep 17 00:00:00 2001 From: Patrice Bechard Date: Thu, 28 Aug 2025 09:15:43 -0400 Subject: [PATCH 2/4] update loop --- src/agentlab/experiments/loop.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py index be34b815..835365bb 100644 --- a/src/agentlab/experiments/loop.py +++ b/src/agentlab/experiments/loop.py @@ -457,7 +457,7 @@ def run(self): logger.debug("Chat info sent.") if hasattr(env.unwrapped, "hint_labeling") and isinstance(env.unwrapped.hint_labeling, HintLabeling): - _update_hint_labeling(env.unwrapped.hint_labeling, action, agent, step_info) + action = _update_hint_labeling(env.unwrapped.hint_labeling, action, agent, step_info) if action is None: logger.debug("Agent returned None action. Ending episode.") @@ -972,11 +972,6 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent "id": "1", "action": action, "think": step_info.agent_info.think, - }, - { - "id": "2", - "action": "test", - "think": "test", } ] ) @@ -994,16 +989,19 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent # reprompt model 5 times hint = response["payload"]["hint"] agent.flags.extra_instructions = hint + seen_actions = set() suggestions = [] for i in tqdm(range(5)): # TODO: make this more optimal action = step_info.from_action(agent) think = step_info.agent_info.think - suggestions.append({"id": str(i+1), "action": action, "think": think}) - + if action not in seen_actions: + seen_actions.add(action) + suggestions.append({"id": str(len(seen_actions)), "action": action, "think": think}) + # update context context = HintLabelingInputs( - goal="blablabli", + goal=context.goal, error_feedback=context.error_feedback, screenshot = context.screenshot, axtree = context.axtree, From daee88ebc1778009e8c6bd3c927a95aea6798252 Mon Sep 17 00:00:00 2001 From: Patrice Bechard Date: Thu, 28 Aug 2025 12:11:05 -0400 Subject: [PATCH 3/4] cosmetic changes --- src/agentlab/experiments/loop.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py index 835365bb..aa0b7087 100644 --- a/src/agentlab/experiments/loop.py +++ b/src/agentlab/experiments/loop.py @@ -53,6 +53,8 @@ class EnvArgs(DataClassJsonMixin): storage_state: Optional[str | Path | dict] = None task_kwargs: Optional[dict] = None # use default value from BrowserGym pre_observation_delay: float = None # seconds, wait for JS events to be fired + use_chat_ui: bool = False + use_hint_labeling_ui: bool = False def make_env( self, action_mapping, exp_dir, exp_task_kwargs: dict = {}, use_raw_page_output=True @@ -100,6 +102,8 @@ def make_env( wait_for_user_message=self.wait_for_user_message, action_mapping=action_mapping, # action mapping is provided by the agent use_raw_page_output=use_raw_page_output, + use_chat_ui=self.use_chat_ui, + use_hint_labeling_ui=self.use_hint_labeling_ui, **extra_kwargs, ) @@ -408,7 +412,7 @@ def _make_dir(self, exp_root): def run(self): """Run the experiment and save the results""" # start writing logs to run logfile - # self._set_logger() + self._set_logger() # log python environment info save_package_versions(Path(self.exp_dir)) @@ -991,7 +995,7 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent agent.flags.extra_instructions = hint seen_actions = set() suggestions = [] - for i in tqdm(range(5)): + for _ in range(5): # TODO: make this more optimal action = step_info.from_action(agent) think = step_info.agent_info.think From 9b238bceedd678ed17578d3fac8cf1c6a8bf6f32 Mon Sep 17 00:00:00 2001 From: Patrice Bechard Date: Thu, 28 Aug 2025 12:12:27 -0400 Subject: [PATCH 4/4] formatting --- src/agentlab/experiments/loop.py | 44 ++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py index aa0b7087..299c3f83 100644 --- a/src/agentlab/experiments/loop.py +++ b/src/agentlab/experiments/loop.py @@ -460,8 +460,12 @@ def run(self): _send_chat_info(env.unwrapped.chat, action, step_info.agent_info) logger.debug("Chat info sent.") - if hasattr(env.unwrapped, "hint_labeling") and isinstance(env.unwrapped.hint_labeling, HintLabeling): - action = _update_hint_labeling(env.unwrapped.hint_labeling, action, agent, step_info) + if hasattr(env.unwrapped, "hint_labeling") and isinstance( + env.unwrapped.hint_labeling, HintLabeling + ): + action = _update_hint_labeling( + env.unwrapped.hint_labeling, action, agent, step_info + ) if action is None: logger.debug("Agent returned None action. Ending episode.") @@ -955,6 +959,7 @@ def _send_chat_info(chat: Chat, action: str, agent_info: dict): logger.info(msg) chat.add_message(role="info", msg=msg) + def _convert_np_array_to_base64(np_array: np.ndarray): im = PIL.Image.fromarray(np_array) buffered = BytesIO() @@ -962,22 +967,25 @@ def _convert_np_array_to_base64(np_array: np.ndarray): img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8") return img_b64 -def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent, step_info: StepInfo): + +def _update_hint_labeling( + hint_labeling: HintLabeling, action: str, agent: Agent, step_info: StepInfo +): """Update the hint labeling with the action and agent info.""" context = HintLabelingInputs( - goal=step_info.obs.get("goal", ""), # TODO: is this goal deprecated? + goal=step_info.obs.get("goal", ""), # TODO: is this goal deprecated? error_feedback=step_info.obs.get("last_action_error", ""), - screenshot = _convert_np_array_to_base64(step_info.obs["screenshot"]), - axtree = step_info.obs["axtree_txt"], - history = [], # TODO: add history - hint = "", - suggestions = [ + screenshot=_convert_np_array_to_base64(step_info.obs["screenshot"]), + axtree=step_info.obs["axtree_txt"], + history=[], # TODO: add history + hint="", + suggestions=[ { "id": "1", "action": action, "think": step_info.agent_info.think, } - ] + ], ) while True: # update hint labeling ui context @@ -1001,17 +1009,19 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent think = step_info.agent_info.think if action not in seen_actions: seen_actions.add(action) - suggestions.append({"id": str(len(seen_actions)), "action": action, "think": think}) + suggestions.append( + {"id": str(len(seen_actions)), "action": action, "think": think} + ) # update context context = HintLabelingInputs( goal=context.goal, error_feedback=context.error_feedback, - screenshot = context.screenshot, - axtree = context.axtree, - history = context.history, - hint = hint, - suggestions = suggestions + screenshot=context.screenshot, + axtree=context.axtree, + history=context.history, + hint=hint, + suggestions=suggestions, ) continue @@ -1022,7 +1032,7 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent return action else: raise ValueError(f"Unknown response type: {response['type']}") - + def _flatten_dict(d, parent_key="", sep="."): """Recursively flatten a nested dictionary."""