From 90c24685fca1afbd24623998bd545340f8491e2e Mon Sep 17 00:00:00 2001
From: Patrice Bechard <patrice.bechard@servicenow.com>
Date: Thu, 28 Aug 2025 00:44:53 -0400
Subject: [PATCH 1/4] add hint labeling ui support in loop.py

---
 src/agentlab/experiments/loop.py | 99 ++++++++++++++++++++++++++++----
 1 file changed, 89 insertions(+), 10 deletions(-)

diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py
index de4b976a..be34b815 100644
--- a/src/agentlab/experiments/loop.py
+++ b/src/agentlab/experiments/loop.py
@@ -1,3 +1,4 @@
+import base64
 import gzip
 import importlib.metadata
 import json
@@ -13,12 +14,15 @@
 from collections import defaultdict
 from dataclasses import asdict, dataclass, field, is_dataclass
 from datetime import datetime
+from io import BytesIO
 from pathlib import Path
 from typing import Optional
 
 import gymnasium as gym
 import numpy as np
+import PIL.Image
 from browsergym.core.chat import Chat
+from browsergym.core.hint_labeling import HintLabeling, HintLabelingInputs
 from browsergym.experiments.agent import Agent
 from browsergym.experiments.utils import count_tokens
 from dataclasses_json import DataClassJsonMixin
@@ -404,7 +408,7 @@ def _make_dir(self, exp_root):
     def run(self):
         """Run the experiment and save the results"""
         # start writing logs to run logfile
-        self._set_logger()
+        # self._set_logger()
 
         # log python environment info
         save_package_versions(Path(self.exp_dir))
@@ -443,15 +447,18 @@ def run(self):
                     # will end the episode after saving the step info.
                     step_info.truncated = True
 
-                step_info.save_step_info(
-                    self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som
-                )
-                logger.debug("Step info saved.")
+                # step_info.save_step_info(
+                #     self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som
+                # )
+                # logger.debug("Step info saved.")
 
                 if hasattr(env.unwrapped, "chat") and isinstance(env.unwrapped.chat, Chat):
                     _send_chat_info(env.unwrapped.chat, action, step_info.agent_info)
                     logger.debug("Chat info sent.")
 
+                if hasattr(env.unwrapped, "hint_labeling") and isinstance(env.unwrapped.hint_labeling, HintLabeling):
+                    _update_hint_labeling(env.unwrapped.hint_labeling, action, agent, step_info)
+
                 if action is None:
                     logger.debug("Agent returned None action. Ending episode.")
                     break
@@ -481,10 +488,11 @@ def run(self):
 
         finally:
             try:
-                if step_info is not None:
-                    step_info.save_step_info(
-                        self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som
-                    )
+                pass
+                # if step_info is not None:
+                #     step_info.save_step_info(
+                #         self.exp_dir, save_screenshot=self.save_screenshot, save_som=self.save_som
+                #     )
             except Exception as e:
                 logger.error(f"Error while saving step info in the finally block: {e}")
             try:
@@ -508,7 +516,8 @@ def run(self):
             except Exception as e:
                 logger.exception(f"Error while closing the environment: {e}")
             try:
-                self._unset_logger()  # stop writing logs to run logfile
+                # self._unset_logger()  # stop writing logs to run logfile
+                pass
             except Exception as e:
                 logger.exception(f"Error while unsetting the logger: {e}")
 
@@ -942,6 +951,76 @@ def _send_chat_info(chat: Chat, action: str, agent_info: dict):
     logger.info(msg)
     chat.add_message(role="info", msg=msg)
 
+def _convert_np_array_to_base64(np_array: np.ndarray):
+    im = PIL.Image.fromarray(np_array)
+    buffered = BytesIO()
+    im.save(buffered, format="PNG")
+    img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    return img_b64
+
+def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent, step_info: StepInfo):
+    """Update the hint labeling with the action and agent info."""
+    context = HintLabelingInputs(
+        goal=step_info.obs.get("goal", ""), # TODO: is this goal deprecated?
+        error_feedback=step_info.obs.get("last_action_error", ""),
+        screenshot = _convert_np_array_to_base64(step_info.obs["screenshot"]),
+        axtree = step_info.obs["axtree_txt"],
+        history = [], # TODO: add history
+        hint = "",
+        suggestions = [
+            {
+                "id": "1",
+                "action": action,
+                "think": step_info.agent_info.think,
+            },
+            {
+                "id": "2",
+                "action": "test",
+                "think": "test",
+            }
+        ]
+    )
+    while True:
+        # update hint labeling ui context
+        logger.info("Updating Hint Labeling UI context...")
+        hint_labeling.update_context(context)
+
+        # wait for hint labeling response
+        logger.info("Waiting for Hint Labeling UI response...")
+        response = hint_labeling.wait_for_response()
+
+        # if payload is for reprompt, we ask for 5 suggestions and we combine everything
+        if response["type"] == "reprompt":
+            # reprompt model 5 times
+            hint = response["payload"]["hint"]
+            agent.flags.extra_instructions = hint
+            suggestions = []
+            for i in tqdm(range(5)):
+                # TODO: make this more optimal
+                action = step_info.from_action(agent)
+                think = step_info.agent_info.think
+                suggestions.append({"id": str(i+1), "action": action, "think": think})
+            
+            # update context
+            context = HintLabelingInputs(
+                goal="blablabli",
+                error_feedback=context.error_feedback,
+                screenshot = context.screenshot,
+                axtree = context.axtree,
+                history = context.history,
+                hint = hint,
+                suggestions = suggestions
+            )
+            continue
+
+        # otherwise, if payload is for action, we return the updated action and save the hint
+        elif response["type"] == "step":
+            step_info.agent_info.think = response["payload"]["think"]
+            action = response["payload"]["action"]
+            return action
+        else:
+            raise ValueError(f"Unknown response type: {response['type']}")
+            
 
 def _flatten_dict(d, parent_key="", sep="."):
     """Recursively flatten a nested dictionary."""

From f3027441187f8c377c8f45680e9883a89c6a86e9 Mon Sep 17 00:00:00 2001
From: Patrice Bechard <patrice.bechard@servicenow.com>
Date: Thu, 28 Aug 2025 09:15:43 -0400
Subject: [PATCH 2/4] update loop

---
 src/agentlab/experiments/loop.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py
index be34b815..835365bb 100644
--- a/src/agentlab/experiments/loop.py
+++ b/src/agentlab/experiments/loop.py
@@ -457,7 +457,7 @@ def run(self):
                     logger.debug("Chat info sent.")
 
                 if hasattr(env.unwrapped, "hint_labeling") and isinstance(env.unwrapped.hint_labeling, HintLabeling):
-                    _update_hint_labeling(env.unwrapped.hint_labeling, action, agent, step_info)
+                    action = _update_hint_labeling(env.unwrapped.hint_labeling, action, agent, step_info)
 
                 if action is None:
                     logger.debug("Agent returned None action. Ending episode.")
@@ -972,11 +972,6 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent
                 "id": "1",
                 "action": action,
                 "think": step_info.agent_info.think,
-            },
-            {
-                "id": "2",
-                "action": "test",
-                "think": "test",
             }
         ]
     )
@@ -994,16 +989,19 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent
             # reprompt model 5 times
             hint = response["payload"]["hint"]
             agent.flags.extra_instructions = hint
+            seen_actions = set()
             suggestions = []
             for i in tqdm(range(5)):
                 # TODO: make this more optimal
                 action = step_info.from_action(agent)
                 think = step_info.agent_info.think
-                suggestions.append({"id": str(i+1), "action": action, "think": think})
-            
+                if action not in seen_actions:
+                    seen_actions.add(action)
+                    suggestions.append({"id": str(len(seen_actions)), "action": action, "think": think})
+
             # update context
             context = HintLabelingInputs(
-                goal="blablabli",
+                goal=context.goal,
                 error_feedback=context.error_feedback,
                 screenshot = context.screenshot,
                 axtree = context.axtree,

From daee88ebc1778009e8c6bd3c927a95aea6798252 Mon Sep 17 00:00:00 2001
From: Patrice Bechard <patrice.bechard@servicenow.com>
Date: Thu, 28 Aug 2025 12:11:05 -0400
Subject: [PATCH 3/4] cosmetic changes

---
 src/agentlab/experiments/loop.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py
index 835365bb..aa0b7087 100644
--- a/src/agentlab/experiments/loop.py
+++ b/src/agentlab/experiments/loop.py
@@ -53,6 +53,8 @@ class EnvArgs(DataClassJsonMixin):
     storage_state: Optional[str | Path | dict] = None
     task_kwargs: Optional[dict] = None  # use default value from BrowserGym
     pre_observation_delay: float = None  # seconds, wait for JS events to be fired
+    use_chat_ui: bool = False
+    use_hint_labeling_ui: bool = False
 
     def make_env(
         self, action_mapping, exp_dir, exp_task_kwargs: dict = {}, use_raw_page_output=True
@@ -100,6 +102,8 @@ def make_env(
             wait_for_user_message=self.wait_for_user_message,
             action_mapping=action_mapping,  # action mapping is provided by the agent
             use_raw_page_output=use_raw_page_output,
+            use_chat_ui=self.use_chat_ui,
+            use_hint_labeling_ui=self.use_hint_labeling_ui,
             **extra_kwargs,
         )
 
@@ -408,7 +412,7 @@ def _make_dir(self, exp_root):
     def run(self):
         """Run the experiment and save the results"""
         # start writing logs to run logfile
-        # self._set_logger()
+        self._set_logger()
 
         # log python environment info
         save_package_versions(Path(self.exp_dir))
@@ -991,7 +995,7 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent
             agent.flags.extra_instructions = hint
             seen_actions = set()
             suggestions = []
-            for i in tqdm(range(5)):
+            for _ in range(5):
                 # TODO: make this more optimal
                 action = step_info.from_action(agent)
                 think = step_info.agent_info.think

From 9b238bceedd678ed17578d3fac8cf1c6a8bf6f32 Mon Sep 17 00:00:00 2001
From: Patrice Bechard <patrice.bechard@servicenow.com>
Date: Thu, 28 Aug 2025 12:12:27 -0400
Subject: [PATCH 4/4] formatting

---
 src/agentlab/experiments/loop.py | 44 ++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py
index aa0b7087..299c3f83 100644
--- a/src/agentlab/experiments/loop.py
+++ b/src/agentlab/experiments/loop.py
@@ -460,8 +460,12 @@ def run(self):
                     _send_chat_info(env.unwrapped.chat, action, step_info.agent_info)
                     logger.debug("Chat info sent.")
 
-                if hasattr(env.unwrapped, "hint_labeling") and isinstance(env.unwrapped.hint_labeling, HintLabeling):
-                    action = _update_hint_labeling(env.unwrapped.hint_labeling, action, agent, step_info)
+                if hasattr(env.unwrapped, "hint_labeling") and isinstance(
+                    env.unwrapped.hint_labeling, HintLabeling
+                ):
+                    action = _update_hint_labeling(
+                        env.unwrapped.hint_labeling, action, agent, step_info
+                    )
 
                 if action is None:
                     logger.debug("Agent returned None action. Ending episode.")
@@ -955,6 +959,7 @@ def _send_chat_info(chat: Chat, action: str, agent_info: dict):
     logger.info(msg)
     chat.add_message(role="info", msg=msg)
 
+
 def _convert_np_array_to_base64(np_array: np.ndarray):
     im = PIL.Image.fromarray(np_array)
     buffered = BytesIO()
@@ -962,22 +967,25 @@ def _convert_np_array_to_base64(np_array: np.ndarray):
     img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return img_b64
 
-def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent, step_info: StepInfo):
+
+def _update_hint_labeling(
+    hint_labeling: HintLabeling, action: str, agent: Agent, step_info: StepInfo
+):
     """Update the hint labeling with the action and agent info."""
     context = HintLabelingInputs(
-        goal=step_info.obs.get("goal", ""), # TODO: is this goal deprecated?
+        goal=step_info.obs.get("goal", ""),  # TODO: is this goal deprecated?
         error_feedback=step_info.obs.get("last_action_error", ""),
-        screenshot = _convert_np_array_to_base64(step_info.obs["screenshot"]),
-        axtree = step_info.obs["axtree_txt"],
-        history = [], # TODO: add history
-        hint = "",
-        suggestions = [
+        screenshot=_convert_np_array_to_base64(step_info.obs["screenshot"]),
+        axtree=step_info.obs["axtree_txt"],
+        history=[],  # TODO: add history
+        hint="",
+        suggestions=[
             {
                 "id": "1",
                 "action": action,
                 "think": step_info.agent_info.think,
             }
-        ]
+        ],
     )
     while True:
         # update hint labeling ui context
@@ -1001,17 +1009,19 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent
                 think = step_info.agent_info.think
                 if action not in seen_actions:
                     seen_actions.add(action)
-                    suggestions.append({"id": str(len(seen_actions)), "action": action, "think": think})
+                    suggestions.append(
+                        {"id": str(len(seen_actions)), "action": action, "think": think}
+                    )
 
             # update context
             context = HintLabelingInputs(
                 goal=context.goal,
                 error_feedback=context.error_feedback,
-                screenshot = context.screenshot,
-                axtree = context.axtree,
-                history = context.history,
-                hint = hint,
-                suggestions = suggestions
+                screenshot=context.screenshot,
+                axtree=context.axtree,
+                history=context.history,
+                hint=hint,
+                suggestions=suggestions,
             )
             continue
 
@@ -1022,7 +1032,7 @@ def _update_hint_labeling(hint_labeling: HintLabeling, action: str, agent: Agent
             return action
         else:
             raise ValueError(f"Unknown response type: {response['type']}")
-            
+
 
 def _flatten_dict(d, parent_key="", sep="."):
     """Recursively flatten a nested dictionary."""