diff --git a/pyproject.toml b/pyproject.toml index ef9a7342..b2d7eacb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,3 +109,4 @@ hint = [ [project.scripts] agentlab-assistant = "agentlab.ui_assistant:main" agentlab-xray = "agentlab.analyze.agent_xray:main" +agentlab-mentor = "agentlab.agents.hitl_agent.launch_hint_ui:main" diff --git a/src/agentlab/agents/agent_utils.py b/src/agentlab/agents/agent_utils.py index 29219d2d..179a94d2 100644 --- a/src/agentlab/agents/agent_utils.py +++ b/src/agentlab/agents/agent_utils.py @@ -1,6 +1,11 @@ +import copy + from PIL import Image, ImageDraw from playwright.sync_api import Page +from agentlab.analyze import overlay_utils +from agentlab.llm.llm_utils import img_to_base_64 + def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image: """ @@ -128,3 +133,24 @@ def zoom_webpage(page: Page, zoom_factor: float = 1.5): page.evaluate(f"document.documentElement.style.zoom='{zoom_factor*100}%'") return page + + +def overlay_action(obs, action): + """Overlays actions on screenshot in-place""" + act_img = copy.deepcopy(obs["screenshot"]) + act_img = Image.fromarray(act_img) + + new_obs_properties = copy.deepcopy(obs["extra_element_properties"]) + import os + + if os.getenv("AGENTLAB_USE_RETINA"): + # HACK: divide everything by 2 in the obs + # TODO: make this more robust by changing login in annotate_action directly (or maybe in the obs section?) + for key, value in new_obs_properties.items(): + try: + new_obs_properties[key]["bbox"] = [elem / 2 for elem in value["bbox"]] + except: + pass + + overlay_utils.annotate_action(act_img, action, properties=new_obs_properties) + return img_to_base_64(act_img) diff --git a/src/agentlab/agents/hitl_agent/base_multi_candidate_agent.py b/src/agentlab/agents/hitl_agent/base_multi_candidate_agent.py new file mode 100644 index 00000000..81a0db08 --- /dev/null +++ b/src/agentlab/agents/hitl_agent/base_multi_candidate_agent.py @@ -0,0 +1,50 @@ +from typing_extensions import Protocol + +from agentlab.agents.agent_args import AgentArgs + + +class MultiCandidateAgent(Protocol): + """ + Protocol for agents that generate multiple candidates for get_action. + + This protocol defines the contract for agents that can generate + multiple candidate actions and allow selection of one of them for execution. + """ + + def get_candidate_generations( + self, obs: dict, hint: list[str] | None = None, n_candidates: int = 3 + ) -> "list[dict]": + """ + Generate multiple candidate actions for the given observation. + + You can pass extra info in agent_info to update internal state of the + agent based on the selected candidate. Your internal state management + should be robust to multiple calls to the get_candidate_generations method + in a single step. + + Args: + obs: The current observation dictionary containing environment state + hint: Optional list of hint strings to guide candidate generation + n_candidates: Number of candidate actions to generate + """ + ... + + def update_agent_state_from_selected_candidate(self, output: dict): + """ + Update the agent's internal state based on the selected candidate. + This can include any memory or planning updates. + + Args: + output: The selected candidate action dictionary + """ + pass + + +class MultiCandidateAgentArgs(AgentArgs): + def make_agent(self) -> MultiCandidateAgent: ... 
+ + def __post_init__(self): + """Prefix subagent name with 'MC-'.""" + super().__post_init__() + if hasattr(self, "agent_name") and self.agent_name: + self.agent_name = "MC-" + self.agent_name diff --git a/src/agentlab/agents/hitl_agent/generic_human_guided_agent.py b/src/agentlab/agents/hitl_agent/generic_human_guided_agent.py new file mode 100644 index 00000000..e8d31688 --- /dev/null +++ b/src/agentlab/agents/hitl_agent/generic_human_guided_agent.py @@ -0,0 +1,362 @@ +import base64 +import copy +import io +import re +from dataclasses import Field, asdict, dataclass +from typing import Dict, List + +import bgym +import numpy as np +from browsergym.experiments.agent import AgentInfo +from PIL import Image + +from agentlab.agents import dynamic_prompting as dp +from agentlab.agents.agent_utils import overlay_action +from agentlab.agents.generic_agent.generic_agent import GenericAgent, GenericAgentArgs +from agentlab.agents.generic_agent.generic_agent_prompt import MainPrompt +from agentlab.agents.hitl_agent.hint_labelling import ( + HintLabeling, + HintLabelingInputs, +) +from agentlab.llm.llm_utils import ( + Discussion, + HumanMessage, + SystemMessage, + img_to_base_64, +) +from agentlab.llm.tracking import cost_tracker_decorator + + +class CandidatesGeneration(dp.PromptElement): + # Ask for multiple alternatives; each candidate must contain and . + def __init__(self, hint: list[str] | None = None, n_candidates=3) -> None: + self.hint = hint + self.n_candidates = n_candidates + self.hint_prompt = "\n".join(f"{i}. {c}" for i, c in enumerate(hint, 1)) if hint else "" + super().__init__(True) + self._prompt = [ + dict( + type="text", + text=f""" + You are a web agent. Propose {self.n_candidates} alternative next steps for the current page. + {('Use the Hints:' + self.hint_prompt) if self.hint else ""}\n + Return EACH candidate wrapped as numbered tags: + ... + ... + + Inside every candidate you MUST include: + ...why this action is appropriate now... + ...ONE atomic, executable action string... + + Do not include any extra text outside the candidate tags. + Use this format: + + Explain why Candidate One is chosen + Candidate One Action + + + + Explain why Candidate Two is chosen + Candidate Two Action + + # Example + + The login button is visible and proceeding will reveal the auth form. + click(role="button", name="Log in") + + + + User might need to enter email first; the email field is focused and visible. + fill(bid="a112", text="user@example.com") + + """, + ) + ] + + # Regex patterns for numbered candidates only + _NUM_BLOCK = re.compile( + r"<\s*candidate[_ ]generation[_ ](?P[0-9]+)\s*>(?P.*?)<\s*/\s*candidate[_ ]generation[_ ](?P=idx)\s*>", + flags=re.IGNORECASE | re.DOTALL, + ) + _THINK_PATTERN = re.compile( + r"<\s*think\s*>(?P.*?)<\s*/\s*think\s*>", + flags=re.IGNORECASE | re.DOTALL, + ) + _ACTION_PATTERN = re.compile( + r"<\s*action\s*>(?P.*?)<\s*/\s*action\s*>", + flags=re.IGNORECASE | re.DOTALL, + ) + + def _parse_answer(self, text_answer: str) -> Dict[str, Dict[str, str]]: + """Extract up to n_candidates candidates, using numbered tags only. + + Args: + text_answer: The text response containing candidate generation tags. + + Returns: + Dictionary mapping candidate names to their think and action content. 
+ Format: {"candidate_generation_1": {"think": "...", "action": "..."}, ...} + """ + result = { + f"candidate_generation_{i+1}": {"think": "", "action": ""} + for i in range(self.n_candidates) + } + + if not isinstance(text_answer, str): + return result + + matches: List[re.Match] = list(self._NUM_BLOCK.finditer(text_answer)) + # Sort by numeric index + matches_sorted = sorted(matches, key=lambda m: int(m.group("idx"))) + for i, m in enumerate(matches_sorted[: self.n_candidates]): + body = m.group("body").strip() + think_m = self._THINK_PATTERN.search(body) + action_m = self._ACTION_PATTERN.search(body) + result[f"candidate_generation_{i+1}"] = { + "think": (think_m.group("think").strip() if think_m else ""), + "action": (action_m.group("action").strip() if action_m else ""), + } + + return result + + +@dataclass +class MultipleProposalGenericAgentArgs(GenericAgentArgs): + + def make_agent(self): + return MultipleProposalGenericAgent( + chat_model_args=self.chat_model_args, flags=self.flags, max_retry=self.max_retry + ) + + def __post_init__(self): + """Prefix subagent name with 'HITL-'.""" + super().__post_init__() + if hasattr(self, "agent_name") and self.agent_name: + self.agent_name = "HITL-" + self.agent_name + + +class MultipleProposalGenericAgent(GenericAgent): + + def __init__( + self, + chat_model_args, + flags, + max_retry: int = 4, + ): + super().__init__(chat_model_args, flags, max_retry) + self.ui = None # Single HintLabeling instance + + def get_candidate_generation( + self, + sys_prompt: SystemMessage, + human_prompt: HumanMessage, + hint: list[str] | None = None, + n_candidates=3, + ) -> tuple[Dict[str, Dict[str, str]], Discussion]: + + cg = CandidatesGeneration(hint=hint, n_candidates=n_candidates) + candidates_prompt = HumanMessage(cg.prompt) + chat_messages = Discussion([sys_prompt, human_prompt, candidates_prompt]) + output = self.chat_llm(chat_messages) + candidates = cg._parse_answer(output["content"]) + self.step_n_human_intervention_rounds += 1 + msg_to_add_to_xray = Discussion([sys_prompt, human_prompt]) + + return candidates, msg_to_add_to_xray + + @cost_tracker_decorator + def get_action(self, obs): + # reset vars + step_hint = [] + self.step_n_human_intervention_rounds = 0 + self.obs_history.append(obs) + main_prompt = MainPrompt( + action_set=self.action_set, + obs_history=self.obs_history, + actions=self.actions, + memories=self.memories, + thoughts=self.thoughts, + previous_plan=self.plan, + step=self.plan_step, + flags=self.flags, + ) + + max_prompt_tokens, max_trunc_itr = self._get_maxes() + + system_prompt = SystemMessage(dp.SystemPrompt().prompt) + + human_prompt = dp.fit_tokens( + shrinkable=main_prompt, + max_prompt_tokens=max_prompt_tokens, + model_name=self.chat_model_args.model_name, + max_iterations=max_trunc_itr, + additional_prompts=system_prompt, + ) + # Initialize UI once outside the loop + if self.ui is None: + self.ui = HintLabeling(headless=False) + # Show initial waiting state + initial_inputs = HintLabelingInputs( + goal=( + obs.get("goal_object", [{}])[0].get("text", "") + if obs.get("goal_object") + else "" + ), + error_feedback="", + screenshot=(img_to_base_64(obs["screenshot"]) if "screenshot" in obs else ""), + screenshots=[], # no overlay screenshots yet + axtree=obs.get("axtree_txt", ""), + hints=[], + suggestions=[], # no suggestions yet + ) + self.ui.update_context(initial_inputs) + + # Generate first candidates + candidates, chat_messages = self.get_candidate_generation( + sys_prompt=system_prompt, + human_prompt=human_prompt, + 
hint=step_hint if step_hint else None, + ) + suggestions = [ + { + "id": key.split("_")[-1], + "action": candidate["action"], + "think": candidate["think"], + } + for key, candidate in candidates.items() + ] + # List of Images as base64 - create overlay screenshots for each suggestion + screenshots = [overlay_action(obs, choice["action"]) for choice in suggestions] + + while True: + try: + hint_labeling_inputs = HintLabelingInputs( + goal=( + obs.get("goal_object", [{}])[0].get("text", "") + if obs.get("goal_object") + else "" + ), + error_feedback=obs.get("last_action_error", ""), + screenshot=(img_to_base_64(obs["screenshot"]) if "screenshot" in obs else ""), + screenshots=screenshots, # list of overlay screenshots for hover + axtree=obs.get("axtree_txt", ""), + hints=step_hint, + suggestions=suggestions, + ) + + self.ui.update_context(hint_labeling_inputs) + response = self.ui.wait_for_response(timeout=600) + + if response["type"] == "reprompt": + new_hints = response["payload"].get("hints", []) + step_hint = list(new_hints) if isinstance(new_hints, list) else step_hint + candidates, chat_messages = self.get_candidate_generation( + sys_prompt=system_prompt, + human_prompt=human_prompt, + hint=step_hint if step_hint else None, + ) + suggestions = [ + { + "id": key.split("_")[-1], + "action": candidate["action"], + "think": candidate["think"], + } + for key, candidate in candidates.items() + ] + # Regenerate screenshots for new suggestions + screenshots = [overlay_action(obs, choice["action"]) for choice in suggestions] + # Continue the loop to show new suggestions + elif response["type"] == "step": + selected_action = response["payload"]["action"] + choice_idx = None + for i, candidate in enumerate(suggestions, 1): + if candidate["action"] == selected_action: + choice_idx = i + break + if choice_idx is None: + choice_idx = 1 + ans_dict = candidates[f"candidate_generation_{choice_idx}"] + break + else: + ans_dict = candidates["candidate_generation_1"] + break + + except KeyboardInterrupt: + print("User cancelled the operation") + if self.ui: + self.ui.close() + raise + except Exception as e: + print(f"Error in human intervention UI: {e}") + if self.ui: + self.ui.close() + self.ui = None + # Raise exception instead of falling back to console input + raise RuntimeError(f"Human intervention UI failed: {e}") from e + + # TODO: Refactor as discussed with ALAC. + stats = self.chat_llm.get_stats() + self.plan = ans_dict.get("plan", self.plan) + self.plan_step = ans_dict.get("step", self.plan_step) + self.actions.append(ans_dict["action"]) + self.memories.append(ans_dict.get("memory", None)) + self.thoughts.append(ans_dict.get("think", None)) + agent_info = AgentInfo( + think=ans_dict.get("think", None), + chat_messages=chat_messages, + stats=stats, + extra_info={ + "chat_model_args": asdict(self.chat_model_args), + "step_hints": step_hint, + "n_human_intervention_rounds": self.step_n_human_intervention_rounds, + "candidates": candidates, + "suggestions": suggestions, + }, + ) + return ans_dict["action"], agent_info + + +def get_base_agent(llm_config): + """Creates and returns a MultipleProposalGenericAgentArgs instance with + specified LLM configuration from CHAT_MODEL_ARGS_DICT. + + Args: + llm_config: The LLM configuration key to use from CHAT_MODEL_ARGS_DICT. + + Returns: + MultipleProposalGenericAgentArgs: Configured agent arguments instance. 
+ """ + + from agentlab.agents.generic_agent.tmlr_config import BASE_FLAGS + from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT + + return MultipleProposalGenericAgentArgs( + chat_model_args=CHAT_MODEL_ARGS_DICT[llm_config], + flags=BASE_FLAGS, + ) + + +HUMAN_GUIDED_GENERIC_AGENT = get_base_agent("openai/gpt-5-mini-2025-08-07") + +if __name__ == "__main__": + import logging + + from agentlab.agents.hitl_agent.generic_human_guided_agent import ( + HUMAN_GUIDED_GENERIC_AGENT, + ) + from agentlab.experiments.study import Study + + agent_configs = [HUMAN_GUIDED_GENERIC_AGENT] + benchmark = bgym.DEFAULT_BENCHMARKS["miniwob"]() + benchmark = benchmark.subset_from_glob("task_name", "*book*") + benchmark.env_args_list = benchmark.env_args_list[3:4] + + for env_args in benchmark.env_args_list: + env_args.max_steps = 100 # max human steps + env_args.headless = True + + Study(agent_configs, benchmark, logging_level=logging.WARNING).run( + n_jobs=1, + parallel_backend="sequential", + n_relaunch=1, + ) diff --git a/src/agentlab/agents/hitl_agent/hint_labelling.py b/src/agentlab/agents/hitl_agent/hint_labelling.py new file mode 100644 index 00000000..f1120f02 --- /dev/null +++ b/src/agentlab/agents/hitl_agent/hint_labelling.py @@ -0,0 +1,166 @@ +import json +import logging +from importlib import resources +from queue import Queue +from typing import Dict, List, Optional + +import playwright.sync_api +from browsergym.core import _get_global_playwright +from pydantic import BaseModel, Field + +logger = logging.getLogger(__name__) + +HINT_LABELING_DIR = resources.files("agentlab.agents.hitl_agent.hint_labelling_ui_files") + + +class HintLabelingInputs(BaseModel): + goal: str + error_feedback: str = "" + screenshot: str # base64 screenshot (original/current) + screenshots: List[str] = Field(default_factory=list) # list of base64 screenshots for hover + axtree: str + hints: List[str] = Field(default_factory=list) + suggestions: List[Dict[str, str]] = Field(default_factory=list) + + +class HintLabeling: + def __init__(self, headless: bool, *args, **kwargs): + pw_opt = _get_global_playwright() + pw: playwright.sync_api.Playwright = pw_opt # type: ignore[assignment] + self.browser = pw.chromium.launch(headless=headless) + self.context = self.browser.new_context( + no_viewport=True, + ) + self.page = self.context.new_page() + self._resp_queue = Queue() + + self.page.route("**/api/reprompt", self._route_reprompt) + self.page.route("**/api/submit", self._route_submit) + self.page.set_content(get_hint_labeling_ui(HINT_LABELING_DIR)) + + # internal state + self._context = None + self._running = False + + def _route_reprompt( + self, route: playwright.sync_api.Route, request: playwright.sync_api.Request + ): + logger.info("Route hit: %s %s", request.method, request.url) + try: + body = json.loads(request.post_data or "{}") + except Exception: + body = {} + # enqueue output 1 (reprompt) + hints = body.get("hints") + if not isinstance(hints, list): + # Back-compat: accept single 'hint' string + h = body.get("hint") + hints = [h] if isinstance(h, str) and h.strip() else [] + msg = {"type": "reprompt", "payload": {"hints": hints}} + self._resp_queue.put(msg) + # Respond something minimal so UI doesn’t break; it will be refreshed by a later update_context() + route.fulfill( + status=200, + content_type="application/json", + body=json.dumps({"suggestions": []}), + ) + + def _route_submit(self, route: playwright.sync_api.Route, request: playwright.sync_api.Request): + logger.info("Route hit: %s %s", request.method, 
request.url) + try: + body = json.loads(request.post_data or "{}") + except Exception: + body = {} + # Map UI payload -> your step shape + msg = { + "type": "step", + "payload": { + "think": body.get("think", ""), + "action": body.get("action", ""), + }, + } + self._resp_queue.put(msg) + # UI expects 200 JSON; we can optionally send new suggestions here too. + route.fulfill( + status=200, + content_type="application/json", + body=json.dumps({"suggestions": []}), + ) + + def _to_ui_bootstrap(self, ctx: HintLabelingInputs) -> dict: + return { + "goal": ctx.goal, + "error_feedback": ctx.error_feedback, + "screenshot": ctx.screenshot, + "screenshots": ctx.screenshots, # list of screenshots for hover + "axtree": ctx.axtree, + "hints": ctx.hints, + "suggestions": ctx.suggestions, + } + + def update_context(self, context: HintLabelingInputs): + self._context = context + ui_payload = self._to_ui_bootstrap(context) + # call JS function with arg (no string concat) + self.page.evaluate("(d) => updateContext(d)", ui_payload) + + def wait_for_response(self, timeout: Optional[float] = 600) -> dict: + """ + Wait until the page makes a request to /api/reprompt or /api/submit, + then parse the request body and return it in your schema. + + Args: + timeout (Optional[float]): Maximum time to wait for the request in seconds. If None or 0, + waits indefinitely. Defaults to 600 seconds. + + Returns: + dict: A dictionary containing the parsed response with 'type' and 'payload' keys. + For /api/reprompt: {'type': 'reprompt', 'payload': {'hints': list[str]}} + For /api/submit: {'type': 'step', 'payload': {'think': str, 'action': str}} + + """ + logger.info("Waiting for response from Hint Labeling UI...") + + def is_api(req: playwright.sync_api.Request) -> bool: + u = req.url + return ( + u.endswith("/api/reprompt") or u.endswith("/api/submit") + ) and req.method == "POST" + + # This pumps Playwright internally; no busy waiting. + with self.page.expect_request( + is_api, timeout=(timeout * 1000 if timeout else 0) + ) as req_info: + req = req_info.value + + body_text = req.post_data or "{}" + try: + body = json.loads(body_text) + except Exception as e: + print("JSON parse error:", e) + body = {} + + if req.url.endswith("/api/reprompt"): + hints = body.get("hints") + if not isinstance(hints, list): + h = body.get("hint") + hints = [h] if isinstance(h, str) and h.strip() else [] + msg = {"type": "reprompt", "payload": {"hints": hints}} + else: + msg = { + "type": "step", + "payload": {"think": body.get("think", ""), "action": body.get("action", "")}, + } + + logger.info("Response received: %s", msg) + return msg + + def close(self): + self.context.close() + self.browser.close() + + +def get_hint_labeling_ui(hint_labeling_dir) -> str: + with open(hint_labeling_dir / "hint_labeling_ui.html", "r") as file: + hint_labeling_html = file.read() + return hint_labeling_html diff --git a/src/agentlab/agents/hitl_agent/hint_labelling_ui_files/hint_labeling_ui.html b/src/agentlab/agents/hitl_agent/hint_labelling_ui_files/hint_labeling_ui.html new file mode 100644 index 00000000..a2c7b540 --- /dev/null +++ b/src/agentlab/agents/hitl_agent/hint_labelling_ui_files/hint_labeling_ui.html @@ -0,0 +1,703 @@ + + + + + + + Agent Reprompt UI + + + +
[Markup omitted: the 703-line hint_labeling_ui.html implements the "Agent Reprompt UI" — panels for Goal and Error Feedback, a screenshot viewer with per-suggestion overlay previews, an editable Hints list, and a Suggestions list; its script exposes updateContext(data) and POSTs the operator's choice to /api/reprompt or /api/submit.]
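The JSON bodies this page is expected to POST mirror what _route_reprompt and _route_submit parse in hint_labelling.py above. A minimal sketch of both payloads (the hint, think, and action values are illustrative only):

import json

# POST /api/reprompt -> HintLabeling.wait_for_response() returns
#   {"type": "reprompt", "payload": {"hints": [...]}}
reprompt_body = json.dumps({"hints": ["Use the date picker instead of typing the date"]})

# POST /api/submit -> wait_for_response() returns
#   {"type": "step", "payload": {"think": "...", "action": "..."}}
submit_body = json.dumps({"think": "The Book button is now visible.", "action": 'click(bid="a42")'})

# _route_reprompt also accepts the legacy single-hint body {"hint": "..."}.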
+ + + + \ No newline at end of file diff --git a/src/agentlab/agents/hitl_agent/hitl_agent.py b/src/agentlab/agents/hitl_agent/hitl_agent.py new file mode 100644 index 00000000..9b84793b --- /dev/null +++ b/src/agentlab/agents/hitl_agent/hitl_agent.py @@ -0,0 +1,205 @@ +from dataclasses import dataclass +from typing import Optional + +import bgym +import playwright +from browsergym.experiments.agent import Agent + +from agentlab.agents.agent_args import AgentArgs +from agentlab.agents.agent_utils import overlay_action +from agentlab.agents.hitl_agent.base_multi_candidate_agent import MultiCandidateAgent +from agentlab.agents.hitl_agent.hint_labelling import ( + HintLabeling, + HintLabelingInputs, +) +from agentlab.llm.llm_utils import img_to_base_64 +from agentlab.llm.tracking import cost_tracker_decorator + + +class HumanInTheLoopAgent(Agent): + + def __init__( + self, + subagent_args, # Type: any object with MultiCandidateAgent interface + ): + self.subagent: MultiCandidateAgent = subagent_args.make_agent() + super().__init__() + self.ui = None + + @cost_tracker_decorator + def get_action(self, obs): + # reset vars + step_n_human_intervention_rounds = 0 + step_hint = [] + + # Initialize UI once outside the loop + if self.ui is None: + self.ui = HintLabeling(headless=False) + # Show initial waiting state + initial_inputs = HintLabelingInputs( + goal=( + obs.get("goal_object", [{}])[0].get("text", "") + if obs.get("goal_object") + else "" + ), + error_feedback="", + screenshot=(img_to_base_64(obs["screenshot"]) if "screenshot" in obs else ""), + screenshots=[], # no overlay screenshots yet + axtree=obs.get("axtree_txt", ""), + hints=[], + suggestions=[], # no suggestions yet + ) + self.ui.update_context(initial_inputs) + + # Generate first candidates + candidates = self.subagent.get_candidate_generations(obs, hint=None, n_candidates=3) + step_n_human_intervention_rounds += 1 + suggestions = [{"action": c["action"], "think": c["agent_info"].think} for c in candidates] + # List of Images as base64 - create overlay screenshots for each suggested action + screenshots = [overlay_action(obs, choice["action"]) for choice in suggestions] + + while True: + try: + hint_labeling_inputs = HintLabelingInputs( + goal=( + obs.get("goal_object", [{}])[0].get("text", "") + if obs.get("goal_object") + else "" + ), + error_feedback=obs.get("last_action_error", ""), + screenshot=(img_to_base_64(obs["screenshot"]) if "screenshot" in obs else ""), + screenshots=screenshots, # list of overlay screenshots for hover + axtree=obs.get("axtree_txt", ""), + hints=step_hint, + suggestions=suggestions, + ) + + self.ui.update_context(hint_labeling_inputs) + response = self.ui.wait_for_response(timeout=600) + + if response["type"] == "reprompt": + new_hints = response["payload"].get("hints", []) + # Replace with the new list from UI, or extend if needed + step_hint = list(new_hints) if isinstance(new_hints, list) else step_hint + candidates = self.subagent.get_candidate_generations( + obs, hint=step_hint if step_hint else None, n_candidates=3 + ) + step_n_human_intervention_rounds += 1 + suggestions = [ + {"action": c["action"], "think": c["agent_info"].think} for c in candidates + ] + screenshots = [overlay_action(obs, choice["action"]) for choice in suggestions] + + elif response["type"] == "step": + selected_action = response["payload"]["action"] + choice_idx = None + for i, candidate in enumerate(suggestions): + if candidate["action"] == selected_action: + choice_idx = i + break + selected_candidate = 
candidates[choice_idx] + self.subagent.update_agent_state_from_selected_candidate(selected_candidate) + action = selected_candidate["action"] + agent_info = selected_candidate["agent_info"] + return action, agent_info + + except KeyboardInterrupt: + print("User cancelled the operation") + if self.ui: + self.ui.close() + raise + except playwright.sync_api.TimeoutError: + # Handle timeout specifically: fall back to first candidate + print("UI timeout; falling back to first candidate.") + selected_candidate = candidates[0] + self.subagent.update_agent_state_from_selected_candidate(selected_candidate) + action = selected_candidate["action"] + agent_info = selected_candidate["agent_info"] + return action, agent_info + except Exception as e: + print(f"Error in human intervention UI: {e}") + if self.ui: + self.ui.close() + self.ui = None + # Raise exception instead of falling back to console input + raise RuntimeError(f"Human intervention UI failed: {e}") from e + + +@dataclass +class HumanInTheLoopAgentArgs(AgentArgs): + subagent_args: Optional[AgentArgs] = None # args for the underlying multiple proposal agent + + def make_agent(self): + assert self.subagent_args is not None + return HumanInTheLoopAgent(subagent_args=self.subagent_args) + + def __post_init__(self): + """Prefix subagent name with 'HITL-'.""" + super().__post_init__() + if self.subagent_args and self.subagent_args.agent_name: + self.agent_name = "HITL-" + self.subagent_args.agent_name + + def set_benchmark(self, benchmark, demo_mode): + """Delegate set_benchmark to the subagent if it has the method.""" + if hasattr(self.subagent_args, "set_benchmark"): + self.subagent_args.set_benchmark(benchmark, demo_mode) + + def set_reproducibility_mode(self): + """Delegate set_reproducibility_mode to the subagent if it has the method.""" + if hasattr(self.subagent_args, "set_reproducibility_mode"): + self.subagent_args.set_reproducibility_mode() + + +def get_base_human_in_the_loop_genericagent(llm_config): + """ + Create a base human-in-the-loop generic agent configuration using the key from CHAT_MODEL_ARGS_DICT. + + This function creates a HumanInTheLoopAgentArgs instance with a MultiCandidateGenericAgent + as the subagent, configured with the specified LLM configuration and base flags. + + Args: + llm_config (str): The LLM configuration key to use from CHAT_MODEL_ARGS_DICT. + + Returns: + HumanInTheLoopAgentArgs: Configured human-in-the-loop agent arguments with + a multi-candidate generic agent as the subagent. 
+ """ + from agentlab.agents.generic_agent.tmlr_config import BASE_FLAGS + from agentlab.agents.hitl_agent.hitl_agent import HumanInTheLoopAgentArgs + from agentlab.agents.hitl_agent.multi_candidate_generic_agent import ( + MultiCandidateGenericAgentArgs, + ) + from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT + + return HumanInTheLoopAgentArgs( + subagent_args=MultiCandidateGenericAgentArgs( + chat_model_args=CHAT_MODEL_ARGS_DICT[llm_config], + flags=BASE_FLAGS, + ) + ) + + +HUMAN_GUIDED_GENERIC_AGENT = get_base_human_in_the_loop_genericagent("openai/gpt-5-mini-2025-08-07") + +if __name__ == "__main__": + import logging + + from agentlab.agents.hitl_agent.hitl_agent import ( + HUMAN_GUIDED_GENERIC_AGENT, + ) + from agentlab.experiments.study import Study + + agent_configs = [HUMAN_GUIDED_GENERIC_AGENT] + benchmark = bgym.DEFAULT_BENCHMARKS["miniwob"]() + benchmark = benchmark.subset_from_glob("task_name", "*book*") + benchmark.env_args_list = benchmark.env_args_list[2:3] + + for env_args in benchmark.env_args_list: + env_args.max_steps = 100 # max human steps + env_args.headless = False + + Study(agent_configs, benchmark, logging_level=logging.WARNING).run( + n_jobs=1, + parallel_backend="sequential", + n_relaunch=1, + ) diff --git a/src/agentlab/agents/hitl_agent/launch_hint_ui.py b/src/agentlab/agents/hitl_agent/launch_hint_ui.py new file mode 100644 index 00000000..df2e9dbc --- /dev/null +++ b/src/agentlab/agents/hitl_agent/launch_hint_ui.py @@ -0,0 +1,176 @@ +""" +Console launcher for the Human-in-the-Loop Generic Agent UI. + +Usage (installed entry point): + agentlab-mentor --benchmark miniwob --task-name miniwob.book-flight --seed 123 --no-headless + +This will run a Study with the MultipleProposalGenericAgent and the selected task. +""" + +from __future__ import annotations + +import argparse +import logging +from pathlib import Path + +import bgym + +from agentlab.agents.hitl_agent.generic_human_guided_agent import get_base_agent +from agentlab.experiments.exp_utils import RESULTS_DIR +from agentlab.experiments.study import Study + + +def build_benchmark(benchmark_name: str, task_name: str, seed: int, headless: bool): + # Instantiate benchmark by name using BrowserGym registry + try: + benchmark = bgym.DEFAULT_BENCHMARKS[benchmark_name.lower()]() + except KeyError as e: + choices = ", ".join(sorted(bgym.DEFAULT_BENCHMARKS.keys())) + raise SystemExit(f"Unknown benchmark '{benchmark_name}'. Choose one of: {choices}") from e + + filtered_env_args = [ + env_args for env_args in benchmark.env_args_list if env_args.task_name == task_name + ] + if not filtered_env_args: + raise SystemExit(f'No tasks found matching "{task_name}"') + filtered_env_args = filtered_env_args[:1] # take the first one + benchmark.env_args_list = filtered_env_args + + # Reasonable defaults for interactive UI + for env_args in benchmark.env_args_list: + env_args.task_seed = seed + env_args.max_steps = env_args.max_steps or 200 + env_args.headless = headless + + return benchmark + + +def extract_hints_from_experiment_trace(exp_dir): + """Extracts hints from every step of each episode in a exp_dir and returns a df with each row containing a hint. + + Args: + exp_dir: Path-like to a study/experiment directory whose results should be scanned. + + Returns: + pandas.DataFrame: One row per hint with metadata columns. 
+ """ + import pandas as pd + + from agentlab.analyze import inspect_results + from agentlab.experiments.exp_utils import RESULTS_DIR + from agentlab.experiments.loop import ExpResult + + output = [] + # Use provided exp_dir if set; otherwise default to <$AGENTLAB_EXP_ROOT>/agentlab_mentor + result_df = inspect_results.load_result_df(exp_dir or (RESULTS_DIR / "agentlab_mentor")) + if result_df is None: + # No results to parse; return empty dataframe with expected columns + return pd.DataFrame( + columns=[ + "exp_id", + "agent_name", + "benchmark", + "task_name", + "episode_reward", + "hint", + ] + ) + result_df = result_df.reset_index() + for _, row in result_df.iterrows(): + result = ExpResult(row.exp_dir) + episode = result.steps_info + episode_reward = max([step.reward for step in episode]) + for step_info in episode: + step_hints = step_info.agent_info.get("extra_info", {}).get("step_hints", None) + if step_hints: + for hint in step_hints: + output.append( + { + "exp_id": row["exp_id"], + "agent_name": row["agent.agent_name"], + "benchmark": row["env.task_name"].split(".")[0], + "task_name": row["env.task_name"], + "episode_reward": episode_reward, + "hint": hint, + } + ) + output = pd.DataFrame(output) + output = output.dropna() + return output + + +def parse_args(): + p = argparse.ArgumentParser(description="Run HITL Generic Agent UI on a benchmark task") + p.add_argument( + "--benchmark", + required=False, + help="Benchmark name as registered in BrowserGym, e.g., miniwob, workarena_l1, webarena, visualwebarena", + ) + p.add_argument( + "--task-name", + dest="task_name", + required=False, + help="Exact task name within the benchmark (e.g., 'miniwob.book-flight')", + ) + p.add_argument( + "--seed", + type=int, + required=False, + help="Task seed to use for the selected task.", + ) + p.add_argument( + "--llm-config", + dest="llm_config", + default="openai/gpt-5-mini-2025-08-07", + help="LLM configuration to use for the agent (e.g., 'azure/gpt-5-mini-2025-08-07').", + ) + p.add_argument( + "--headless", + action=argparse.BooleanOptionalAction, + default=True, + help="Run the browser headless (default: True). Use --no-headless to show the browser.", + ) + p.add_argument( + "--download-hints", + nargs="?", + const="extracted_hints.csv", + required=False, + default=None, + metavar="[OUTPUT_CSV]", + help=( + "Extract hints from the default study directory and save to OUTPUT_CSV. " + "If OUTPUT_CSV is omitted, saves to 'extracted_hints.csv'. When provided, other args are ignored." + ), + ) + return p.parse_args() + + +def main(): + args = parse_args() + save_dir = RESULTS_DIR / "agentlab_mentor" + if args.download_hints: + df = extract_hints_from_experiment_trace(save_dir) + out_path = Path(args.download_hints) + out_path.parent.mkdir(parents=True, exist_ok=True) + df.to_csv(out_path, index=False) + print(str(out_path)) + return + # Validate required args only when not downloading hints + if not args.benchmark or not args.task_name or args.seed is None: + raise SystemExit( + "--benchmark, --task-name, and --seed are required unless using --download-hints" + ) + benchmark = build_benchmark(args.benchmark, args.task_name, args.seed, args.headless) + agent_configs = [get_base_agent(args.llm_config)] + # study is needed to run the 'set_benchmark' method which sets appropriate agent parameters. 
+ study = Study(agent_args=agent_configs, benchmark=benchmark, logging_level=logging.WARNING) + study.run( + n_jobs=1, + parallel_backend="sequential", + n_relaunch=1, + exp_root=save_dir, + ) + + +if __name__ == "__main__": + main() diff --git a/src/agentlab/agents/hitl_agent/multi_candidate_generic_agent.py b/src/agentlab/agents/hitl_agent/multi_candidate_generic_agent.py new file mode 100644 index 00000000..e4e53b7a --- /dev/null +++ b/src/agentlab/agents/hitl_agent/multi_candidate_generic_agent.py @@ -0,0 +1,225 @@ +import re +from dataclasses import asdict, dataclass +from typing import Dict, List + +from browsergym.experiments.agent import AgentInfo + +from agentlab.agents import dynamic_prompting as dp +from agentlab.agents.generic_agent.generic_agent import GenericAgent, GenericAgentArgs +from agentlab.agents.generic_agent.generic_agent_prompt import MainPrompt +from agentlab.llm.llm_utils import Discussion, HumanMessage, SystemMessage + + +class CandidatesGeneration(dp.PromptElement): + # Ask for multiple alternatives; each candidate must contain and . + def __init__(self, hint: list[str] | None = None, n_candidates=3) -> None: + self.hint = hint + self.n_candidates = n_candidates + self.hint_prompt = "\n".join(f"{i}. {c}" for i, c in enumerate(hint, 1)) if hint else "" + super().__init__(True) + self._prompt = [ + dict( + type="text", + text=f""" + You are a web agent. Propose {self.n_candidates} alternative next steps for the current page. + {('Use the Hints:' + self.hint_prompt) if self.hint else ""}\n + Return EACH candidate wrapped as numbered tags: + ... + ... + + Inside every candidate you MUST include: + ...why this action is appropriate now... + ...ONE atomic, executable action string... + + Do not include any extra text outside the candidate tags. + Use this format: + + Explain why Candidate One is chosen + Candidate One Action + + + + Explain why Candidate Two is chosen + Candidate Two Action + + # Example + + The login button is visible and proceeding will reveal the auth form. + click(role="button", name="Log in") + + + + User might need to enter email first; the email field is focused and visible. + fill(bid="a112", text="user@example.com") + + """, + ) + ] + + # Regex patterns for numbered candidates only + _NUM_BLOCK = re.compile( + r"<\s*candidate[_ ]generation[_ ](?P[0-9]+)\s*>(?P.*?)<\s*/\s*candidate[_ ]generation[_ ](?P=idx)\s*>", + flags=re.IGNORECASE | re.DOTALL, + ) + _THINK_PATTERN = re.compile( + r"<\s*think\s*>(?P.*?)<\s*/\s*think\s*>", + flags=re.IGNORECASE | re.DOTALL, + ) + _ACTION_PATTERN = re.compile( + r"<\s*action\s*>(?P.*?)<\s*/\s*action\s*>", + flags=re.IGNORECASE | re.DOTALL, + ) + + def _parse_answer(self, text_answer: str) -> Dict[str, Dict[str, str]]: + """Extract up to n_candidates candidates, using numbered tags only. + + Args: + text_answer: The text response containing candidate generation tags. + + Returns: + Dictionary mapping candidate names to their think and action content. 
+ Format: {"candidate_generation_1": {"think": "...", "action": "..."}, ...} + """ + result = { + f"candidate_generation_{i+1}": {"think": "", "action": ""} + for i in range(self.n_candidates) + } + + if not isinstance(text_answer, str): + return result + + matches: List[re.Match] = list(self._NUM_BLOCK.finditer(text_answer)) + # Sort by numeric index + matches_sorted = sorted(matches, key=lambda m: int(m.group("idx"))) + for i, m in enumerate(matches_sorted[: self.n_candidates]): + body = m.group("body").strip() + think_m = self._THINK_PATTERN.search(body) + action_m = self._ACTION_PATTERN.search(body) + result[f"candidate_generation_{i+1}"] = { + "think": (think_m.group("think").strip() if think_m else ""), + "action": (action_m.group("action").strip() if action_m else ""), + } + + return result + + +class MultiCandidateGenericAgent(GenericAgent): + + def __init__( + self, + chat_model_args, + flags, + max_retry: int = 4, + ): + super().__init__(chat_model_args, flags, max_retry) + + def get_candidate_generations( + self, + obs, + hint: list[str] | None = None, + n_candidates=3, + ) -> list[dict]: + # Append obs to history only if it's not already the last entry + # Important to handle cases when get_candidate_generation is called multiple times in a single step. + if not self.obs_history or self.obs_history[-1] is not obs: + self.obs_history.append(obs) + + main_prompt = MainPrompt( + action_set=self.action_set, + obs_history=self.obs_history, + actions=self.actions, + memories=self.memories, + thoughts=self.thoughts, + previous_plan=self.plan, + step=self.plan_step, + flags=self.flags, + ) + max_prompt_tokens, max_trunc_itr = self._get_maxes() + + system_prompt = SystemMessage(dp.SystemPrompt().prompt) + + human_prompt = dp.fit_tokens( + shrinkable=main_prompt, + max_prompt_tokens=max_prompt_tokens, + model_name=self.chat_model_args.model_name, + max_iterations=max_trunc_itr, + additional_prompts=system_prompt, + ) + + cg = CandidatesGeneration(hint=hint, n_candidates=n_candidates) + candidates_prompt = HumanMessage(cg.prompt) + chat_messages = Discussion([system_prompt, human_prompt, candidates_prompt]) + output = self.chat_llm(chat_messages) + candidates = cg._parse_answer(output["content"]) + # Not adding the generate candidate prompt to xray. + msg_to_add_to_xray = Discussion([system_prompt, human_prompt]) + suggestions = [ + { + "action": candidate["action"], + "think": candidate["think"], + } + for key, candidate in candidates.items() + ] + output = [] + for candidate in suggestions: + agent_info = AgentInfo( + think=candidate.get("think", None), + chat_messages=msg_to_add_to_xray, + stats=self.chat_llm.get_stats(), + extra_info={ + "chat_model_args": asdict(self.chat_model_args), + "think": candidate.get("think", None), + "plan": candidate.get("plan", None), + "step": candidate.get("step", None), + "memory": candidate.get("memory", None), + }, + ) + output.append({"action": candidate["action"], "agent_info": agent_info}) + + return output + + def update_agent_state_from_selected_candidate(self, output): + """Updates the agent's internal state based on the selected candidate from human feedback. + + Args: + output: Dictionary containing 'action' and 'agent_info' keys from selected candidate. 
+ """ + action, agent_info = output["action"], output["agent_info"] + self.plan = agent_info.extra_info.get("plan", self.plan) + self.plan_step = agent_info.extra_info.get("step", self.plan_step) + self.memories.append(agent_info.extra_info.get("memory", None)) + self.thoughts.append(agent_info.extra_info.get("think", None)) + self.actions.append(action) + + def get_action(self, obs): + """Generates multiple candidates and always returns the first one. + This allows to use this agent as a drop-in replacement for a single-candidate agent. + + Args: + obs: The observation from the environment. + + Returns: + tuple: A tuple containing (action, agent_info). + """ + candidates = self.get_candidate_generations(obs, hint=None, n_candidates=2) + selection = candidates[0] # always select the first option. + self.update_agent_state_from_selected_candidate(selection) + action, agent_info = selection["action"], selection["agent_info"] + + return action, agent_info + + +@dataclass +class MultiCandidateGenericAgentArgs(GenericAgentArgs): + def make_agent(self): + return MultiCandidateGenericAgent( + chat_model_args=self.chat_model_args, + flags=self.flags, + max_retry=self.max_retry, + ) + + def __post_init__(self): + """Prefix subagent name with 'MC-'.""" + super().__post_init__() + if hasattr(self, "agent_name") and self.agent_name: + self.agent_name = "MC-" + self.agent_name diff --git a/src/agentlab/analyze/agent_xray.py b/src/agentlab/analyze/agent_xray.py index 8accbfd6..6dbec117 100644 --- a/src/agentlab/analyze/agent_xray.py +++ b/src/agentlab/analyze/agent_xray.py @@ -818,6 +818,18 @@ def update_agent_info_html(): s1, action_str = get_screenshot(info, info.step, False) s2, action_str = get_screenshot(info, info.step + 1, False) agent_info = info.exp_result.steps_info[info.step].agent_info + # Minimal: show step_hints if present + hints = ( + agent_info.get("step_hints") + or agent_info.get("hints") + or agent_info.get("extra_info", {}).get("step_hints") + ) + if hints: + if not isinstance(hints, (list, tuple)): + hints = [hints] + items = "".join(f"
<div>• {html.escape(str(h))}</div>" for h in hints) + hints_html = f"<div><h4>Step Hints</h4>{items}</div>
    " + return _page_to_iframe(hints_html), s1, s2 page = agent_info.get("html_page", ["No Agent Info"]) if page is None: page = """Fill up html_page attribute in AgentInfo to display here.""" diff --git a/src/agentlab/llm/llm_utils.py b/src/agentlab/llm/llm_utils.py index 10013b72..2bc83d43 100644 --- a/src/agentlab/llm/llm_utils.py +++ b/src/agentlab/llm/llm_utils.py @@ -727,6 +727,16 @@ def image_to_png_base64_url(image: np.ndarray | Image.Image): return f"data:image/png;base64,{image_base64}" +def img_to_base_64(image: Image.Image | np.ndarray) -> str: + """Converts a PIL Image or NumPy array to a base64-encoded string.""" + if isinstance(image, np.ndarray): + image = Image.fromarray(image) + buffer = io.BytesIO() + image.save(buffer, format="PNG") + b64_str = base64.b64encode(buffer.getvalue()).decode("utf-8") + return b64_str + + class BaseMessage(dict): def __init__(self, role: str, content: Union[str, list[dict]], **kwargs): allowed_attrs = {"log_probs"}