From ed81011d69929b47547561341e117c28a70d17bc Mon Sep 17 00:00:00 2001 From: Oleh Shliazhko Date: Fri, 25 Jul 2025 14:55:32 +0200 Subject: [PATCH 1/3] update hints --- src/agentlab/agents/tool_use_agent/hint_db.csv | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/agentlab/agents/tool_use_agent/hint_db.csv b/src/agentlab/agents/tool_use_agent/hint_db.csv index 8333de2b..055f10c0 100644 --- a/src/agentlab/agents/tool_use_agent/hint_db.csv +++ b/src/agentlab/agents/tool_use_agent/hint_db.csv @@ -21,6 +21,18 @@ July 13,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,W July 13,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,"Before clicking submit, make sure that all fields are filled properly. Then click submit." July 13,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,Avoid back and forth from tabs to tabs to reduce the number of actions July 14,workarena.servicenow.create-hardware-asset,385,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,allac,Filling form in WorkArena,When you see auto-complete make sure to select an element from that list +July 24,workarena.servicenow.filter-incident-list,343,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When start filtering use 'Show filter button' to show the filter row" July 24,workarena.servicenow.filter-incident-list,647,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When observing combobox selector in the row of a filter, click the table cell that contains the combobox and not the combobox itself" -July 24,workarena.servicenow.filter-incident-list,64,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When choosing the operator of the filter, use select_option for combobox instead of click" July 24,workarena.servicenow.filter-incident-list,856,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"After filling the value of the filter field combobox choose the correct option from the dropdown list with click" +July 24,workarena.servicenow.filter-incident-list,64,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When choosing the operator of the filter, use select_option for combobox instead of click" +July 24,workarena.servicenow.filter-incident-list,812,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"If the field for a filter value is a combobox, use select_option for choosing the correct value instead of click" +July 24,workarena.servicenow.filter-service-catalog-item-list,343,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When start filtering use 'Show filter button' to show the filter row" +July 24,workarena.servicenow.filter-service-catalog-item-list,647,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When observing combobox selector in the row of a filter, click the table cell that contains the combobox and not the combobox itself" +July 24,workarena.servicenow.filter-service-catalog-item-list,856,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"After filling the value of the filter field combobox choose the correct option from the dropdown list with click" +July 24,workarena.servicenow.filter-service-catalog-item-list,64,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When choosing the operator of the filter, use select_option for combobox instead of click" +July 24,workarena.servicenow.filter-service-catalog-item-list,812,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"If the field for a filter value is a combobox, use select_option for choosing the correct value instead of click" +July 24,workarena.servicenow.filter-user-list,343,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When start filtering use 'Show filter button' to show the filter row" +July 24,workarena.servicenow.filter-user-list,647,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When observing combobox selector in the row of a filter, click the table cell that contains the combobox and not the combobox itself" +July 24,workarena.servicenow.filter-user-list,856,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"After filling the value of the filter field combobox choose the correct option from the dropdown list with click" +July 24,workarena.servicenow.filter-user-list,64,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"When choosing the operator of the filter, use select_option for combobox instead of click" +July 24,workarena.servicenow.filter-user-list,812,gpt-4.1,ToolUse-gpt-4.1,WorkArena-L1,WorkArena-L1,oleh,Filtering the list,"If the field for a filter value is a combobox, use select_option for choosing the correct value instead of click" From 0bc669e4b3b3d83f31bd3b468faaa4c94319d023 Mon Sep 17 00:00:00 2001 From: Oleh Shliazhko Date: Fri, 25 Jul 2025 18:01:45 +0200 Subject: [PATCH 2/3] llm retrieval of hints based on the relevance of semantic key of the hint to the given goal --- .../agents/tool_use_agent/tool_use_agent.py | 56 +++++++++++++++++-- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/src/agentlab/agents/tool_use_agent/tool_use_agent.py b/src/agentlab/agents/tool_use_agent/tool_use_agent.py index 7b2c4e61..f60db89d 100644 --- a/src/agentlab/agents/tool_use_agent/tool_use_agent.py +++ b/src/agentlab/agents/tool_use_agent/tool_use_agent.py @@ -1,10 +1,13 @@ import fnmatch import json +import logging from abc import ABC, abstractmethod +from collections import defaultdict from copy import copy from dataclasses import asdict, dataclass, field +from email.policy import default from pathlib import Path -from typing import Any +from typing import Any, Literal import bgym import pandas as pd @@ -34,6 +37,8 @@ ) from agentlab.llm.tracking import cost_tracker_decorator +logger = logging.getLogger(__name__) + @dataclass class Block(ABC): @@ -296,6 +301,10 @@ def apply_init(self, llm, discussion: StructuredDiscussion) -> dict: class TaskHint(Block): use_task_hint: bool = True hint_db_rel_path: str = "hint_db.csv" + hint_retrieval_mode: Literal["direct", "llm"] = "direct" # direct or retrieval + llm_prompt: str = """We're choosing hints to help solve the following task:\n{goal}.\n +You need to choose the most relevant hints topic from the following list:\n\nHint topics:\n{topics}\n +Choose hint for the task and return only its numbers, e.g. 1. If you don't know the answer, return -1.""" def _init(self): """Initialize the block.""" @@ -306,9 +315,8 @@ def apply(self, llm, discussion: StructuredDiscussion, task_name: str) -> dict: if not self.use_task_hint: return - task_hints = self.hint_db[ - self.hint_db["task_name"].apply(lambda x: fnmatch.fnmatch(x, task_name)) - ] + goal = "\n".join([c.get("text", "") for c in discussion.groups[0].messages[1].content]) + task_hints = self.choose_hints(llm, task_name, goal) hints = [] for hint in task_hints["hint"]: @@ -325,6 +333,44 @@ def apply(self, llm, discussion: StructuredDiscussion, task_name: str) -> dict: discussion.append(msg) + def choose_hints(self, llm, task_name: str, goal: str) -> pd.DataFrame: + """Choose hints based on the task name.""" + if self.hint_retrieval_mode == "llm": + return self.choose_hints_llm(llm, goal) + elif self.hint_retrieval_mode == "direct": + return self.choose_hints_direct(task_name) + else: + raise ValueError(f"Unknown hint retrieval mode: {self.hint_retrieval_mode}") + + def choose_hints_llm(self, llm, goal: str) -> pd.DataFrame: + """Choose hints using LLM to filter the hints.""" + topic_to_hints = defaultdict(list) + for i, row in self.hint_db.iterrows(): + topic_to_hints[row["semantic_keys"]].append(i) + hint_topics = list(topic_to_hints.keys()) + topics = "\n".join([f"{i}. {h}" for i, h in enumerate(hint_topics)]) + prompt = self.llm_prompt.format(goal=goal, topics=topics) + response = llm(APIPayload(messages=[llm.msg.user().add_text(prompt)])) + try: + hint_topic_idx = json.loads(response.think) + if hint_topic_idx < 0 or hint_topic_idx >= len(hint_topics): + logger.error(f"Wrong LLM hint id response: {response.think}, return no hints") + return pd.DataFrame(columns=self.hint_db.columns) + hint_topic = hint_topics[hint_topic_idx] + hint_indices = topic_to_hints[hint_topic] + df = self.hint_db.iloc[hint_indices].copy() + df = df.drop_duplicates(subset=["hint"], keep="first") # leave only unique hints + logger.debug(f"LLM hint topic {hint_topic_idx}, chosen hints: {df['hint'].tolist()}") + except json.JSONDecodeError: + logger.error(f"Failed to parse LLM hint id response: {response.think}, return no hints") + df = pd.DataFrame(columns=self.hint_db.columns) + return df + + def choose_hints_direct(self, task_name: str) -> pd.DataFrame: + return self.hint_db[ + self.hint_db["task_name"].apply(lambda x: fnmatch.fnmatch(x, task_name)) + ] + @dataclass class PromptConfig: @@ -583,7 +629,7 @@ def get_action(self, obs: Any) -> float: ), summarizer=Summarizer(do_summary=True), general_hints=GeneralHints(use_hints=False), - task_hint=TaskHint(use_task_hint=True), + task_hint=TaskHint(use_task_hint=True, hint_retrieval_mode="llm"), keep_last_n_obs=None, multiaction=True, # whether to use multi-action or not # action_subsets=("bid",), From 56dc3923ee7dde77238da8913f31f9f802d78ad6 Mon Sep 17 00:00:00 2001 From: Oleh Shliazhko Date: Mon, 28 Jul 2025 11:37:56 +0200 Subject: [PATCH 3/3] adjust prompt --- src/agentlab/agents/tool_use_agent/tool_use_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentlab/agents/tool_use_agent/tool_use_agent.py b/src/agentlab/agents/tool_use_agent/tool_use_agent.py index f60db89d..31c8483b 100644 --- a/src/agentlab/agents/tool_use_agent/tool_use_agent.py +++ b/src/agentlab/agents/tool_use_agent/tool_use_agent.py @@ -304,7 +304,7 @@ class TaskHint(Block): hint_retrieval_mode: Literal["direct", "llm"] = "direct" # direct or retrieval llm_prompt: str = """We're choosing hints to help solve the following task:\n{goal}.\n You need to choose the most relevant hints topic from the following list:\n\nHint topics:\n{topics}\n -Choose hint for the task and return only its numbers, e.g. 1. If you don't know the answer, return -1.""" +Choose hint topic for the task and return only its number, e.g. 1. If you don't know the answer, return -1.""" def _init(self): """Initialize the block."""