From 64ddeb94b84508ee827731676eb79d189be8cb94 Mon Sep 17 00:00:00 2001 From: recursix Date: Wed, 4 Dec 2024 16:04:05 -0500 Subject: [PATCH 1/3] bug from a browsergym update --- src/agentlab/analyze/inspect_results.py | 2 -- src/agentlab/experiments/exp_utils.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/agentlab/analyze/inspect_results.py b/src/agentlab/analyze/inspect_results.py index 23f41f9b..41ee5c93 100644 --- a/src/agentlab/analyze/inspect_results.py +++ b/src/agentlab/analyze/inspect_results.py @@ -14,8 +14,6 @@ from IPython.display import display from tqdm import tqdm -from agentlab.experiments.exp_utils import RESULTS_DIR - # TODO find a more portable way to code set_task_category_as_index at least # handle dynamic imports. We don't want to always import workarena # from browsergym.workarena import TASK_CATEGORY_MAP diff --git a/src/agentlab/experiments/exp_utils.py b/src/agentlab/experiments/exp_utils.py index 6a36d37a..5759e6d1 100644 --- a/src/agentlab/experiments/exp_utils.py +++ b/src/agentlab/experiments/exp_utils.py @@ -6,7 +6,7 @@ from pathlib import Path from time import sleep, time -from browsergym.experiments.loop import ExpArgs, _move_old_exp, yield_all_exp_results +from browsergym.experiments.loop import ExpArgs, yield_all_exp_results from tqdm import tqdm logger = logging.getLogger(__name__) # Get logger based on module name From a5576998cf300bb99becfce99f66ce633c7560f0 Mon Sep 17 00:00:00 2001 From: recursix Date: Wed, 4 Dec 2024 16:05:06 -0500 Subject: [PATCH 2/3] Fix retry_multiple function to handle single and multiple answers correctly --- src/agentlab/llm/llm_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/agentlab/llm/llm_utils.py b/src/agentlab/llm/llm_utils.py index d7dcc5cf..824f9135 100644 --- a/src/agentlab/llm/llm_utils.py +++ b/src/agentlab/llm/llm_utils.py @@ -126,9 +126,13 @@ def retry_multiple( """ tries = 0 while tries < n_retry: - answer_list = chat(messages, num_samples=num_samples) + answer_list = chat(messages, n_samples=num_samples) # TODO: could we change this to not use inplace modifications ? - messages.append(answer) + if not isinstance(answer_list, list): + answer_list = [answer_list] + + # TODO taking the 1st hides the other generated answers in AgentXRay + messages.append(answer_list[0]) parsed_answers = [] errors = [] for answer in answer_list: From 54356e5754e417d6884ec39857eaf5548a695771 Mon Sep 17 00:00:00 2001 From: recursix Date: Wed, 4 Dec 2024 16:10:39 -0500 Subject: [PATCH 3/3] black --- src/agentlab/llm/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentlab/llm/llm_utils.py b/src/agentlab/llm/llm_utils.py index 824f9135..2bbf219d 100644 --- a/src/agentlab/llm/llm_utils.py +++ b/src/agentlab/llm/llm_utils.py @@ -132,7 +132,7 @@ def retry_multiple( answer_list = [answer_list] # TODO taking the 1st hides the other generated answers in AgentXRay - messages.append(answer_list[0]) + messages.append(answer_list[0]) parsed_answers = [] errors = [] for answer in answer_list: