diff --git a/src/agentlab/analyze/inspect_results.py b/src/agentlab/analyze/inspect_results.py index 23f41f9b..41ee5c93 100644 --- a/src/agentlab/analyze/inspect_results.py +++ b/src/agentlab/analyze/inspect_results.py @@ -14,8 +14,6 @@ from IPython.display import display from tqdm import tqdm -from agentlab.experiments.exp_utils import RESULTS_DIR - # TODO find a more portable way to code set_task_category_as_index at least # handle dynamic imports. We don't want to always import workarena # from browsergym.workarena import TASK_CATEGORY_MAP diff --git a/src/agentlab/experiments/exp_utils.py b/src/agentlab/experiments/exp_utils.py index 6a36d37a..5759e6d1 100644 --- a/src/agentlab/experiments/exp_utils.py +++ b/src/agentlab/experiments/exp_utils.py @@ -6,7 +6,7 @@ from pathlib import Path from time import sleep, time -from browsergym.experiments.loop import ExpArgs, _move_old_exp, yield_all_exp_results +from browsergym.experiments.loop import ExpArgs, yield_all_exp_results from tqdm import tqdm logger = logging.getLogger(__name__) # Get logger based on module name diff --git a/src/agentlab/llm/llm_utils.py b/src/agentlab/llm/llm_utils.py index d7dcc5cf..2bbf219d 100644 --- a/src/agentlab/llm/llm_utils.py +++ b/src/agentlab/llm/llm_utils.py @@ -126,9 +126,13 @@ def retry_multiple( """ tries = 0 while tries < n_retry: - answer_list = chat(messages, num_samples=num_samples) + answer_list = chat(messages, n_samples=num_samples) # TODO: could we change this to not use inplace modifications ? - messages.append(answer) + if not isinstance(answer_list, list): + answer_list = [answer_list] + + # TODO taking the 1st hides the other generated answers in AgentXRay + messages.append(answer_list[0]) parsed_answers = [] errors = [] for answer in answer_list: