From c9d2ef9648435ef1119950ecb1a0734497ccc33b Mon Sep 17 00:00:00 2001 From: Leo Boisvert Date: Tue, 25 Feb 2025 01:58:48 +0000 Subject: [PATCH 1/3] Add AGENT_37_SONNET support and update agent configurations --- main.py | 10 +++++++--- src/agentlab/agents/generic_agent/__init__.py | 2 ++ src/agentlab/agents/generic_agent/agent_configs.py | 5 ++++- src/agentlab/llm/llm_configs.py | 8 ++++++++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 43e876af..b33cfd4b 100644 --- a/main.py +++ b/main.py @@ -14,13 +14,17 @@ RANDOM_SEARCH_AGENT, AGENT_4o, AGENT_4o_MINI, + AGENT_o3_MINI, + AGENT_o1_MINI, + AGENT_37_SONNET, + AGENT_CLAUDE_SONNET_35, ) from agentlab.experiments.study import Study logging.getLogger().setLevel(logging.INFO) # choose your agent or provide a new agent -agent_args = [AGENT_4o_MINI] +agent_args = [AGENT_o1_MINI] # agent_args = [AGENT_4o] @@ -35,14 +39,14 @@ # Set reproducibility_mode = True for reproducibility # this will "ask" agents to be deterministic. Also, it will prevent you from launching if you have # local changes. For your custom agents you need to implement set_reproducibility_mode -reproducibility_mode = False +reproducibility_mode = True # Set relaunch = True to relaunch an existing study, this will continue incomplete # experiments and relaunch errored experiments relaunch = False ## Number of parallel jobs -n_jobs = 4 # Make sure to use 1 job when debugging in VSCode +n_jobs = 5 # Make sure to use 1 job when debugging in VSCode # n_jobs = -1 # to use all available cores diff --git a/src/agentlab/agents/generic_agent/__init__.py b/src/agentlab/agents/generic_agent/__init__.py index b4712cfe..aee8c3aa 100644 --- a/src/agentlab/agents/generic_agent/__init__.py +++ b/src/agentlab/agents/generic_agent/__init__.py @@ -16,6 +16,7 @@ AGENT_4o, AGENT_4o_MINI, AGENT_CLAUDE_SONNET_35, + AGENT_37_SONNET, AGENT_4o_VISION, AGENT_o3_MINI, AGENT_o1_MINI, @@ -34,6 +35,7 @@ "RANDOM_SEARCH_AGENT", "AGENT_CUSTOM", "AGENT_CLAUDE_SONNET_35", + "AGENT_37_SONNET", "AGENT_4o_VISION", "AGENT_4o_MINI_VISION", "AGENT_CLAUDE_SONNET_35_VISION", diff --git a/src/agentlab/agents/generic_agent/agent_configs.py b/src/agentlab/agents/generic_agent/agent_configs.py index e21ada58..789f350a 100644 --- a/src/agentlab/agents/generic_agent/agent_configs.py +++ b/src/agentlab/agents/generic_agent/agent_configs.py @@ -264,7 +264,10 @@ chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/anthropic/claude-3.5-sonnet:beta"], flags=FLAGS_GPT_4o, ) - +AGENT_37_SONNET = GenericAgentArgs( + chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/anthropic/claude-3.7-sonnet"], + flags=FLAGS_GPT_4o, +) AGENT_o3_MINI = GenericAgentArgs( chat_model_args=CHAT_MODEL_ARGS_DICT["openai/o3-mini-2025-01-31"], flags=FLAGS_GPT_4o, diff --git a/src/agentlab/llm/llm_configs.py b/src/agentlab/llm/llm_configs.py index 2958f92b..ba0e15cc 100644 --- a/src/agentlab/llm/llm_configs.py +++ b/src/agentlab/llm/llm_configs.py @@ -184,6 +184,14 @@ max_new_tokens=2_000, temperature=1e-1, ), + "openrouter/anthropic/claude-3.7-sonnet": OpenRouterModelArgs( + model_name="anthropic/claude-3.7-sonnet", + max_total_tokens=200_000, + max_input_tokens=200_000, + max_new_tokens=8_192, + temperature=1e-1, + vision_support=True, + ), "openrouter/openai/o1-mini-2024-09-12": OpenRouterModelArgs( model_name="openai/o1-mini-2024-09-12", max_total_tokens=128_000, From 2d8d4de1113d19a1b195bf4795a5cb858457656c Mon Sep 17 00:00:00 2001 From: Leo Boisvert Date: Tue, 25 Feb 2025 13:44:50 +0000 Subject: [PATCH 2/3] add new results to repro journal --- reproducibility_journal.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/reproducibility_journal.csv b/reproducibility_journal.csv index 991cedae..b42d39ee 100644 --- a/reproducibility_journal.csv +++ b/reproducibility_journal.csv @@ -73,3 +73,4 @@ Leo Boisvert,GenericAgent-openai_o1-mini-2024-09-12,workarena_l1,0.4.1,2025-02-0 M: src/agentlab/agents/generic_agent/agent_configs.py M: src/agentlab/analyze/agent_xray.py M: src/agentlab/llm/llm_configs.py",0.13.3,1d2d7160e5b7ec9954ecb48988f71eb56288dd29," +Leo Boisvert,GenericAgent-anthropic_claude-3.7-sonnet,workarena_l1,0.4.1,2025-02-25_02-32-09,d4f900c2-1de1-4e4b-a3ab-495ff2675fff,0.515,0.028,0,330/330,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.3,1.44.0,v0.4.0,c9d2ef9648435ef1119950ecb1a0734497ccc33b,,0.13.3,1d2d7160e5b7ec9954ecb48988f71eb56288dd29, From 24f48f38c3df0e302989f47776dfdc4a16274d7f Mon Sep 17 00:00:00 2001 From: Leo Boisvert Date: Thu, 13 Mar 2025 10:46:21 -0400 Subject: [PATCH 3/3] Revert changes to main script --- main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index b33cfd4b..a0a35e4f 100644 --- a/main.py +++ b/main.py @@ -24,7 +24,7 @@ logging.getLogger().setLevel(logging.INFO) # choose your agent or provide a new agent -agent_args = [AGENT_o1_MINI] +agent_args = [AGENT_4o_MINI] # agent_args = [AGENT_4o] @@ -39,14 +39,14 @@ # Set reproducibility_mode = True for reproducibility # this will "ask" agents to be deterministic. Also, it will prevent you from launching if you have # local changes. For your custom agents you need to implement set_reproducibility_mode -reproducibility_mode = True +reproducibility_mode = False # Set relaunch = True to relaunch an existing study, this will continue incomplete # experiments and relaunch errored experiments relaunch = False ## Number of parallel jobs -n_jobs = 5 # Make sure to use 1 job when debugging in VSCode +n_jobs = 4 # Make sure to use 1 job when debugging in VSCode # n_jobs = -1 # to use all available cores