Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 85 additions & 78 deletions reproducibility_journal.csv

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions src/agentlab/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,7 @@

- TapeAgent: An agent that uses the Tape data structure to perform actions

- VisualAgent: An agent that uses visual observations to perform actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicated Word in Documentation category Documentation

Tell me more
What is the issue?

There is a duplicated 'to' in the description of VisualAgent.

Why this matters

The duplicate word makes the documentation incorrect and unprofessional, affecting the clarity of the API documentation for users.

Suggested change ∙ Feature Preview

Replace the line with a correctly formatted description:

- VisualAgent: An agent that uses visual observations to perform actions
Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.


- VisualWebArenaAgent: An implementation of the agent used in WebArena and VisualWebArena
"""
5 changes: 4 additions & 1 deletion src/agentlab/agents/generic_agent/agent_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from .generic_agent import GenericAgentArgs
from .generic_agent_prompt import GenericPromptFlags
from .tmlr_config import BASE_FLAGS
from .tmlr_config import BASE_FLAGS, get_base_agent

FLAGS_CUSTOM = GenericPromptFlags(
obs=dp.ObsFlags(
Expand Down Expand Up @@ -302,6 +302,9 @@
flags=BASE_FLAGS,
)

AGENT_QWEN_2_5_VL_32B = get_base_agent("openrouter/qwen/qwen2.5-vl-32b-instruct")
AGENT_QWEN_3_32B = get_base_agent("openrouter/qwen/qwen3-32b")

DEFAULT_RS_FLAGS = GenericPromptFlags(
flag_group="default_rs",
obs=dp.ObsFlags(
Expand Down
1 change: 1 addition & 0 deletions src/agentlab/agents/generic_agent/tmlr_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@


def get_base_agent(llm_config: str):
assert llm_config in CHAT_MODEL_ARGS_DICT, f"Unsupported LLM config: {llm_config}"
return GenericAgentArgs(
chat_model_args=CHAT_MODEL_ARGS_DICT[llm_config],
flags=BASE_FLAGS,
Expand Down
43 changes: 43 additions & 0 deletions src/agentlab/agents/visual_agent/agent_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,46 @@
chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/anthropic/claude-3.5-sonnet:beta"],
flags=DEFAULT_PROMPT_FLAGS,
)

VISUAL_AGENT_QWEN_2_5_VL_32B = VisualAgentArgs(
chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/qwen/qwen2.5-vl-32b-instruct"],
flags=DEFAULT_PROMPT_FLAGS,
)

def get_som_agent(llm_config: str):
    """Create a basic 1-step vision agent using set-of-marks (SOM) screenshots.

    Args:
        llm_config: Key into CHAT_MODEL_ARGS_DICT identifying the chat model.

    Returns:
        VisualAgentArgs configured with SOM observation flags and an
        ``agent_name`` of the form "VisualAgent-som-<model>" (slashes
        replaced by underscores).

    Raises:
        ValueError: If ``llm_config`` is not a known model configuration.
    """
    # Raise instead of assert: assertions are stripped under `python -O`,
    # which would leave caller input unvalidated in optimized runs.
    if llm_config not in CHAT_MODEL_ARGS_DICT:
        raise ValueError(f"Unsupported LLM config: {llm_config}")

    obs_flags = dp.ObsFlags(
        use_tabs=True,
        use_error_logs=True,
        use_past_error_logs=False,
        use_screenshot=True,
        use_som=True,  # set-of-marks overlay on the screenshot
        openai_vision_detail="auto",
    )
    action_flags = dp.ActionFlags(
        action_set=bgym.HighLevelActionSetArgs(subsets=["bid"]),
        long_description=True,
        individual_examples=False,
    )
    som_prompt_flags = PromptFlags(
        obs=obs_flags,
        action=action_flags,
        use_thinking=True,
        use_concrete_example=False,
        use_abstract_example=True,
        enable_chat=False,
        extra_instructions=None,
    )

    agent_args = VisualAgentArgs(
        chat_model_args=CHAT_MODEL_ARGS_DICT[llm_config],
        flags=som_prompt_flags,
    )
    model_name = agent_args.chat_model_args.model_name
    # Slashes in model names (e.g. "qwen/qwen2.5-vl-32b") are not filesystem-safe.
    agent_args.agent_name = f"VisualAgent-som-{model_name}".replace("/", "_")

    return agent_args


VISUAL_SOM_AGENT_LLAMA4_17B_INSTRUCT = get_som_agent("openrouter/meta-llama/llama-4-maverick")
20 changes: 18 additions & 2 deletions src/agentlab/llm/chat_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,19 +322,35 @@ def __call__(self, messages: list[dict], n_samples: int = 1, temperature: float
tracking.TRACKER.instance(input_tokens, output_tokens, cost)

if n_samples == 1:
res = AIMessage(completion.choices[0].message.content)
res = AIMessage(self.extract_content_with_reasoning(completion.choices[0].message))
if self.log_probs:
res["log_probs"] = completion.choices[0].log_probs
return res
else:
return [AIMessage(c.message.content) for c in completion.choices]
return [
AIMessage(self.extract_content_with_reasoning(c.message))
for c in completion.choices
]

def get_stats(self):
    """Return per-call LLM statistics (currently only the retry count)."""
    # busted_retry is intentionally not reported; a busted retry is not
    # logged here because it surfaces elsewhere if it occurs.
    stats = {"n_retry_llm": self.retries}
    return stats

# Support for models that return reasoning.
def extract_content_with_reasoning(self, message, wrap_tag="think"):
    """Return the message content, prefixed by the model's reasoning if any.

    A non-empty ``reasoning`` attribute on *message* is wrapped in
    ``<think>...</think>`` tags (tag name configurable via ``wrap_tag``)
    and prepended to the content, for backward compatibility with
    consumers that parse think-tags out of the text.
    """
    reasoning = getattr(message, "reasoning", None)
    if not reasoning:
        # No (or empty) reasoning: pass the content through unchanged.
        return message.content
    return f"<{wrap_tag}>\n{reasoning}\n</{wrap_tag}>\n{message.content}"


class OpenAIChatModel(ChatModel):
def __init__(
Expand Down
16 changes: 16 additions & 0 deletions src/agentlab/llm/llm_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,4 +207,20 @@
max_new_tokens=64_000,
temperature=1e-1,
),
"openrouter/qwen/qwen2.5-vl-32b-instruct": OpenRouterModelArgs(
model_name="qwen/qwen2.5-vl-32b-instruct",
max_total_tokens=128_000,
max_input_tokens=120_000,
max_new_tokens=8_000,
Comment on lines +212 to +214
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No Token Buffer Safety Margin category Performance

Tell me more
What is the issue?

The sum of max_input_tokens and max_new_tokens equals max_total_tokens exactly, which could lead to token limit errors at runtime.

Why this matters

When the actual input reaches near max_input_tokens, even slightly exceeding it due to tokenization differences could cause failures since there's no buffer in the total token limit.

Suggested change ∙ Feature Preview

Add a small buffer by reducing max_input_tokens or max_new_tokens to ensure total is less than max_total_tokens:

max_total_tokens=128_000,
max_input_tokens=119_000,  # Reduced to provide buffer
max_new_tokens=8_000,
Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.

temperature=1e-1,
vision_support=True,
),
"openrouter/qwen/qwen3-32b": OpenRouterModelArgs(
model_name="qwen/qwen3-32b",
max_total_tokens=128_000,
max_input_tokens=120_000,
max_new_tokens=8_000,
temperature=1e-1,
vision_support=True,
),
}
Loading