diff --git a/.gitignore b/.gitignore
index c878e3d1..fc61d50e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -174,5 +174,4 @@ miniwob-plusplus/
 debugging_results/
 
 # working files
-main_miniwob_debug.py
-main_workarena_debug.py
+experiments/*
\ No newline at end of file
diff --git a/src/agentlab/agents/generic_agent/__init__.py b/src/agentlab/agents/generic_agent/__init__.py
index ad9b0348..cb5bbb7f 100644
--- a/src/agentlab/agents/generic_agent/__init__.py
+++ b/src/agentlab/agents/generic_agent/__init__.py
@@ -9,20 +9,23 @@
 from .agent_configs import (
     AGENT_3_5,
     AGENT_8B,
+    AGENT_37_SONNET,
+    AGENT_CLAUDE_SONNET_35,
+    AGENT_CLAUDE_SONNET_35_VISION,
     AGENT_CUSTOM,
-    AGENT_LLAMA4_17B_INSTRUCT,
     AGENT_LLAMA3_70B,
+    AGENT_LLAMA4_17B_INSTRUCT,
     AGENT_LLAMA31_70B,
+    CHAT_MODEL_ARGS_DICT,
     RANDOM_SEARCH_AGENT,
     AGENT_4o,
     AGENT_4o_MINI,
-    AGENT_CLAUDE_SONNET_35,
-    AGENT_37_SONNET,
-    AGENT_CLAUDE_SONNET_35_VISION,
-    AGENT_4o_VISION,
     AGENT_4o_MINI_VISION,
-    AGENT_o3_MINI,
+    AGENT_4o_VISION,
     AGENT_o1_MINI,
+    AGENT_o3_MINI,
+    FLAGS_GPT_4o,
+    GenericAgentArgs,
 )
 
 __all__ = [
diff --git a/src/agentlab/agents/tool_use_agent/__init__.py b/src/agentlab/agents/tool_use_agent/__init__.py
index b03b1169..935fea14 100644
--- a/src/agentlab/agents/tool_use_agent/__init__.py
+++ b/src/agentlab/agents/tool_use_agent/__init__.py
@@ -1,4 +1,6 @@
 import sys
 
+from agentlab.agents.tool_use_agent.tool_use_agent import *
+
 # for backward compatibility of unpickling
 sys.modules[__name__ + ".multi_tool_agent"] = sys.modules[__name__]
diff --git a/src/agentlab/agents/tool_use_agent/tool_use_agent.py b/src/agentlab/agents/tool_use_agent/tool_use_agent.py
index 86140d02..28b97e82 100644
--- a/src/agentlab/agents/tool_use_agent/tool_use_agent.py
+++ b/src/agentlab/agents/tool_use_agent/tool_use_agent.py
@@ -147,7 +147,7 @@ def apply(self, llm, discussion: StructuredDiscussion, obs: dict) -> dict:
 
 AXTREE_NOTE = """
 AXTree extracts most of the interactive elements of the DOM in a tree structure.
 It may also contain information that is not visible in the screenshot.
-A line starting with [bid] is a node in the AXTree. It is a unique alpha-numeric identifier to be used when calling tools.
+A line starting with [bid] is a node in the AXTree. It is a unique alpha-numeric identifier to be used when calling tools, e.g., click(bid="a253"). Make sure to include both the letters and the numbers of the bid.
""" @@ -347,7 +347,7 @@ class PromptConfig: task_hint: TaskHint = None keep_last_n_obs: int = 1 multiaction: bool = False - action_subsets: tuple[str] = field(default_factory=lambda: ("coord",)) + action_subsets: tuple[str] = None @dataclass @@ -498,6 +498,15 @@ def get_action(self, obs: Any) -> float: vision_support=True, ) +GPT_4_1_MINI = OpenAIResponseModelArgs( + model_name="gpt-4.1-mini", + max_total_tokens=200_000, + max_input_tokens=200_000, + max_new_tokens=2_000, + temperature=0.1, + vision_support=True, +) + OPENAI_CHATAPI_MODEL_CONFIG = OpenAIChatModelArgs( model_name="gpt-4o-2024-08-06", max_total_tokens=200_000, diff --git a/src/agentlab/analyze/agent_xray.py b/src/agentlab/analyze/agent_xray.py index f56d607c..ea5371b9 100644 --- a/src/agentlab/analyze/agent_xray.py +++ b/src/agentlab/analyze/agent_xray.py @@ -601,7 +601,13 @@ def get_screenshot( if annotate: action_str = step_info.action properties = step_info.obs.get("extra_element_properties", None) - action_colored = annotate_action(img, action_string=action_str, properties=properties) + try: + action_colored = annotate_action( + img, action_string=action_str, properties=properties + ) + except Exception as e: + warning(f"Failed to annotate action: {e}") + action_colored = action_str else: action_colored = None return img, action_colored diff --git a/src/agentlab/analyze/archive_studies.py b/src/agentlab/analyze/archive_studies.py new file mode 100644 index 00000000..e82934b4 --- /dev/null +++ b/src/agentlab/analyze/archive_studies.py @@ -0,0 +1,122 @@ +import os +from dataclasses import dataclass +from pathlib import Path + +import pandas as pd +from tqdm import tqdm + +from agentlab.analyze import inspect_results +from agentlab.experiments.exp_utils import RESULTS_DIR +from agentlab.experiments.study import Study + + +@dataclass +class StudyInfo: + study_dir: Path + study: Study + summary_df: pd.DataFrame + should_delete: bool = False + reason: str = "" + + +def search_for_reasons_to_archive(result_dir: Path, min_study_size: int = 0) -> list[StudyInfo]: + + study_info_list = [] + study_dirs = list(result_dir.iterdir()) + progress = tqdm(study_dirs, desc="Processing studies") + for study_dir in progress: + + progress.set_postfix({"study_dir": study_dir}) + if not study_dir.is_dir(): + progress.set_postfix({"status": "skipped"}) + continue + + try: + study = Study.load(study_dir) + except Exception: + study = None + # get summary*.csv files and find the most recent + summary_files = list(study_dir.glob("summary*.csv")) + + if len(summary_files) != 0: + most_recent_summary = max(summary_files, key=os.path.getctime) + summary_df = pd.read_csv(most_recent_summary) + + else: + try: + result_df = inspect_results.load_result_df(study_dir, progress_fn=None) + summary_df = inspect_results.summarize_study(result_df) + except Exception as e: + print(f" Error processing {study_dir}: {e}") + continue + + study_info = StudyInfo( + study_dir=study_dir, + study=study, + summary_df=summary_df, + ) + + if len(study_info.summary_df) == 0: + study_info.should_delete = True + study_info.reason = "Empty summary DataFrame" + + n_completed, n_total, n_err = 0, 0, 0 + + for _, row in study_info.summary_df.iterrows(): + n_comp, n_tot = row["n_completed"].split("/") + n_completed += int(n_comp) + n_total += int(n_tot) + n_err += int(row.get("n_err")) + + n_finished = n_completed - n_err + + # print(summary_df) + # print(f" {n_completed} / {n_total}, {n_err} errors") + + if "miniwob-tiny-test" in study_dir.name: + study_info.should_delete = True + 
study_info.reason += "Miniwob tiny test\n" + if n_total == 0: + study_info.should_delete = True + study_info.reason += "No tasks\n" + if n_completed == 0: + study_info.should_delete = True + study_info.reason += "No tasks completed\n" + if float(n_finished) / float(n_total) < 0.5: + study_info.should_delete = True + study_info.reason += f"Less than 50% tasks finished, n_err: {n_err}, n_total: {n_total}, n_finished: {n_finished}, n_completed: {n_completed}\n" + + if n_total <= min_study_size: + study_info.should_delete = True + study_info.reason += ( + f"Too few tasks. n_total ({n_total}) <= min_study_size ({min_study_size})\n" + ) + + study_info_list.append(study_info) + return study_info_list + + +if __name__ == "__main__": + study_list_info = search_for_reasons_to_archive(RESULTS_DIR, min_study_size=5) + archive_dir = RESULTS_DIR.parent / "archived_agentlab_results" # type: Path + archive_dir.mkdir(parents=True, exist_ok=True) + + # Uncomment the line below to prevent moving studies to archive + archive_dir = None + + for study_info in study_list_info: + if not study_info.should_delete: + continue + + print(f"Study: {study_info.study_dir.name}") + print(f" Reason: {study_info.reason}") + print(study_info.summary_df) + print() + + if archive_dir is not None: + # move to new dir + new_path = archive_dir / study_info.study_dir.name + study_info.study_dir.rename(new_path) + # save reason in a file + reason_file = new_path / "reason_to_archive.txt" + reason_file.write_text(study_info.reason) diff --git a/src/agentlab/experiments/graph_execution_ray.py b/src/agentlab/experiments/graph_execution_ray.py index b8ba5b59..f047f866 100644 --- a/src/agentlab/experiments/graph_execution_ray.py +++ b/src/agentlab/experiments/graph_execution_ray.py @@ -1,7 +1,3 @@ -# import os - -# # Disable Ray log deduplication -# os.environ["RAY_DEDUP_LOGS"] = "0" import logging import time @@ -90,12 +86,22 @@ def poll_for_timeout(tasks: dict[str, ray.ObjectRef], timeout: float, poll_inter def get_elapsed_time(task_ref: ray.ObjectRef): - task_id = task_ref.task_id().hex() - task_info = state.get_task(task_id, address="auto") - if task_info and task_info.start_time_ms is not None: - start_time_s = task_info.start_time_ms / 1000.0 # Convert ms to s + try: + task_id = task_ref.task_id().hex() + task_info = state.get_task(task_id, address="auto") + if not task_info: + return None + if not isinstance(task_info, list): + task_info = [task_info] + + start_times_ms = [getattr(t, "start_time_ms", None) for t in task_info] + start_time_s = max([t / 1000.0 if t is not None else -1 for t in start_times_ms]) + if start_time_s < 0: + return None # Task has not started yet + current_time_s = time.time() elapsed_time = current_time_s - start_time_s return elapsed_time - else: - return None # Task has not started yet + except Exception as e: + logger.warning(f"Could not get elapsed time for task {task_id}: {e}") + return None diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py index ac77e01f..7b5b280f 100644 --- a/src/agentlab/experiments/loop.py +++ b/src/agentlab/experiments/loop.py @@ -25,7 +25,11 @@ from PIL import Image from tqdm import tqdm -from agentlab.agents.tapeagent import TapeAgent, save_tape +try: + from agentlab.agents.tapeagent import TapeAgent, save_tape +except ImportError: + TapeAgent = None + logger = logging.getLogger(__name__) @@ -474,7 +478,7 @@ def run(self): err_msg = f"Exception uncaught by agent or environment in task {self.env_args.task_name}.\n{type(e).__name__}:\n{e}" 
logger.info("Saving experiment info.") self.save_summary_info(episode_info, Path(self.exp_dir), err_msg, stack_trace) - if isinstance(agent, TapeAgent): + if TapeAgent is not None and isinstance(agent, TapeAgent): task = getattr(env, "task", {}) save_tape(self.exp_dir, episode_info, task, agent.final_tape) except Exception as e: diff --git a/src/agentlab/llm/chat_api.py b/src/agentlab/llm/chat_api.py index 371229b5..3285b877 100644 --- a/src/agentlab/llm/chat_api.py +++ b/src/agentlab/llm/chat_api.py @@ -6,6 +6,7 @@ from functools import partial from typing import Optional +import anthropic import openai from huggingface_hub import InferenceClient from openai import AzureOpenAI, OpenAI @@ -471,3 +472,77 @@ def __init__( client_args={"base_url": "http://0.0.0.0:8000/v1"}, pricing_func=None, ) + + +class AnthropicChatModel(AbstractChatModel): + def __init__( + self, + model_name, + api_key=None, + temperature=0.5, + max_tokens=100, + max_retry=4, + log_probs=False, + ): + self.model_name = model_name + self.temperature = temperature + self.max_tokens = max_tokens + self.max_retry = max_retry + self.log_probs = log_probs + + api_key = api_key or os.getenv("ANTHROPIC_API_KEY") + self.client = anthropic.Anthropic(api_key=api_key) + + def __call__(self, messages: list[dict], n_samples: int = 1, temperature: float = None) -> dict: + # Convert OpenAI format to Anthropic format + system_message = None + anthropic_messages = [] + + for msg in messages: + if msg["role"] == "system": + system_message = msg["content"] + else: + anthropic_messages.append({"role": msg["role"], "content": msg["content"]}) + + temperature = temperature if temperature is not None else self.temperature + + for attempt in range(self.max_retry): + try: + kwargs = { + "model": self.model_name, + "messages": anthropic_messages, + "max_tokens": self.max_tokens, + "temperature": temperature, + } + + if system_message: + kwargs["system"] = system_message + + response = self.client.messages.create(**kwargs) + + # Track usage if available + if hasattr(tracking.TRACKER, "instance"): + tracking.TRACKER.instance( + response.usage.input_tokens, + response.usage.output_tokens, + 0, # cost calculation would need pricing info + ) + + return AIMessage(response.content[0].text) + + except Exception as e: + if attempt == self.max_retry - 1: + raise e + logging.warning(f"Anthropic API error (attempt {attempt + 1}): {e}") + time.sleep(60) # Simple retry delay + + +@dataclass +class AnthropicModelArgs(BaseModelArgs): + def make_model(self): + return AnthropicChatModel( + model_name=self.model_name, + temperature=self.temperature, + max_tokens=self.max_new_tokens, + log_probs=self.log_probs, + ) diff --git a/src/agentlab/llm/llm_configs.py b/src/agentlab/llm/llm_configs.py index 5125801d..2570d0cd 100644 --- a/src/agentlab/llm/llm_configs.py +++ b/src/agentlab/llm/llm_configs.py @@ -17,6 +17,20 @@ ] CHAT_MODEL_ARGS_DICT = { + "openai/gpt-4.1-mini-2025-04-14": OpenAIModelArgs( + model_name="gpt-4.1-mini-2025-04-14", + max_total_tokens=128_000, + max_input_tokens=128_000, + max_new_tokens=16_384, + vision_support=True, + ), + "openai/gpt-4.1-2025-04-14": OpenAIModelArgs( + model_name="gpt-4.1-2025-04-14", + max_total_tokens=128_000, + max_input_tokens=128_000, + max_new_tokens=16_384, + vision_support=True, + ), "openai/o3-mini-2025-01-31": OpenAIModelArgs( model_name="o3-mini-2025-01-31", max_total_tokens=200_000, diff --git a/src/agentlab/llm/response_api.py b/src/agentlab/llm/response_api.py index 2b9f696c..06f346a2 100644 --- 
+++ b/src/agentlab/llm/response_api.py
@@ -360,14 +360,7 @@ def _parse_response(self, response: dict) -> dict:
         interesting_keys = ["output_text"]
         for output in response.output:
             if output.type == "function_call":
-                arguments = json.loads(output.arguments)
-                func_args_str = ", ".join(
-                    [
-                        f'{k}="{v}"' if isinstance(v, str) else f"{k}={v}"
-                        for k, v in arguments.items()
-                    ]
-                )
-                result.action = f"{output.name}({func_args_str})"
+                result.action = tool_call_to_python_code(output.name, json.loads(output.arguments))
                 result.tool_calls = output
                 break
             elif output.type == "reasoning":
@@ -603,13 +596,7 @@ def _parse_response(self, response: dict) -> dict:
         )
         for output in response.content:
             if output.type == "tool_use":
-                func_args_str = ", ".join(
-                    [
-                        f'{k}="{v}"' if isinstance(v, str) else f"{k}={v}"
-                        for k, v in output.input.items()
-                    ]
-                )
-                result.action = f"{output.name}({func_args_str})"
+                result.action = tool_call_to_python_code(output.name, output.input)
             elif output.type == "text":
                 result.think += output.text
         return result
@@ -736,3 +723,15 @@ def make_model(self, extra_kwargs=None, **kwargs):
 
     def get_message_builder(self) -> MessageBuilder:
         return OpenAIChatCompletionAPIMessageBuilder
+
+
+def tool_call_to_python_code(func_name, kwargs):
+    """Format a function name and kwargs dict into a Python function call string."""
+    if kwargs is None:
+        kwargs = {}
+
+    if not kwargs:
+        return f"{func_name}()"
+
+    args_str = ", ".join(f"{key}={repr(value)}" for key, value in kwargs.items())
+    return f"{func_name}({args_str})"
diff --git a/tests/llm/test_chat_api.py b/tests/llm/test_chat_api.py
index f06fa7fa..4f74c56a 100644
--- a/tests/llm/test_chat_api.py
+++ b/tests/llm/test_chat_api.py
@@ -3,6 +3,7 @@
 import pytest
 
 from agentlab.llm.chat_api import (
+    AnthropicModelArgs,
     AzureModelArgs,
     OpenAIModelArgs,
     make_system_message,
@@ -59,3 +60,27 @@ def test_api_model_args_openai():
     answer = model(messages)
 
     assert "5" in answer.get("content")
+
+
+@pytest.mark.pricy
+@pytest.mark.skipif(skip_tests, reason="Skipping on remote as Anthropic is pricy")
+def test_api_model_args_anthropic():
+    model_args = AnthropicModelArgs(
+        model_name="claude-3-haiku-20240307",
+        max_total_tokens=8192,
+        max_input_tokens=8192 - 512,
+        max_new_tokens=512,
+        temperature=1e-1,
+    )
+    model = model_args.make_model()
+
+    messages = [
+        make_system_message("You are a helpful virtual assistant"),
+        make_user_message("Give the third prime number. Just the number, no explanation."),
+    ]
+    answer = model(messages)
+    assert "5" in answer.get("content")
+
+
+if __name__ == "__main__":
+    test_api_model_args_anthropic()
diff --git a/tests/llm/test_response_api.py b/tests/llm/test_response_api.py
index 16316a92..567b49da 100644
--- a/tests/llm/test_response_api.py
+++ b/tests/llm/test_response_api.py
@@ -299,7 +299,7 @@ def test_claude_response_model_parse_and_cost():
         content for content in parsed_output.raw_response.content if content.type == "tool_use"
     ]
     assert "Thinking about the request." in parsed_output.think
-    assert parsed_output.action == 'search_web(query="latest news")'
+    assert parsed_output.action == "search_web(query='latest news')"
     assert fn_calls[0].id == "tool_abc"
     assert global_tracker.stats["input_tokens"] == 40
     assert global_tracker.stats["output_tokens"] == 20
@@ -348,7 +348,7 @@ def test_openai_response_model_parse_and_cost():
     fn_calls = [
         content for content in parsed_output.raw_response.output if content.type == "function_call"
     ]
-    assert parsed_output.action == 'get_current_weather(location="Boston, MA", unit="celsius")'
+    assert parsed_output.action == "get_current_weather(location='Boston, MA', unit='celsius')"
     assert fn_calls[0].call_id == "call_abc123"
     assert parsed_output.raw_response == mock_api_resp
    assert global_tracker.stats["input_tokens"] == 70
@@ -716,3 +716,53 @@ def test_claude_model_with_multiple_messages_pricy_call():
 # TODO: Add tests for image token costing (this is complex and model-specific)
 # - For OpenAI, you'd need to know how they bill for images (e.g., fixed cost per image + tokens for text parts)
 # - You'd likely need to mock the response from client.chat.completions.create to include specific usage for images.
+
+
+EDGE_CASES = [
+    # 1. Empty kwargs dict
+    ("valid_function", {}, "valid_function()"),
+    # 2. Kwargs with problematic string values (quotes, escapes, unicode)
+    (
+        "send_message",
+        {
+            "text": 'He said "Hello!" and used a backslash: \\',
+            "unicode": "Café naïve résumé 🚀",
+            "newlines": "Line1\nLine2\tTabbed",
+        },
+        "send_message(text='He said \"Hello!\" and used a backslash: \\\\', unicode='Café naïve résumé 🚀', newlines='Line1\\nLine2\\tTabbed')",
+    ),
+    # 3. Mixed types including problematic float values
+    (
+        "complex_call",
+        {
+            "infinity": float("inf"),
+            "nan": float("nan"),
+            "negative_zero": -0.0,
+            "scientific": 1.23e-45,
+        },
+        "complex_call(infinity=inf, nan=nan, negative_zero=-0.0, scientific=1.23e-45)",
+    ),
+    # 4. Deeply nested structures that could stress repr()
+    (
+        "process_data",
+        {
+            "nested": {"level1": {"level2": {"level3": [1, 2, {"deep": True}]}}},
+            "circular_ref_like": {"a": {"b": {"c": "back_to_start"}}},
+        },
+        "process_data(nested={'level1': {'level2': {'level3': [1, 2, {'deep': True}]}}}, circular_ref_like={'a': {'b': {'c': 'back_to_start'}}})",
+    ),
+]
+
+
+def test_tool_call_to_python_code():
+    from agentlab.llm.response_api import tool_call_to_python_code
+
+    for edge_case in EDGE_CASES:
+        func_name, kwargs, expected = edge_case
+        result = tool_call_to_python_code(func_name, kwargs)
+        print(result)
+        assert result == expected, f"Expected {expected} but got {result}"
+
+
+if __name__ == "__main__":
+    test_tool_call_to_python_code()
diff --git a/tests/verify_rate_limit_anthropic.py b/tests/verify_rate_limit_anthropic.py
new file mode 100644
index 00000000..5ff2d91d
--- /dev/null
+++ b/tests/verify_rate_limit_anthropic.py
@@ -0,0 +1,89 @@
+import os
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import anthropic
+
+client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
+
+
+def make_request(messages):
+    response = client.messages.create(
+        model="claude-3-5-sonnet-20241022", max_tokens=10, messages=messages
+    )
+    return response.usage
+
+
+def make_message(text):
+    return {
+        "role": "user",
+        "content": [
+            {
+                "type": "text",
+                "text": text,
+            }
+        ],
+    }
+
+
+def add_cache_control(message: dict, cache_type="ephemeral"):
+    message["content"][0]["cache_control"] = {"type": cache_type}
+
+
+def remove_cache_control(message: dict):
+    if "cache_control" in message["content"][0]:
+        del message["content"][0]["cache_control"]
+
+
+def test_rate_limit_single(thread_id):
+    # Create a ~100k token message that will be cached
+    big_text = "This is a large block of text for caching. " * 10000  # ~100k tokens
+    medium_text = "This is a large block of text for caching. " * 2000  # ~20k tokens
+
+    print(f"Thread {thread_id}: Starting rate limit test with cached content...")
+
+    # Rebuild conversation each time (simulating web agent)
+    messages = []
+
+    # Add all previous conversation turns
+    for i in range(5):
+        if i == 0:
+            messages.append(make_message(big_text))
+            t0 = time.time()
+        else:
+            messages.append(make_message(medium_text))
+        add_cache_control(messages[-1])
+        try:
+            usage = make_request(messages)
+            dt = time.time() - t0
+            print(f"{dt:.2f}: Thread {thread_id}: {usage}")
+        except Exception as e:
+            print(f"Thread {thread_id}: Error - {e}")
+            break
+        remove_cache_control(messages[-1])
+
+
+def test_rate_limit_parallel(num_threads=3):
+    print(f"Starting parallel rate limit test with {num_threads} threads...")
+
+    with ThreadPoolExecutor(max_workers=num_threads) as executor:
+        futures = [executor.submit(test_rate_limit_single, i) for i in range(num_threads)]
+
+        for future in as_completed(futures):
+            try:
+                future.result()
+            except Exception as e:
+                print(f"Thread completed with error: {e}")
+
+
+def test_rate_limit():
+    # Original single-threaded version
+    test_rate_limit_single(0)
+
+
+if __name__ == "__main__":
+    # Use parallel version to quickly exhaust rate limits
+    test_rate_limit_parallel(num_threads=3)
+
+    # Or use original single-threaded version
+    # test_rate_limit()
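
Usage notes (not part of the patch):

The new tool_call_to_python_code helper formats keyword arguments with repr(), so quoting and escaping survive the round trip from a parsed tool call back to an executable action string. A minimal sketch of the expected behavior, mirroring the assertions in tests/llm/test_response_api.py (the tool names here are arbitrary placeholders):

    from agentlab.llm.response_api import tool_call_to_python_code

    # repr() picks the quote style, so string values containing double quotes stay valid Python
    assert tool_call_to_python_code("click", {"bid": "a253"}) == "click(bid='a253')"
    # None or an empty dict both produce a zero-argument call
    assert tool_call_to_python_code("noop", None) == "noop()"
    # non-string values are rendered with their repr() as well
    assert tool_call_to_python_code("scroll", {"dx": 0, "dy": 200}) == "scroll(dx=0, dy=200)"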
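The new AnthropicChatModel is constructed through AnthropicModelArgs, following the same BaseModelArgs pattern as the existing OpenAI model args. A sketch of the intended call flow, based on test_api_model_args_anthropic above (assumes ANTHROPIC_API_KEY is set in the environment):

    from agentlab.llm.chat_api import AnthropicModelArgs, make_system_message, make_user_message

    model_args = AnthropicModelArgs(
        model_name="claude-3-haiku-20240307",
        max_total_tokens=8192,
        max_input_tokens=8192 - 512,
        max_new_tokens=512,
        temperature=0.1,
    )
    model = model_args.make_model()  # builds an AnthropicChatModel

    messages = [
        make_system_message("You are a helpful virtual assistant"),
        make_user_message("Give the third prime number."),
    ]
    answer = model(messages)  # AIMessage; the text is in answer.get("content")

Note that the system message is passed through Anthropic's dedicated system parameter rather than as a chat message, and failed calls are retried up to max_retry times with a fixed 60 s delay.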