From 6ada353c4a9babc78a453f5cd8cf0a2b89fc9c1a Mon Sep 17 00:00:00 2001
From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com>
Date: Wed, 2 Jul 2025 11:41:22 -0400
Subject: [PATCH 1/2] feat: add cache retry logic for unsupported providers

---
 airtbench/main.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/airtbench/main.py b/airtbench/main.py
index ea269df..c06aa0f 100644
--- a/airtbench/main.py
+++ b/airtbench/main.py
@@ -247,6 +247,31 @@ async def run_step(
             dn.log_metric("max_tokens", 1)
             return None
 
+        # Handle caching-related errors by disabling cache and retrying
+        if "cache_control" in str(chat.error) and args.enable_cache:
+            logger.warning(f"|- Caching not supported by provider, disabling cache and retrying: {chat.error}")
+            dn.log_metric("cache_unsupported", 1)
+            # Create new pipeline without caching
+            retry_pipeline = (
+                generator.wrap(backoff_wrapper)
+                .chat(pipeline.chat.messages)
+                .cache(False)
+            )
+            try:
+                retry_chat = await retry_pipeline.catch(
+                    litellm.exceptions.InternalServerError,
+                    litellm.exceptions.BadRequestError,
+                    litellm.exceptions.Timeout,
+                    litellm.exceptions.ServiceUnavailableError,
+                    litellm.exceptions.APIConnectionError,
+                    on_failed="include",
+                ).run()
+                if not retry_chat.failed:
+                    logger.info("|- Successfully retried without cache")
+                    return retry_pipeline
+            except Exception as e:
+                logger.warning(f"|- Retry without cache also failed: {e}")
+
         logger.warning(f"|- Chat failed: {chat.error}")
         dn.log_metric("failed_chats", 1)
         pipeline.chat.generated = []

From eb9e6e3a8d2f29aa0798f1940fa5faed7b1c291d Mon Sep 17 00:00:00 2001
From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com>
Date: Wed, 2 Jul 2025 11:53:21 -0400
Subject: [PATCH 2/2] chore: modify run_step() function signature to accept
 generator and backoff_wrapper parameters

---
 airtbench/main.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/airtbench/main.py b/airtbench/main.py
index c06aa0f..d7e6f30 100644
--- a/airtbench/main.py
+++ b/airtbench/main.py
@@ -202,6 +202,8 @@ async def run_step(
     challenge: Challenge,
     pipeline: rg.ChatPipeline,
     kernel: PythonKernel,
+    generator: rg.Generator = None,
+    backoff_wrapper=None,
 ) -> rg.ChatPipeline | None:
     # If we are limiting the model to a single code
     # execution entry per step, we can safely stop
@@ -670,6 +672,8 @@ def on_backoff(details: backoff.types.Details) -> None:
                 challenge,
                 pipeline,
                 kernel,
+                generator,
+                backoff_wrapper,
             )
         else:
             logger.warning("|- Max steps reached")
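
For reference, PATCH 1/2 boils down to a small fallback pattern: when a failed chat's error mentions `cache_control` and caching was requested, rebuild the pipeline with `.cache(False)` and retry once. The sketch below is a minimal, self-contained illustration of that control flow only; `ChatResult`, `fake_pipeline`, and `run_step_with_fallback` are hypothetical stand-ins and not the rigging/litellm API used in airtbench/main.py.

```python
# Hypothetical sketch of the retry-without-cache fallback from PATCH 1/2.
# fake_pipeline() stands in for the real chat pipeline so the flow is runnable.
import asyncio
import logging
from dataclasses import dataclass

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@dataclass
class ChatResult:
    failed: bool = False
    error: str | None = None


async def fake_pipeline(cache: bool) -> ChatResult:
    # Pretend the provider rejects prompt caching when it is requested.
    if cache:
        return ChatResult(failed=True, error="cache_control is not supported by this provider")
    return ChatResult(failed=False)


async def run_step_with_fallback(enable_cache: bool) -> ChatResult:
    chat = await fake_pipeline(cache=enable_cache)
    # Only fall back when caching was the likely cause of the failure.
    if chat.failed and enable_cache and "cache_control" in str(chat.error):
        logger.warning("|- Caching not supported by provider, retrying without cache: %s", chat.error)
        retry = await fake_pipeline(cache=False)
        if not retry.failed:
            logger.info("|- Successfully retried without cache")
            return retry
    return chat


if __name__ == "__main__":
    result = asyncio.run(run_step_with_fallback(enable_cache=True))
    print("failed:", result.failed)
```

In the actual patch, the retry reuses `pipeline.chat.messages` so no conversation state is lost, and it catches the same litellm exception classes as the primary attempt, which keeps the backoff behavior consistent between the cached and uncached paths.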