Commit ccdb24a

fix: add litellm retry with exponential backoff for rate limit errors
- Add `num_retries=3` default to LLMConfig so litellm retries OpenAI 429 rate limit errors with its built-in exponential backoff.
- Increase Temporal DEFAULT_RETRY_POLICY from 1 attempt (no retries) to 3 attempts with exponential backoff (1s, 2s, 4s, ... up to 30s).

This complements the HTTPX connection limit reduction in the agentex backend (scaleapi/scale-agentex#144) to address OpenAI rate limiting under high concurrent load.
1 parent a277f10 commit ccdb24a
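The capped exponential backoff described in the commit message (1s, 2s, 4s, ... up to 30s) can be sketched as a small helper. `backoff_schedule` is a hypothetical name for illustration, not code from this commit:

```python
def backoff_schedule(
    max_attempts: int = 3,
    initial: float = 1.0,
    coefficient: float = 2.0,
    cap: float = 30.0,
) -> list[float]:
    """Delays (in seconds) between attempts: initial * coefficient**i, capped at `cap`.

    With max_attempts=3 there are two waits: after attempt 1 and after attempt 2.
    """
    return [min(initial * coefficient**i, cap) for i in range(max_attempts - 1)]


print(backoff_schedule())   # delays for the 3-attempt policy in this commit: [1.0, 2.0]
print(backoff_schedule(8))  # longer schedules hit the 30s cap
```

With the defaults from this commit the worst case adds only 1s + 2s of waiting before the final attempt, while the 30s cap keeps longer schedules bounded.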

File tree

2 files changed: +8 −1 lines changed

src/agentex/lib/adk/providers/_modules/litellm.py

Lines changed: 7 additions & 1 deletion
@@ -26,7 +26,13 @@
 logger = make_logger(__name__)

 # Default retry policy for all LiteLLM operations
-DEFAULT_RETRY_POLICY = RetryPolicy(maximum_attempts=1)
+# Retries with exponential backoff: 1s, 2s, 4s, ... up to 30s between attempts
+DEFAULT_RETRY_POLICY = RetryPolicy(
+    maximum_attempts=3,
+    initial_interval=timedelta(seconds=1),
+    backoff_coefficient=2.0,
+    maximum_interval=timedelta(seconds=30),
+)


 class LiteLLMModule:
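As an illustration of the semantics the new RetryPolicy encodes, here is a minimal, hypothetical retry wrapper (not from this repo; Temporal's server-side retries do the real work) that retries a callable with the same capped exponential backoff:

```python
import time
from typing import Callable, TypeVar

T = TypeVar("T")


def call_with_retries(
    fn: Callable[[], T],
    max_attempts: int = 3,
    initial: float = 1.0,
    coefficient: float = 2.0,
    cap: float = 30.0,
) -> T:
    """Invoke fn, retrying on any exception with capped exponential backoff."""
    delay = initial
    for attempt in range(1, max_attempts + 1):
        try:
            return fn()
        except Exception:
            if attempt == max_attempts:
                raise  # out of attempts: surface the last error
            time.sleep(min(delay, cap))  # wait before the next attempt
            delay *= coefficient
    raise AssertionError("unreachable")
```

A real policy would also distinguish retryable errors (e.g. 429 rate limits) from non-retryable ones, which Temporal's RetryPolicy supports via `non_retryable_error_types`.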

src/agentex/lib/types/llm_messages.py

Lines changed: 1 addition & 0 deletions
@@ -58,6 +58,7 @@ class LLMConfig(BaseModel):
     parallel_tool_calls: bool | None = None
     logprobs: bool | None = None
     top_logprobs: int | None = None
+    num_retries: int | None = 3


 class ContentPartText(BaseModel):

0 commit comments
