diff --git a/utils/call_llm.py b/utils/call_llm.py
index 70c9e83a..a6d28cf5 100644
--- a/utils/call_llm.py
+++ b/utils/call_llm.py
@@ -4,6 +4,9 @@
 import json
 import requests
 from datetime import datetime
+from google.genai import types
+from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
+from google.api_core.exceptions import ResourceExhausted
 
 # Configure logging
 log_directory = os.getenv("LOG_DIR", "logs")
@@ -158,7 +161,14 @@ def call_llm(prompt: str, use_cache: bool = True) -> str:
 
     return response_text
 
+# Retry on quota errors (ResourceExhausted) with exponential backoff
+@retry(
+    retry=retry_if_exception_type(ResourceExhausted),
+    wait=wait_exponential(multiplier=2, min=4, max=60),  # Wait 4s, then 8s, then 16s...
+    stop=stop_after_attempt(5)
+)
 def _call_llm_gemini(prompt: str) -> str:
+    # Initialize the client based on available credentials
     if os.getenv("GEMINI_PROJECT_ID"):
         client = genai.Client(
             vertexai=True,
@@ -169,12 +179,47 @@ def _call_llm_gemini(prompt: str) -> str:
         client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
     else:
         raise ValueError("Either GEMINI_PROJECT_ID or GEMINI_API_KEY must be set in the environment")
-    model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
-    response = client.models.generate_content(
-        model=model,
-        contents=[prompt]
-    )
-    return response.text
+
+    # Default to Gemini 3 Pro Preview if GEMINI_MODEL is not set
+    model = os.getenv("GEMINI_MODEL", "gemini-3-pro-preview")
+
+    # Configure thinking based on the model family
+    config = None
+
+    # Gemini 3 models take a 'thinking_level' ("low" or "high")
+    if "gemini-3" in model:
+        config = types.GenerateContentConfig(
+            thinking_config=types.ThinkingConfig(
+                include_thoughts=True,
+                thinking_level="high"  # Options: "low", "high"
+            )
+        )
+    # Gemini 2.5 models take a 'thinking_budget' (a token count)
+    elif "thinking" in model or "gemini-2.5" in model:
+        config = types.GenerateContentConfig(
+            thinking_config=types.ThinkingConfig(
+                include_thoughts=True,
+                thinking_budget=1024
+            )
+        )
+
+    try:
+        response = client.models.generate_content(
+            model=model,
+            contents=[prompt],
+            config=config
+        )
+        return response.text
+    except Exception as e:
+        # Fallback for models that reject the thinking config: retry once without it
+        if "400" in str(e) and "thinking" in str(e).lower():
+            logger.warning(f"Thinking config not supported for {model}, retrying without it.")
+            response = client.models.generate_content(
+                model=model,
+                contents=[prompt]
+            )
+            return response.text
+        raise
 
 if __name__ == "__main__":
     test_prompt = "Hello, how are you?"
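
One caveat on the retry predicate, flagged as an assumption rather than a confirmed bug: `google.api_core.exceptions.ResourceExhausted` is what the classic google-api-core transports raise on quota exhaustion, but the google-genai SDK used by `genai.Client` reports rate limits as `google.genai.errors.ClientError` (an `APIError` carrying HTTP code 429), which the decorator above would not catch. If the retry never fires in practice, a broader predicate along the following lines may be needed; this is a minimal sketch, and `_is_rate_limited` is a hypothetical helper, not part of this repo:

    # Sketch: retry on either exception family, assuming both google-genai
    # and google-api-core are installed. _is_rate_limited is hypothetical.
    from google.api_core.exceptions import ResourceExhausted
    from google.genai import errors as genai_errors
    from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential

    def _is_rate_limited(exc: BaseException) -> bool:
        # api_core path (e.g. Vertex AI via the older transports)
        if isinstance(exc, ResourceExhausted):
            return True
        # google-genai path: APIError exposes the HTTP status as .code
        return isinstance(exc, genai_errors.APIError) and getattr(exc, "code", None) == 429

    @retry(
        retry=retry_if_exception(_is_rate_limited),
        wait=wait_exponential(multiplier=2, min=4, max=60),
        stop=stop_after_attempt(5),
        reraise=True,  # re-raise the last error instead of tenacity's RetryError
    )
    def _call_llm_gemini(prompt: str) -> str:
        ...

Setting `reraise=True` makes tenacity re-raise the final exception rather than wrapping it in a `RetryError`, so any error handling in the `call_llm` wrapper keeps seeing the original exception type.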