Commit 1febc15
fix: enforce tier-aware max_tokens in AutoAgents LLM calls
CodeGraphChatAdapter was creating a GenerationConfig with max_tokens: None,
ignoring tier-based token limits. As a result, all responses used the
default limit regardless of tier.
Changes:
- Store tier in CodeGraphChatAdapter
- Add get_max_tokens() method with env var override
- Set max_tokens in GenerationConfig based on tier
Token limits by tier:
- Small: 2,048 tokens
- Medium: 4,096 tokens
- Large: 8,192 tokens
- Massive: 16,384 tokens
Environment variable override:
- Set MCP_CODE_AGENT_MAX_OUTPUT_TOKENS to override tier defaults
- Useful for testing or specific deployment constraints
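Taken together, the resolution logic can be sketched roughly as below. The `Tier` enum and the two function names here are illustrative assumptions, not the crate's actual API; only the per-tier constants and the `MCP_CODE_AGENT_MAX_OUTPUT_TOKENS` variable name come from the commit message.

```rust
use std::env;

// Size tiers as listed above; the real type in the codebase
// may be named or shaped differently (hypothetical sketch).
#[derive(Clone, Copy, Debug)]
enum Tier {
    Small,
    Medium,
    Large,
    Massive,
}

// Default output-token budget per tier, matching the table above.
fn tier_default_tokens(tier: Tier) -> u32 {
    match tier {
        Tier::Small => 2_048,
        Tier::Medium => 4_096,
        Tier::Large => 8_192,
        Tier::Massive => 16_384,
    }
}

// Effective limit: a valid integer in the env var overrides the
// tier default; an unset or unparsable value falls back to it.
fn get_max_tokens(tier: Tier) -> u32 {
    env::var("MCP_CODE_AGENT_MAX_OUTPUT_TOKENS")
        .ok()
        .and_then(|v| v.parse::<u32>().ok())
        .unwrap_or_else(|| tier_default_tokens(tier))
}

fn main() {
    for tier in [Tier::Small, Tier::Medium, Tier::Large, Tier::Massive] {
        println!("{:?}: {} tokens", tier, get_max_tokens(tier));
    }
}
```

The resolved value would then populate the GenerationConfig's max_tokens field (as Some(limit)) rather than None, which was the bug being fixed.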
This ensures analysis output length matches tier expectations.

Parent: fa91915
1 file changed: +26, -4 lines
(diff table not captured by the extraction; the change touches one file in three hunks, around original lines 34-44, 80-86, and 357-363)