@@ -11,15 +11,14 @@ import type { XaiProviderOptions } from "@ai-sdk/xai";
1111import type { MuxProviderOptions } from "@/common/types/providerOptions" ;
1212import type { ThinkingLevel } from "@/common/types/thinking" ;
1313import {
14- ANTHROPIC_THINKING_BUDGETS ,
14+ ANTHROPIC_EFFORT ,
1515 GEMINI_THINKING_BUDGETS ,
1616 OPENAI_REASONING_EFFORT ,
1717 OPENROUTER_REASONING_EFFORT ,
1818} from "@/common/types/thinking" ;
1919import { log } from "@/node/services/log" ;
2020import type { MuxMessage } from "@/common/types/message" ;
2121import { enforceThinkingPolicy } from "@/browser/utils/thinking/policy" ;
22- import { getModelStats } from "@/common/utils/tokens/modelStats" ;
2322
2423/**
2524 * OpenRouter reasoning options
@@ -84,23 +83,19 @@ export function buildProviderOptions(
8483
8584 // Build Anthropic-specific options
8685 if ( provider === "anthropic" ) {
87- const budgetTokens = ANTHROPIC_THINKING_BUDGETS [ effectiveThinking ] ;
86+ const effort = ANTHROPIC_EFFORT [ effectiveThinking ] ;
8887 log . debug ( "buildProviderOptions: Anthropic config" , {
89- budgetTokens ,
88+ effort ,
9089 thinkingLevel : effectiveThinking ,
9190 } ) ;
9291
9392 const options : ProviderOptions = {
9493 anthropic : {
9594 disableParallelToolUse : false , // Always enable concurrent tool execution
9695 sendReasoning : true , // Include reasoning traces in requests sent to the model
97- // Conditionally add thinking configuration
98- ...( budgetTokens > 0 && {
99- thinking : {
100- type : "enabled" ,
101- budgetTokens,
102- } ,
103- } ) ,
96+ // Use effort parameter to control token spend (thinking, text, and tool calls)
97+ // SDK auto-adds beta header "effort-2025-11-24" when effort is set
98+ ...( effort && { effort } ) ,
10499 } ,
105100 } ;
106101 log . debug ( "buildProviderOptions: Returning Anthropic options" , options ) ;
@@ -278,77 +273,3 @@ export function buildProviderOptions(
278273 log . debug ( "buildProviderOptions: Unsupported provider" , provider ) ;
279274 return { } ;
280275}
281-
282- /**
283- * Calculate the effective maxOutputTokens for a model based on its limits and thinking budget
284- *
285- * For Anthropic models with extended thinking, the AI SDK adds thinkingBudget to maxOutputTokens
286- * internally. We need to ensure the sum doesn't exceed the model's max_output_tokens limit.
287- *
288- * For example, Claude Opus 4 has max_output_tokens=32000. If we use:
289- * - thinkingBudget=20000 (high)
290- * - maxOutputTokens=32000
291- * Then total=52000 which exceeds 32000 → SDK shows warning and caps output
292- *
293- * Solution: Reduce maxOutputTokens so that maxOutputTokens + thinkingBudget <= model limit
294- *
295- * @param modelString - Full model string (e.g., "anthropic:claude-opus-4-1")
296- * @param thinkingLevel - Current thinking level
297- * @param requestedMaxOutputTokens - Optional user-requested maxOutputTokens
298- * @returns Effective maxOutputTokens that respects model limits with thinking budget
299- */
300- export function calculateEffectiveMaxOutputTokens (
301- modelString : string ,
302- thinkingLevel : ThinkingLevel ,
303- requestedMaxOutputTokens ?: number
304- ) : number | undefined {
305- const [ provider ] = modelString . split ( ":" ) ;
306-
307- // Only apply this adjustment for Anthropic models
308- if ( provider !== "anthropic" ) {
309- return requestedMaxOutputTokens ;
310- }
311-
312- // Get the actual thinking level after policy enforcement
313- const effectiveThinking = enforceThinkingPolicy ( modelString , thinkingLevel ) ;
314- const thinkingBudget = ANTHROPIC_THINKING_BUDGETS [ effectiveThinking ] ;
315-
316- // Get model's max output tokens from models.json
317- const modelStats = getModelStats ( modelString ) ;
318- const modelMaxOutput = modelStats ?. max_output_tokens ;
319-
320- // If we don't know the model's max output, return requested value
321- if ( ! modelMaxOutput ) {
322- log . debug ( "calculateEffectiveMaxOutputTokens: Unknown model max output, using requested" , {
323- modelString,
324- requestedMaxOutputTokens,
325- } ) ;
326- return requestedMaxOutputTokens ;
327- }
328-
329- // Calculate the maximum safe maxOutputTokens
330- // The SDK will add thinkingBudget to maxOutputTokens, so we need room for both
331- const maxSafeOutput = modelMaxOutput - thinkingBudget ;
332-
333- // If user didn't request specific tokens, use the max safe value
334- const targetOutput = requestedMaxOutputTokens ?? modelMaxOutput ;
335-
336- // Cap at the safe maximum
337- const effectiveOutput = Math . min ( targetOutput , maxSafeOutput ) ;
338-
339- // Ensure we don't go below a reasonable minimum (1000 tokens)
340- const finalOutput = Math . max ( effectiveOutput , 1000 ) ;
341-
342- log . debug ( "calculateEffectiveMaxOutputTokens" , {
343- modelString,
344- thinkingLevel,
345- effectiveThinking,
346- thinkingBudget,
347- modelMaxOutput,
348- requestedMaxOutputTokens,
349- maxSafeOutput,
350- finalOutput,
351- } ) ;
352-
353- return finalOutput ;
354- }