Skip to content

Commit b7826b6

Browse files
committed
🤖 feat: use effort parameter for Claude thinking
Replace the budget_tokens approach with the new effort parameter:
- ANTHROPIC_EFFORT maps thinking levels to 'low' | 'medium' | 'high'
- SDK auto-adds the 'effort-2025-11-24' beta header
- Remove calculateEffectiveMaxOutputTokens (no longer needed)
- Effort controls all token spend: thinking, text, and tool calls

_Generated with mux_
1 parent 6830b71 commit b7826b6

File tree

3 files changed

+23
-110
lines changed

3 files changed

+23
-110
lines changed

src/common/types/thinking.ts

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,23 @@ export type ThinkingLevel = "off" | "low" | "medium" | "high";
1414
export type ThinkingLevelOn = Exclude<ThinkingLevel, "off">;
1515

1616
/**
17-
* Anthropic thinking token budget mapping
17+
* Anthropic effort level mapping
1818
*
19-
* These heuristics balance thinking depth with response time and cost:
20-
* - off: No extended thinking
21-
* - low: Quick thinking for straightforward tasks (4K tokens)
22-
* - medium: Standard thinking for moderate complexity (10K tokens)
23-
* - high: Deep thinking for complex problems (20K tokens)
19+
* Maps our unified thinking levels to Anthropic's effort parameter:
20+
* - off: No effort specified (undefined)
21+
* - low: Most efficient - significant token savings
22+
* - medium: Balanced approach with moderate token savings
23+
* - high: Maximum capability (default behavior)
24+
*
25+
* The effort parameter controls all token spend including thinking,
26+
* text responses, and tool calls. Unlike budget_tokens, it doesn't require
27+
* thinking to be explicitly enabled.
2428
*/
25-
export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
26-
off: 0,
27-
low: 4000,
28-
medium: 10000,
29-
high: 20000,
29+
export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
30+
off: undefined,
31+
low: "low",
32+
medium: "medium",
33+
high: "high",
3034
};
3135

3236
/**

src/common/utils/ai/providerOptions.ts

Lines changed: 6 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,14 @@ import type { XaiProviderOptions } from "@ai-sdk/xai";
1111
import type { MuxProviderOptions } from "@/common/types/providerOptions";
1212
import type { ThinkingLevel } from "@/common/types/thinking";
1313
import {
14-
ANTHROPIC_THINKING_BUDGETS,
14+
ANTHROPIC_EFFORT,
1515
GEMINI_THINKING_BUDGETS,
1616
OPENAI_REASONING_EFFORT,
1717
OPENROUTER_REASONING_EFFORT,
1818
} from "@/common/types/thinking";
1919
import { log } from "@/node/services/log";
2020
import type { MuxMessage } from "@/common/types/message";
2121
import { enforceThinkingPolicy } from "@/browser/utils/thinking/policy";
22-
import { getModelStats } from "@/common/utils/tokens/modelStats";
2322

2423
/**
2524
* OpenRouter reasoning options
@@ -84,23 +83,19 @@ export function buildProviderOptions(
8483

8584
// Build Anthropic-specific options
8685
if (provider === "anthropic") {
87-
const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
86+
const effort = ANTHROPIC_EFFORT[effectiveThinking];
8887
log.debug("buildProviderOptions: Anthropic config", {
89-
budgetTokens,
88+
effort,
9089
thinkingLevel: effectiveThinking,
9190
});
9291

9392
const options: ProviderOptions = {
9493
anthropic: {
9594
disableParallelToolUse: false, // Always enable concurrent tool execution
9695
sendReasoning: true, // Include reasoning traces in requests sent to the model
97-
// Conditionally add thinking configuration
98-
...(budgetTokens > 0 && {
99-
thinking: {
100-
type: "enabled",
101-
budgetTokens,
102-
},
103-
}),
96+
// Use effort parameter to control token spend (thinking, text, and tool calls)
97+
// SDK auto-adds beta header "effort-2025-11-24" when effort is set
98+
...(effort && { effort }),
10499
},
105100
};
106101
log.debug("buildProviderOptions: Returning Anthropic options", options);
@@ -278,77 +273,3 @@ export function buildProviderOptions(
278273
log.debug("buildProviderOptions: Unsupported provider", provider);
279274
return {};
280275
}
281-
282-
/**
283-
* Calculate the effective maxOutputTokens for a model based on its limits and thinking budget
284-
*
285-
* For Anthropic models with extended thinking, the AI SDK adds thinkingBudget to maxOutputTokens
286-
* internally. We need to ensure the sum doesn't exceed the model's max_output_tokens limit.
287-
*
288-
* For example, Claude Opus 4 has max_output_tokens=32000. If we use:
289-
* - thinkingBudget=20000 (high)
290-
* - maxOutputTokens=32000
291-
* Then total=52000 which exceeds 32000 → SDK shows warning and caps output
292-
*
293-
* Solution: Reduce maxOutputTokens so that maxOutputTokens + thinkingBudget <= model limit
294-
*
295-
* @param modelString - Full model string (e.g., "anthropic:claude-opus-4-1")
296-
* @param thinkingLevel - Current thinking level
297-
* @param requestedMaxOutputTokens - Optional user-requested maxOutputTokens
298-
* @returns Effective maxOutputTokens that respects model limits with thinking budget
299-
*/
300-
export function calculateEffectiveMaxOutputTokens(
301-
modelString: string,
302-
thinkingLevel: ThinkingLevel,
303-
requestedMaxOutputTokens?: number
304-
): number | undefined {
305-
const [provider] = modelString.split(":");
306-
307-
// Only apply this adjustment for Anthropic models
308-
if (provider !== "anthropic") {
309-
return requestedMaxOutputTokens;
310-
}
311-
312-
// Get the actual thinking level after policy enforcement
313-
const effectiveThinking = enforceThinkingPolicy(modelString, thinkingLevel);
314-
const thinkingBudget = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
315-
316-
// Get model's max output tokens from models.json
317-
const modelStats = getModelStats(modelString);
318-
const modelMaxOutput = modelStats?.max_output_tokens;
319-
320-
// If we don't know the model's max output, return requested value
321-
if (!modelMaxOutput) {
322-
log.debug("calculateEffectiveMaxOutputTokens: Unknown model max output, using requested", {
323-
modelString,
324-
requestedMaxOutputTokens,
325-
});
326-
return requestedMaxOutputTokens;
327-
}
328-
329-
// Calculate the maximum safe maxOutputTokens
330-
// The SDK will add thinkingBudget to maxOutputTokens, so we need room for both
331-
const maxSafeOutput = modelMaxOutput - thinkingBudget;
332-
333-
// If user didn't request specific tokens, use the max safe value
334-
const targetOutput = requestedMaxOutputTokens ?? modelMaxOutput;
335-
336-
// Cap at the safe maximum
337-
const effectiveOutput = Math.min(targetOutput, maxSafeOutput);
338-
339-
// Ensure we don't go below a reasonable minimum (1000 tokens)
340-
const finalOutput = Math.max(effectiveOutput, 1000);
341-
342-
log.debug("calculateEffectiveMaxOutputTokens", {
343-
modelString,
344-
thinkingLevel,
345-
effectiveThinking,
346-
thinkingBudget,
347-
modelMaxOutput,
348-
requestedMaxOutputTokens,
349-
maxSafeOutput,
350-
finalOutput,
351-
});
352-
353-
return finalOutput;
354-
}

src/node/services/aiService.ts

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,7 @@ import type { HistoryService } from "./historyService";
3333
import type { PartialService } from "./partialService";
3434
import { buildSystemMessage, readToolInstructions } from "./systemMessage";
3535
import { getTokenizerForModel } from "@/node/utils/main/tokenizer";
36-
import {
37-
buildProviderOptions,
38-
calculateEffectiveMaxOutputTokens,
39-
} from "@/common/utils/ai/providerOptions";
36+
import { buildProviderOptions } from "@/common/utils/ai/providerOptions";
4037
import type { ThinkingLevel } from "@/common/types/thinking";
4138
import type {
4239
StreamAbortEvent,
@@ -929,15 +926,6 @@ export class AIService extends EventEmitter {
929926
effectiveMuxProviderOptions
930927
);
931928

932-
// Calculate effective maxOutputTokens that accounts for thinking budget
933-
// For Anthropic models with extended thinking, the SDK adds thinkingBudget to maxOutputTokens
934-
// so we need to ensure the sum doesn't exceed the model's max_output_tokens limit
935-
const effectiveMaxOutputTokens = calculateEffectiveMaxOutputTokens(
936-
effectiveModelString,
937-
thinkingLevel ?? "off",
938-
maxOutputTokens
939-
);
940-
941929
// Delegate to StreamManager with model instance, system message, tools, historySequence, and initial metadata
942930
const streamResult = await this.streamManager.startStream(
943931
workspaceId,
@@ -955,7 +943,7 @@ export class AIService extends EventEmitter {
955943
mode, // Pass mode so it persists in final history entry
956944
},
957945
providerOptions,
958-
effectiveMaxOutputTokens,
946+
maxOutputTokens,
959947
toolPolicy,
960948
streamToken // Pass the pre-generated stream token
961949
);

0 commit comments

Comments (0)