Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions src/common/types/thinking.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,23 @@ export type ThinkingLevel = "off" | "low" | "medium" | "high";
/** Thinking levels that actually enable extended thinking — every level except "off". */
export type ThinkingLevelOn = Exclude<ThinkingLevel, "off">;

/**
* Anthropic thinking token budget mapping
* Anthropic effort level mapping
*
* These heuristics balance thinking depth with response time and cost:
* - off: No extended thinking
* - low: Quick thinking for straightforward tasks (4K tokens)
* - medium: Standard thinking for moderate complexity (10K tokens)
* - high: Deep thinking for complex problems (20K tokens)
* Maps our unified thinking levels to Anthropic's effort parameter:
* - off: No effort specified (undefined)
* - low: Most efficient - significant token savings
* - medium: Balanced approach with moderate token savings
* - high: Maximum capability (default behavior)
*
* The effort parameter controls all token spend including thinking,
* text responses, and tool calls. Unlike budget_tokens, it doesn't require
* thinking to be explicitly enabled.
*/
export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
off: 0,
low: 4000,
medium: 10000,
high: 20000,
// Maps each unified ThinkingLevel to the value sent as Anthropic's `effort` parameter.
// "off" maps to undefined so callers can conditionally omit the field entirely
// (see the `...(effort && { effort })` spread in buildProviderOptions).
export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
off: undefined, // no effort key emitted — provider default behavior
low: "low",
medium: "medium",
high: "high",
};

/**
Expand Down
91 changes: 6 additions & 85 deletions src/common/utils/ai/providerOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,14 @@ import type { XaiProviderOptions } from "@ai-sdk/xai";
import type { MuxProviderOptions } from "@/common/types/providerOptions";
import type { ThinkingLevel } from "@/common/types/thinking";
import {
ANTHROPIC_THINKING_BUDGETS,
ANTHROPIC_EFFORT,
GEMINI_THINKING_BUDGETS,
OPENAI_REASONING_EFFORT,
OPENROUTER_REASONING_EFFORT,
} from "@/common/types/thinking";
import { log } from "@/node/services/log";
import type { MuxMessage } from "@/common/types/message";
import { enforceThinkingPolicy } from "@/browser/utils/thinking/policy";
import { getModelStats } from "@/common/utils/tokens/modelStats";

/**
* OpenRouter reasoning options
Expand Down Expand Up @@ -84,23 +83,19 @@ export function buildProviderOptions(

// Build Anthropic-specific options
if (provider === "anthropic") {
const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
const effort = ANTHROPIC_EFFORT[effectiveThinking];
log.debug("buildProviderOptions: Anthropic config", {
budgetTokens,
effort,
thinkingLevel: effectiveThinking,
});

const options: ProviderOptions = {
anthropic: {
disableParallelToolUse: false, // Always enable concurrent tool execution
sendReasoning: true, // Include reasoning traces in requests sent to the model
// Conditionally add thinking configuration
...(budgetTokens > 0 && {
thinking: {
type: "enabled",
budgetTokens,
},
}),
// Use effort parameter to control token spend (thinking, text, and tool calls)
// SDK auto-adds beta header "effort-2025-11-24" when effort is set
...(effort && { effort }),
},
};
log.debug("buildProviderOptions: Returning Anthropic options", options);
Expand Down Expand Up @@ -278,77 +273,3 @@ export function buildProviderOptions(
log.debug("buildProviderOptions: Unsupported provider", provider);
return {};
}

/**
* Calculate the effective maxOutputTokens for a model based on its limits and thinking budget
*
* For Anthropic models with extended thinking, the AI SDK adds thinkingBudget to maxOutputTokens
* internally. We need to ensure the sum doesn't exceed the model's max_output_tokens limit.
*
* For example, Claude Opus 4 has max_output_tokens=32000. If we use:
* - thinkingBudget=20000 (high)
* - maxOutputTokens=32000
* Then total=52000 which exceeds 32000 → SDK shows warning and caps output
*
* Solution: Reduce maxOutputTokens so that maxOutputTokens + thinkingBudget <= model limit
*
* @param modelString - Full model string (e.g., "anthropic:claude-opus-4-1")
* @param thinkingLevel - Current thinking level
* @param requestedMaxOutputTokens - Optional user-requested maxOutputTokens
* @returns Effective maxOutputTokens that respects model limits with thinking budget
*/
export function calculateEffectiveMaxOutputTokens(
modelString: string,
thinkingLevel: ThinkingLevel,
requestedMaxOutputTokens?: number
): number | undefined {
const [provider] = modelString.split(":");

// Only apply this adjustment for Anthropic models
if (provider !== "anthropic") {
return requestedMaxOutputTokens;
}

// Get the actual thinking level after policy enforcement
const effectiveThinking = enforceThinkingPolicy(modelString, thinkingLevel);
const thinkingBudget = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];

// Get model's max output tokens from models.json
const modelStats = getModelStats(modelString);
const modelMaxOutput = modelStats?.max_output_tokens;

// If we don't know the model's max output, return requested value
if (!modelMaxOutput) {
log.debug("calculateEffectiveMaxOutputTokens: Unknown model max output, using requested", {
modelString,
requestedMaxOutputTokens,
});
return requestedMaxOutputTokens;
}

// Calculate the maximum safe maxOutputTokens
// The SDK will add thinkingBudget to maxOutputTokens, so we need room for both
const maxSafeOutput = modelMaxOutput - thinkingBudget;

// If user didn't request specific tokens, use the max safe value
const targetOutput = requestedMaxOutputTokens ?? modelMaxOutput;

// Cap at the safe maximum
const effectiveOutput = Math.min(targetOutput, maxSafeOutput);

// Ensure we don't go below a reasonable minimum (1000 tokens)
const finalOutput = Math.max(effectiveOutput, 1000);

log.debug("calculateEffectiveMaxOutputTokens", {
modelString,
thinkingLevel,
effectiveThinking,
thinkingBudget,
modelMaxOutput,
requestedMaxOutputTokens,
maxSafeOutput,
finalOutput,
});

return finalOutput;
}
16 changes: 2 additions & 14 deletions src/node/services/aiService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@ import type { HistoryService } from "./historyService";
import type { PartialService } from "./partialService";
import { buildSystemMessage, readToolInstructions } from "./systemMessage";
import { getTokenizerForModel } from "@/node/utils/main/tokenizer";
import {
buildProviderOptions,
calculateEffectiveMaxOutputTokens,
} from "@/common/utils/ai/providerOptions";
import { buildProviderOptions } from "@/common/utils/ai/providerOptions";
import type { ThinkingLevel } from "@/common/types/thinking";
import type {
StreamAbortEvent,
Expand Down Expand Up @@ -929,15 +926,6 @@ export class AIService extends EventEmitter {
effectiveMuxProviderOptions
);

// Calculate effective maxOutputTokens that accounts for thinking budget
// For Anthropic models with extended thinking, the SDK adds thinkingBudget to maxOutputTokens
// so we need to ensure the sum doesn't exceed the model's max_output_tokens limit
const effectiveMaxOutputTokens = calculateEffectiveMaxOutputTokens(
effectiveModelString,
thinkingLevel ?? "off",
maxOutputTokens
);

// Delegate to StreamManager with model instance, system message, tools, historySequence, and initial metadata
const streamResult = await this.streamManager.startStream(
workspaceId,
Expand All @@ -955,7 +943,7 @@ export class AIService extends EventEmitter {
mode, // Pass mode so it persists in final history entry
},
providerOptions,
effectiveMaxOutputTokens,
maxOutputTokens,
toolPolicy,
streamToken // Pass the pre-generated stream token
);
Expand Down