Skip to content

Commit 8ce486c

Browse files
committed
🤖 fix: use effort param only for Claude Opus 4.5
The effort parameter is a new feature specific to Claude Opus 4.5. Other Anthropic models (Sonnet 4.5, Haiku 4.5, Opus 4.1, etc.) must use the thinking.budgetTokens approach for extended thinking.

Changes:
- Re-add ANTHROPIC_THINKING_BUDGETS for non-Opus 4.5 models
- Update buildProviderOptions to detect Opus 4.5 and use effort param
- Non-Opus 4.5 models use thinking.budgetTokens instead
- Add unit tests for provider options builder
- Add integration tests for reasoning with both Sonnet 4.5 and Opus 4.5

_Generated with `mux`_
1 parent 05ce4b6 commit 8ce486c

File tree

4 files changed

+395
-14
lines changed

4 files changed

+395
-14
lines changed

src/common/types/thinking.ts

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,33 @@ export type ThinkingLevel = "off" | "low" | "medium" | "high";
1414
export type ThinkingLevelOn = Exclude<ThinkingLevel, "off">;
1515

1616
/**
17-
* Anthropic effort level mapping
17+
* Anthropic thinking token budget mapping
1818
*
19-
* Maps our unified thinking levels to Anthropic's effort parameter:
20-
* - off: No effort specified (undefined)
21-
* - low: Most efficient - significant token savings
22-
* - medium: Balanced approach with moderate token savings
23-
* - high: Maximum capability (default behavior)
19+
* These heuristics balance thinking depth with response time and cost.
20+
* Used for models that support extended thinking with budgetTokens
21+
* (e.g., Sonnet 4.5, Haiku 4.5, Opus 4.1, etc.)
2422
*
25-
* The effort parameter controls all token spend including thinking,
26-
* text responses, and tool calls. Unlike budget_tokens, it doesn't require
27-
* thinking to be explicitly enabled.
23+
* - off: No extended thinking
24+
* - low: Quick thinking for straightforward tasks (4K tokens)
25+
* - medium: Standard thinking for moderate complexity (10K tokens)
26+
* - high: Deep thinking for complex problems (20K tokens)
27+
*/
28+
export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
29+
off: 0,
30+
low: 4000,
31+
medium: 10000,
32+
high: 20000,
33+
};
34+
35+
/**
36+
* Anthropic Opus 4.5 effort parameter mapping
37+
*
38+
* The effort parameter is a new feature ONLY available for Claude Opus 4.5.
39+
* It controls how much computational work the model applies to each task.
40+
*
41+
* Other Anthropic models must use the thinking.budgetTokens approach instead.
42+
*
43+
* @see https://www.anthropic.com/news/claude-opus-4-5
2844
*/
2945
export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
3046
off: undefined,
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/**
2+
* Tests for provider options builder
3+
*/
4+
5+
import { describe, test, expect, mock } from "bun:test";
6+
import { buildProviderOptions } from "./providerOptions";
7+
import type { ThinkingLevel } from "@/common/types/thinking";
8+
9+
// Mock the log module to avoid console noise
10+
void mock.module("@/node/services/log", () => ({
11+
log: {
12+
debug: (): void => undefined,
13+
info: (): void => undefined,
14+
warn: (): void => undefined,
15+
error: (): void => undefined,
16+
},
17+
}));
18+
19+
// Mock enforceThinkingPolicy to pass through
20+
void mock.module("@/browser/utils/thinking/policy", () => ({
21+
enforceThinkingPolicy: (_model: string, level: ThinkingLevel) => level,
22+
}));
23+
24+
describe("buildProviderOptions - Anthropic", () => {
25+
describe("Opus 4.5 (effort parameter)", () => {
26+
test("should use effort parameter for claude-opus-4-5", () => {
27+
const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium");
28+
29+
expect(result).toEqual({
30+
anthropic: {
31+
disableParallelToolUse: false,
32+
sendReasoning: true,
33+
effort: "medium",
34+
},
35+
});
36+
});
37+
38+
test("should use effort parameter for claude-opus-4-5-20251101", () => {
39+
const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high");
40+
41+
expect(result).toEqual({
42+
anthropic: {
43+
disableParallelToolUse: false,
44+
sendReasoning: true,
45+
effort: "high",
46+
},
47+
});
48+
});
49+
50+
test("should omit effort when thinking is off for Opus 4.5", () => {
51+
const result = buildProviderOptions("anthropic:claude-opus-4-5", "off");
52+
53+
expect(result).toEqual({
54+
anthropic: {
55+
disableParallelToolUse: false,
56+
sendReasoning: true,
57+
},
58+
});
59+
});
60+
});
61+
62+
describe("Other Anthropic models (thinking/budgetTokens)", () => {
63+
test("should use thinking.budgetTokens for claude-sonnet-4-5", () => {
64+
const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "medium");
65+
66+
expect(result).toEqual({
67+
anthropic: {
68+
disableParallelToolUse: false,
69+
sendReasoning: true,
70+
thinking: {
71+
type: "enabled",
72+
budgetTokens: 10000,
73+
},
74+
},
75+
});
76+
});
77+
78+
test("should use thinking.budgetTokens for claude-opus-4-1", () => {
79+
const result = buildProviderOptions("anthropic:claude-opus-4-1", "high");
80+
81+
expect(result).toEqual({
82+
anthropic: {
83+
disableParallelToolUse: false,
84+
sendReasoning: true,
85+
thinking: {
86+
type: "enabled",
87+
budgetTokens: 20000,
88+
},
89+
},
90+
});
91+
});
92+
93+
test("should use thinking.budgetTokens for claude-haiku-4-5", () => {
94+
const result = buildProviderOptions("anthropic:claude-haiku-4-5", "low");
95+
96+
expect(result).toEqual({
97+
anthropic: {
98+
disableParallelToolUse: false,
99+
sendReasoning: true,
100+
thinking: {
101+
type: "enabled",
102+
budgetTokens: 4000,
103+
},
104+
},
105+
});
106+
});
107+
108+
test("should omit thinking when thinking is off for non-Opus 4.5", () => {
109+
const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "off");
110+
111+
expect(result).toEqual({
112+
anthropic: {
113+
disableParallelToolUse: false,
114+
sendReasoning: true,
115+
},
116+
});
117+
});
118+
});
119+
});

src/common/utils/ai/providerOptions.ts

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import type { MuxProviderOptions } from "@/common/types/providerOptions";
1212
import type { ThinkingLevel } from "@/common/types/thinking";
1313
import {
1414
ANTHROPIC_EFFORT,
15+
ANTHROPIC_THINKING_BUDGETS,
1516
GEMINI_THINKING_BUDGETS,
1617
OPENAI_REASONING_EFFORT,
1718
OPENROUTER_REASONING_EFFORT,
@@ -83,19 +84,53 @@ export function buildProviderOptions(
8384

8485
// Build Anthropic-specific options
8586
if (provider === "anthropic") {
86-
const effort = ANTHROPIC_EFFORT[effectiveThinking];
87+
// Extract model name from model string (e.g., "anthropic:claude-opus-4-5" -> "claude-opus-4-5")
88+
const [, modelName] = modelString.split(":");
89+
90+
// Check if this is Opus 4.5 (supports effort parameter)
91+
// Opus 4.5 uses the new "effort" parameter for reasoning control
92+
// All other Anthropic models use the "thinking" parameter with budgetTokens
93+
const isOpus45 = modelName?.includes("opus-4-5") ?? false;
94+
95+
if (isOpus45) {
96+
// Opus 4.5: Use effort parameter for reasoning control
97+
const effort = ANTHROPIC_EFFORT[effectiveThinking];
98+
log.debug("buildProviderOptions: Anthropic Opus 4.5 config", {
99+
effort,
100+
thinkingLevel: effectiveThinking,
101+
});
102+
103+
const options: ProviderOptions = {
104+
anthropic: {
105+
disableParallelToolUse: false, // Always enable concurrent tool execution
106+
sendReasoning: true, // Include reasoning traces in requests sent to the model
107+
// Use effort parameter (Opus 4.5 only) to control token spend
108+
// SDK auto-adds beta header "effort-2025-11-24" when effort is set
109+
...(effort && { effort }),
110+
},
111+
};
112+
log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options);
113+
return options;
114+
}
115+
116+
// Other Anthropic models: Use thinking parameter with budgetTokens
117+
const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
87118
log.debug("buildProviderOptions: Anthropic config", {
88-
effort,
119+
budgetTokens,
89120
thinkingLevel: effectiveThinking,
90121
});
91122

92123
const options: ProviderOptions = {
93124
anthropic: {
94125
disableParallelToolUse: false, // Always enable concurrent tool execution
95126
sendReasoning: true, // Include reasoning traces in requests sent to the model
96-
// Use effort parameter to control token spend (thinking, text, and tool calls)
97-
// SDK auto-adds beta header "effort-2025-11-24" when effort is set
98-
...(effort && { effort }),
127+
// Conditionally add thinking configuration (non-Opus 4.5 models)
128+
...(budgetTokens > 0 && {
129+
thinking: {
130+
type: "enabled",
131+
budgetTokens,
132+
},
133+
}),
99134
},
100135
};
101136
log.debug("buildProviderOptions: Returning Anthropic options", options);

0 commit comments

Comments
 (0)