diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx index b25b7c7867..a36bec3587 100644 --- a/src/browser/components/ThinkingSlider.stories.tsx +++ b/src/browser/components/ThinkingSlider.stories.tsx @@ -48,9 +48,7 @@ export const DifferentModels: Story = {
-
- Claude Opus 4.5 (3 levels: low/medium/high) -
+
Claude Opus 4.5 (4 levels)
@@ -116,18 +114,19 @@ export const InteractiveDemo: Story = { }, }; -export const Opus45ThreeLevels: Story = { +export const Opus45AllLevels: Story = { args: { modelString: "anthropic:claude-opus-4-5" }, render: (args) => (
- Claude Opus 4.5 uses the effort parameter (low/medium/high only, no “off”): + Claude Opus 4.5 uses the effort parameter with optional extended thinking:
- • Low: Conservative token usage -
Medium: Balanced usage (default) -
High: Best results, more tokens + • Off: effort=“low”, no visible reasoning +
Low: effort=“low”, visible reasoning +
Medium: effort=“medium”, visible reasoning +
High: effort=“high”, visible reasoning
), diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts index 974fca0c63..86bc4a9030 100644 --- a/src/browser/utils/thinking/policy.test.ts +++ b/src/browser/utils/thinking/policy.test.ts @@ -33,13 +33,17 @@ describe("getThinkingPolicyForModel", () => { ]); }); - test("returns low/medium/high for Opus 4.5", () => { + test("returns all levels for Opus 4.5 (uses default policy)", () => { + // Opus 4.5 uses the default policy - no special case needed + // The effort parameter handles the "off" case by setting effort="low" expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([ + "off", "low", "medium", "high", ]); expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([ + "off", "low", "medium", "high", @@ -95,19 +99,16 @@ describe("enforceThinkingPolicy", () => { }); }); - describe("Opus 4.5 (no off option)", () => { - test("allows low/medium/high levels", () => { + describe("Opus 4.5 (all levels supported)", () => { + test("allows all levels including off", () => { + expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("off"); expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low"); expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium"); expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high"); }); - test("falls back to high when off is requested", () => { - expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high"); - }); - - test("falls back to high when off is requested (versioned model)", () => { - expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high"); + test("allows off for versioned model", () => { + expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off"); }); }); }); diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts index 41c2fad4fd..4346d9272d 100644 --- 
a/src/browser/utils/thinking/policy.ts +++ b/src/browser/utils/thinking/policy.ts @@ -25,7 +25,6 @@ export type ThinkingPolicy = readonly ThinkingLevel[]; * * Rules: * - openai:gpt-5-pro → ["high"] (only supported level) - * - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only) * - gemini-3 → ["low", "high"] (thinking level only) * - default → ["off", "low", "medium", "high"] (all levels selectable) * @@ -39,12 +38,6 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { return ["high"]; } - // Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off") - // Match "anthropic:" followed by "claude-opus-4-5" with optional version suffix - if (modelString.includes("opus-4-5")) { - return ["low", "medium", "high"]; - } - // Gemini 3 Pro only supports "low" and "high" reasoning levels if (modelString.includes("gemini-3")) { return ["low", "high"]; @@ -59,8 +52,7 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { * * Fallback strategy: * 1. If requested level is allowed, use it - * 2. For Opus 4.5: prefer "high" (best experience for reasoning model) - * 3. Otherwise: prefer "medium" if allowed, else use first allowed level + * 2. Otherwise: prefer "medium" if allowed, else use first allowed level */ export function enforceThinkingPolicy( modelString: string, @@ -72,11 +64,6 @@ export function enforceThinkingPolicy( return requested; } - // Special case: Opus 4.5 defaults to "high" for best experience - if (modelString.includes("opus-4-5") && allowed.includes("high")) { - return "high"; - } - // Fallback: prefer "medium" if allowed, else use first allowed level return allowed.includes("medium") ? 
"medium" : allowed[0]; } diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts index c2b025c876..f6283d067a 100644 --- a/src/common/types/thinking.ts +++ b/src/common/types/thinking.ts @@ -42,8 +42,8 @@ export const ANTHROPIC_THINKING_BUDGETS: Record = { * * @see https://www.anthropic.com/news/claude-opus-4-5 */ -export const ANTHROPIC_EFFORT: Record = { - off: undefined, +export const ANTHROPIC_EFFORT: Record = { + off: "low", low: "low", medium: "medium", high: "high", diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts index 6283269f37..a62356bec6 100644 --- a/src/common/utils/ai/providerOptions.test.ts +++ b/src/common/utils/ai/providerOptions.test.ts @@ -23,37 +23,46 @@ void mock.module("@/browser/utils/thinking/policy", () => ({ describe("buildProviderOptions - Anthropic", () => { describe("Opus 4.5 (effort parameter)", () => { - test("should use effort parameter for claude-opus-4-5", () => { + test("should use effort and thinking parameters for claude-opus-4-5", () => { const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium"); expect(result).toEqual({ anthropic: { disableParallelToolUse: false, sendReasoning: true, + thinking: { + type: "enabled", + budgetTokens: 10000, // ANTHROPIC_THINKING_BUDGETS.medium + }, effort: "medium", }, }); }); - test("should use effort parameter for claude-opus-4-5-20251101", () => { + test("should use effort and thinking parameters for claude-opus-4-5-20251101", () => { const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high"); expect(result).toEqual({ anthropic: { disableParallelToolUse: false, sendReasoning: true, + thinking: { + type: "enabled", + budgetTokens: 20000, // ANTHROPIC_THINKING_BUDGETS.high + }, effort: "high", }, }); }); - test("should omit effort when thinking is off for Opus 4.5", () => { + test("should use effort 'low' with no thinking when off for Opus 4.5", () => { const result = 
buildProviderOptions("anthropic:claude-opus-4-5", "off"); expect(result).toEqual({ anthropic: { disableParallelToolUse: false, sendReasoning: true, + effort: "low", // "off" maps to effort: "low" for efficiency }, }); }); diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index 5eb54540ca..769acbd926 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -93,10 +93,16 @@ export function buildProviderOptions( const isOpus45 = modelName?.includes("opus-4-5") ?? false; if (isOpus45) { - // Opus 4.5: Use effort parameter for reasoning control - const effort = ANTHROPIC_EFFORT[effectiveThinking]; + // Opus 4.5: Use effort parameter AND optionally thinking for visible reasoning + // - "off" → effort: "low", no thinking (fast, no visible reasoning) + // - "low" → effort: "low", thinking enabled (visible reasoning) + // - "medium" → effort: "medium", thinking enabled + // - "high" → effort: "high", thinking enabled + const effortLevel = ANTHROPIC_EFFORT[effectiveThinking]; + const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking]; log.debug("buildProviderOptions: Anthropic Opus 4.5 config", { - effort, + effort: effortLevel, + budgetTokens, thinkingLevel: effectiveThinking, }); @@ -104,9 +110,17 @@ export function buildProviderOptions( anthropic: { disableParallelToolUse: false, // Always enable concurrent tool execution sendReasoning: true, // Include reasoning traces in requests sent to the model + // Enable thinking to get visible reasoning traces (only when not "off") + // budgetTokens sets the ceiling; effort controls how eagerly tokens are spent + ...(budgetTokens > 0 && { + thinking: { + type: "enabled", + budgetTokens, + }, + }), // Use effort parameter (Opus 4.5 only) to control token spend // SDK auto-adds beta header "effort-2025-11-24" when effort is set - ...(effort && { effort }), + effort: effortLevel, }, }; log.debug("buildProviderOptions: 
Returning Anthropic Opus 4.5 options", options);