Skip to content

Commit 184f1af

Browse files
committed
🤖 fix: update Claude Opus 4.5 thinking policy to match API
Claude Opus 4.5 uses the effort parameter which only supports low/medium/high (no 'off' option). Update thinking policy to reflect this:

- Return ['low', 'medium', 'high'] for opus-4-5 models
- Fallback to 'high' (instead of 'medium') when 'off' is requested
- Add comprehensive test coverage for Opus 4.5 policy
- Add Storybook story showcasing 3-position slider

This aligns the UI with the Anthropic API's capabilities, similar to how Gemini 3 is handled with ['low', 'high'].

Fixes the issue where users could select 'off' for Opus 4.5, which would result in no effort parameter being sent to the API.

_Generated with `mux`_

Change-Id: If402fe10a6061ce21dac4eb23a29ca58a9ca3613
Signed-off-by: Thomas Kosiewski <tk@coder.com>
1 parent 8f176aa commit 184f1af

File tree

3 files changed

+88
-6
lines changed

3 files changed

+88
-6
lines changed

src/browser/components/ThinkingSlider.stories.tsx

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,27 @@ export const DifferentModels: Story = {
4141
render: () => (
4242
<div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
4343
<div>
44-
<div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5</div>
44+
<div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5 (4 levels)</div>
4545
<ThinkingSliderComponent modelString="anthropic:claude-sonnet-4-5" />
4646
</div>
4747

4848
<div>
49-
<div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1</div>
49+
<div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.5 (3 levels: low/medium/high)</div>
50+
<ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
51+
</div>
52+
53+
<div>
54+
<div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1 (4 levels)</div>
5055
<ThinkingSliderComponent modelString="anthropic:claude-opus-4-1" />
5156
</div>
5257

5358
<div>
54-
<div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex</div>
59+
<div className="text-muted-light font-primary mb-2 text-xs">Gemini 3 (2 levels: low/high)</div>
60+
<ThinkingSliderComponent modelString="google:gemini-3-pro-preview" />
61+
</div>
62+
63+
<div>
64+
<div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex (4 levels)</div>
5565
<ThinkingSliderComponent modelString="openai:gpt-5-codex" />
5666
</div>
5767
</div>
@@ -92,6 +102,33 @@ export const InteractiveDemo: Story = {
92102
},
93103
};
94104

105+
export const Opus45ThreeLevels: Story = {
106+
args: { modelString: "anthropic:claude-opus-4-5" },
107+
render: (args) => (
108+
<div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
109+
<div className="text-bright font-primary mb-2.5 text-[13px]">
110+
Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
111+
</div>
112+
<ThinkingSliderComponent modelString={args.modelString} />
113+
<div className="text-muted-light font-primary mt-2.5 text-[11px]">
114+
<strong>Low</strong>: Conservative token usage
115+
<br /><strong>Medium</strong>: Balanced usage (default)
116+
<br /><strong>High</strong>: Best results, more tokens
117+
</div>
118+
</div>
119+
),
120+
play: async ({ canvasElement }) => {
121+
const canvas = within(canvasElement);
122+
123+
// Find the slider
124+
const slider = canvas.getByRole("slider");
125+
126+
// Verify slider is present with 3 levels (0-2)
127+
await expect(slider).toBeInTheDocument();
128+
await expect(slider).toHaveAttribute("max", "2");
129+
},
130+
};
131+
95132
export const LockedThinking: Story = {
96133
args: { modelString: "openai:gpt-5-pro" },
97134
render: (args) => (

src/browser/utils/thinking/policy.test.ts

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,23 @@ describe("getThinkingPolicyForModel", () => {
3333
]);
3434
});
3535

36+
test("returns low/medium/high for Opus 4.5", () => {
37+
expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([
38+
"low",
39+
"medium",
40+
"high",
41+
]);
42+
expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([
43+
"low",
44+
"medium",
45+
"high",
46+
]);
47+
});
48+
49+
test("returns low/high for Gemini 3", () => {
50+
expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
51+
});
52+
3653
test("returns all levels for other providers", () => {
3754
expect(getThinkingPolicyForModel("anthropic:claude-opus-4")).toEqual([
3855
"off",
@@ -46,7 +63,6 @@ describe("getThinkingPolicyForModel", () => {
4663
"medium",
4764
"high",
4865
]);
49-
expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
5066
});
5167
});
5268

@@ -78,6 +94,22 @@ describe("enforceThinkingPolicy", () => {
7894
expect(enforceThinkingPolicy("openai:gpt-5-pro", "low")).toBe("high");
7995
});
8096
});
97+
98+
describe("Opus 4.5 (no off option)", () => {
99+
test("allows low/medium/high levels", () => {
100+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low");
101+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium");
102+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high");
103+
});
104+
105+
test("falls back to high when off is requested", () => {
106+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high");
107+
});
108+
109+
test("falls back to high when off is requested (versioned model)", () => {
110+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high");
111+
});
112+
});
81113
});
82114

83115
// Note: Tests for invalid levels removed - TypeScript type system prevents invalid

src/browser/utils/thinking/policy.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
2525
*
2626
* Rules:
2727
* - openai:gpt-5-pro → ["high"] (only supported level)
28+
* - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
29+
* - gemini-3 → ["low", "high"] (thinking level only)
2830
* - default → ["off", "low", "medium", "high"] (all levels selectable)
2931
*
3032
* Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
@@ -37,6 +39,12 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
3739
return ["high"];
3840
}
3941

42+
// Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
43+
// Matches any model string containing "opus-4-5" (the provider prefix is not checked; version suffixes are tolerated)
44+
if (modelString.includes("opus-4-5")) {
45+
return ["low", "medium", "high"];
46+
}
47+
4048
// Gemini 3 Pro only supports "low" and "high" reasoning levels
4149
if (modelString.includes("gemini-3")) {
4250
return ["low", "high"];
@@ -51,8 +59,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
5159
*
5260
* Fallback strategy:
5361
* 1. If requested level is allowed, use it
54-
* 2. If "medium" is allowed, use it (reasonable default)
55-
* 3. Otherwise use first allowed level
62+
* 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
63+
* 3. Otherwise: prefer "medium" if allowed, else use first allowed level
5664
*/
5765
export function enforceThinkingPolicy(
5866
modelString: string,
@@ -64,6 +72,11 @@ export function enforceThinkingPolicy(
6472
return requested;
6573
}
6674

75+
// Special case: Opus 4.5 defaults to "high" for best experience
76+
if (modelString.includes("opus-4-5") && allowed.includes("high")) {
77+
return "high";
78+
}
79+
6780
// Fallback: prefer "medium" if allowed, else use first allowed level
6881
return allowed.includes("medium") ? "medium" : allowed[0];
6982
}

0 commit comments

Comments
 (0)