🤖 fix: update Claude Opus 4.5 thinking policy to match API

ThomasK33 · ThomasK33 · commit 184f1af5d53c · 2025-11-25T11:10:54.000+01:00
Claude Opus 4.5 uses the effort parameter which only supports low/medium/high
(no 'off' option). Update thinking policy to reflect this:

- Return ['low', 'medium', 'high'] for opus-4-5 models
- Fallback to 'high' (instead of 'medium') when 'off' is requested
- Add comprehensive test coverage for Opus 4.5 policy
- Add Storybook story showcasing 3-position slider

This aligns the UI with the Anthropic API's capabilities, similar to how
Gemini 3 is handled with ['low', 'high'].

Fixes the issue where users could select 'off' for Opus 4.5, which would
result in no effort parameter being sent to the API.

_Generated with `mux`_

Change-Id: If402fe10a6061ce21dac4eb23a29ca58a9ca3613
Signed-off-by: Thomas Kosiewski <tk@coder.com>
diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx
@@ -41,17 +41,27 @@ export const DifferentModels: Story = {
   render: () => (
     <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5 (4 levels)</div>
         <ThinkingSliderComponent modelString="anthropic:claude-sonnet-4-5" />
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.5 (3 levels: low/medium/high)</div>
+        <ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
+      </div>
+
+      <div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1 (4 levels)</div>
         <ThinkingSliderComponent modelString="anthropic:claude-opus-4-1" />
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Gemini 3 (2 levels: low/high)</div>
+        <ThinkingSliderComponent modelString="google:gemini-3-pro-preview" />
+      </div>
+
+      <div>
+        <div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex (4 levels)</div>
         <ThinkingSliderComponent modelString="openai:gpt-5-codex" />
       </div>
     </div>
@@ -92,6 +102,33 @@ export const InteractiveDemo: Story = {
   },
 };
 
+export const Opus45ThreeLevels: Story = {
+  args: { modelString: "anthropic:claude-opus-4-5" },
+  render: (args) => (
+    <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
+      <div className="text-bright font-primary mb-2.5 text-[13px]">
+        Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
+      </div>
+      <ThinkingSliderComponent modelString={args.modelString} />
+      <div className="text-muted-light font-primary mt-2.5 text-[11px]">
+        • <strong>Low</strong>: Conservative token usage
+        <br />• <strong>Medium</strong>: Balanced usage (default)
+        <br />• <strong>High</strong>: Best results, more tokens
+      </div>
+    </div>
+  ),
+  play: async ({ canvasElement }) => {
+    const canvas = within(canvasElement);
+
+    // Find the slider
+    const slider = canvas.getByRole("slider");
+
+    // Verify slider is present with 3 levels (0-2)
+    await expect(slider).toBeInTheDocument();
+    await expect(slider).toHaveAttribute("max", "2");
+  },
+};
+
 export const LockedThinking: Story = {
   args: { modelString: "openai:gpt-5-pro" },
   render: (args) => (
diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts
@@ -33,6 +33,23 @@ describe("getThinkingPolicyForModel", () => {
     ]);
   });
 
+  test("returns low/medium/high for Opus 4.5", () => {
+    expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([
+      "low",
+      "medium",
+      "high",
+    ]);
+    expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([
+      "low",
+      "medium",
+      "high",
+    ]);
+  });
+
+  test("returns low/high for Gemini 3", () => {
+    expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
+  });
+
   test("returns all levels for other providers", () => {
     expect(getThinkingPolicyForModel("anthropic:claude-opus-4")).toEqual([
       "off",
@@ -46,7 +63,6 @@ describe("getThinkingPolicyForModel", () => {
       "medium",
       "high",
     ]);
-    expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
   });
 });
 
@@ -78,6 +94,22 @@ describe("enforceThinkingPolicy", () => {
       expect(enforceThinkingPolicy("openai:gpt-5-pro", "low")).toBe("high");
     });
   });
+
+  describe("Opus 4.5 (no off option)", () => {
+    test("allows low/medium/high levels", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low");
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium");
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high");
+    });
+
+    test("falls back to high when off is requested", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high");
+    });
+
+    test("falls back to high when off is requested (versioned model)", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high");
+    });
+  });
 });
 
 // Note: Tests for invalid levels removed - TypeScript type system prevents invalid
diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts
@@ -25,6 +25,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
  *
  * Rules:
  * - openai:gpt-5-pro → ["high"] (only supported level)
+ * - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
+ * - gemini-3 → ["low", "high"] (thinking level only)
  * - default → ["off", "low", "medium", "high"] (all levels selectable)
  *
  * Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
@@ -37,6 +39,12 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
     return ["high"];
   }
 
+  // Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
+  // Substring match on "opus-4-5": accepts any provider prefix and an optional version suffix
+  if (modelString.includes("opus-4-5")) {
+    return ["low", "medium", "high"];
+  }
+
   // Gemini 3 Pro only supports "low" and "high" reasoning levels
   if (modelString.includes("gemini-3")) {
     return ["low", "high"];
@@ -51,8 +59,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
  *
  * Fallback strategy:
  * 1. If requested level is allowed, use it
- * 2. If "medium" is allowed, use it (reasonable default)
- * 3. Otherwise use first allowed level
+ * 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
+ * 3. Otherwise: prefer "medium" if allowed, else use first allowed level
  */
 export function enforceThinkingPolicy(
   modelString: string,
@@ -64,6 +72,11 @@ export function enforceThinkingPolicy(
     return requested;
   }
 
+  // Special case: Opus 4.5 defaults to "high" for best experience
+  if (modelString.includes("opus-4-5") && allowed.includes("high")) {
+    return "high";
+  }
+
   // Fallback: prefer "medium" if allowed, else use first allowed level
   return allowed.includes("medium") ? "medium" : allowed[0];
 }