🤖 fix: use effort param only for Claude Opus 4.5

ammar-agent · ammar-agent · commit 8ce486c9c1b7 · 2025-11-24T19:26:16.000-06:00
The effort parameter is a new feature specific to Claude Opus 4.5.
Other Anthropic models (Sonnet 4.5, Haiku 4.5, Opus 4.1, etc.) must use
the thinking.budgetTokens approach for extended thinking.

Changes:
- Re-add ANTHROPIC_THINKING_BUDGETS for non-Opus 4.5 models
- Update buildProviderOptions to detect Opus 4.5 (model name contains "opus-4-5") and use the effort param
- All other Anthropic models use thinking.budgetTokens instead (omitted when thinking is off)
- Add unit tests for provider options builder
- Add integration tests for reasoning with both Sonnet 4.5 and Opus 4.5

_Generated with `mux`_
diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts
@@ -14,17 +14,33 @@ export type ThinkingLevel = "off" | "low" | "medium" | "high";
 export type ThinkingLevelOn = Exclude<ThinkingLevel, "off">;
 
 /**
- * Anthropic effort level mapping
+ * Anthropic thinking token budget mapping
  *
- * Maps our unified thinking levels to Anthropic's effort parameter:
- * - off: No effort specified (undefined)
- * - low: Most efficient - significant token savings
- * - medium: Balanced approach with moderate token savings
- * - high: Maximum capability (default behavior)
+ * These heuristics balance thinking depth with response time and cost.
+ * Used for models that support extended thinking with budgetTokens
+ * (e.g., Sonnet 4.5, Haiku 4.5, Opus 4.1, etc.)
  *
- * The effort parameter controls all token spend including thinking,
- * text responses, and tool calls. Unlike budget_tokens, it doesn't require
- * thinking to be explicitly enabled.
+ * - off: No extended thinking
+ * - low: Quick thinking for straightforward tasks (4K tokens)
+ * - medium: Standard thinking for moderate complexity (10K tokens)
+ * - high: Deep thinking for complex problems (20K tokens)
+ */
+export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
+  off: 0,
+  low: 4000,
+  medium: 10000,
+  high: 20000,
+};
+
+/**
+ * Anthropic Opus 4.5 effort parameter mapping
+ *
+ * The effort parameter is a new feature ONLY available for Claude Opus 4.5.
+ * It controls how much computational work the model applies to each task.
+ *
+ * Other Anthropic models must use the thinking.budgetTokens approach instead.
+ *
+ * @see https://www.anthropic.com/news/claude-opus-4-5
  */
 export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
   off: undefined,
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
@@ -0,0 +1,119 @@
+/**
+ * Tests for provider options builder
+ */
+
+import { describe, test, expect, mock } from "bun:test";
+import { buildProviderOptions } from "./providerOptions";
+import type { ThinkingLevel } from "@/common/types/thinking";
+
+// Mock the log module to avoid console noise
+void mock.module("@/node/services/log", () => ({
+  log: {
+    debug: (): void => undefined,
+    info: (): void => undefined,
+    warn: (): void => undefined,
+    error: (): void => undefined,
+  },
+}));
+
+// Mock enforceThinkingPolicy to pass through
+void mock.module("@/browser/utils/thinking/policy", () => ({
+  enforceThinkingPolicy: (_model: string, level: ThinkingLevel) => level,
+}));
+
+describe("buildProviderOptions - Anthropic", () => {
+  describe("Opus 4.5 (effort parameter)", () => {
+    test("should use effort parameter for claude-opus-4-5", () => {
+      const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          effort: "medium",
+        },
+      });
+    });
+
+    test("should use effort parameter for claude-opus-4-5-20251101", () => {
+      const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          effort: "high",
+        },
+      });
+    });
+
+    test("should omit effort when thinking is off for Opus 4.5", () => {
+      const result = buildProviderOptions("anthropic:claude-opus-4-5", "off");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+        },
+      });
+    });
+  });
+
+  describe("Other Anthropic models (thinking/budgetTokens)", () => {
+    test("should use thinking.budgetTokens for claude-sonnet-4-5", () => {
+      const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "medium");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 10000,
+          },
+        },
+      });
+    });
+
+    test("should use thinking.budgetTokens for claude-opus-4-1", () => {
+      const result = buildProviderOptions("anthropic:claude-opus-4-1", "high");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 20000,
+          },
+        },
+      });
+    });
+
+    test("should use thinking.budgetTokens for claude-haiku-4-5", () => {
+      const result = buildProviderOptions("anthropic:claude-haiku-4-5", "low");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 4000,
+          },
+        },
+      });
+    });
+
+    test("should omit thinking when thinking is off for non-Opus 4.5", () => {
+      const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "off");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+        },
+      });
+    });
+  });
+});
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
@@ -12,6 +12,7 @@ import type { MuxProviderOptions } from "@/common/types/providerOptions";
 import type { ThinkingLevel } from "@/common/types/thinking";
 import {
   ANTHROPIC_EFFORT,
+  ANTHROPIC_THINKING_BUDGETS,
   GEMINI_THINKING_BUDGETS,
   OPENAI_REASONING_EFFORT,
   OPENROUTER_REASONING_EFFORT,
@@ -83,19 +84,53 @@ export function buildProviderOptions(
 
   // Build Anthropic-specific options
   if (provider === "anthropic") {
-    const effort = ANTHROPIC_EFFORT[effectiveThinking];
+    // Extract model name from model string (e.g., "anthropic:claude-opus-4-5" -> "claude-opus-4-5")
+    const [, modelName] = modelString.split(":");
+
+    // Check if this is Opus 4.5 (supports effort parameter)
+    // Opus 4.5 uses the new "effort" parameter for reasoning control
+    // All other Anthropic models use the "thinking" parameter with budgetTokens
+    const isOpus45 = modelName?.includes("opus-4-5") ?? false;
+
+    if (isOpus45) {
+      // Opus 4.5: Use effort parameter for reasoning control
+      const effort = ANTHROPIC_EFFORT[effectiveThinking];
+      log.debug("buildProviderOptions: Anthropic Opus 4.5 config", {
+        effort,
+        thinkingLevel: effectiveThinking,
+      });
+
+      const options: ProviderOptions = {
+        anthropic: {
+          disableParallelToolUse: false, // Always enable concurrent tool execution
+          sendReasoning: true, // Include reasoning traces in requests sent to the model
+          // Use effort parameter (Opus 4.5 only) to control token spend
+          // SDK auto-adds beta header "effort-2025-11-24" when effort is set
+          ...(effort && { effort }),
+        },
+      };
+      log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options);
+      return options;
+    }
+
+    // Other Anthropic models: Use thinking parameter with budgetTokens
+    const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
     log.debug("buildProviderOptions: Anthropic config", {
-      effort,
+      budgetTokens,
       thinkingLevel: effectiveThinking,
     });
 
     const options: ProviderOptions = {
       anthropic: {
         disableParallelToolUse: false, // Always enable concurrent tool execution
         sendReasoning: true, // Include reasoning traces in requests sent to the model
-        // Use effort parameter to control token spend (thinking, text, and tool calls)
-        // SDK auto-adds beta header "effort-2025-11-24" when effort is set
-        ...(effort && { effort }),
+        // Conditionally add thinking configuration (non-Opus 4.5 models)
+        ...(budgetTokens > 0 && {
+          thinking: {
+            type: "enabled",
+            budgetTokens,
+          },
+        }),
       },
     };
     log.debug("buildProviderOptions: Returning Anthropic options", options);
diff --git a/tests/ipcMain/sendMessage.reasoning.test.ts b/tests/ipcMain/sendMessage.reasoning.test.ts