Skip to content

Commit ecc4440

Browse files
committed
🤖 feat: add GPT-5.1-Codex-Max model with xhigh reasoning level
Add support for OpenAI's gpt-5.1-codex-max model which introduces a new 'xhigh' (Extra High) reasoning effort level for extended deep thinking. Changes: - Extended ThinkingLevel type to include 'xhigh' as 5th option - Added GPT_CODEX_MAX to known models with codex-max alias - Updated thinking policy to return 5 levels for codex-max only - Added xhigh to UI level descriptions and command palette - Added model pricing/capability data to models-extra - Updated SendMessageOptionsSchema to accept xhigh - Added comprehensive tests for codex-max policy The xhigh level is exclusive to gpt-5.1-codex-max. Other models gracefully fall back to their maximum supported level when xhigh is requested. Change-Id: Iab7ba7187703e275c4c0aa76779381dff4006316 Signed-off-by: Thomas Kosiewski <tk@coder.com>
1 parent 4d1947a commit ecc4440

File tree

9 files changed

+105
-7
lines changed

9 files changed

+105
-7
lines changed

src/browser/components/ChatInput/index.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,7 @@ export const ChatInput: React.FC<ChatInputProps> = (props) => {
513513
low: "Low — adds light reasoning",
514514
medium: "Medium — balanced reasoning",
515515
high: "High — maximum reasoning depth",
516+
xhigh: "Extra High — extended deep thinking",
516517
};
517518

518519
setToast({

src/browser/utils/commands/sources.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ export interface BuildSourcesParams {
5050
onOpenSettings?: (section?: string) => void;
5151
}
5252

53-
const THINKING_LEVELS: ThinkingLevel[] = ["off", "low", "medium", "high"];
53+
const THINKING_LEVELS: ThinkingLevel[] = ["off", "low", "medium", "high", "xhigh"];
5454

5555
/**
5656
* Command palette section names
@@ -431,6 +431,7 @@ export function buildCoreSources(p: BuildSourcesParams): Array<() => CommandActi
431431
low: "Low — add a bit of reasoning",
432432
medium: "Medium — balanced reasoning",
433433
high: "High — maximum reasoning depth",
434+
xhigh: "Extra High — extended deep thinking",
434435
};
435436
const currentLevel = p.getThinkingLevel(workspaceId);
436437

src/browser/utils/thinking/policy.test.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,36 @@ import { describe, expect, test } from "bun:test";
22
import { getThinkingPolicyForModel, enforceThinkingPolicy } from "./policy";
33

44
describe("getThinkingPolicyForModel", () => {
5+
test("returns 5 levels including xhigh for gpt-5.1-codex-max", () => {
6+
expect(getThinkingPolicyForModel("openai:gpt-5.1-codex-max")).toEqual([
7+
"off",
8+
"low",
9+
"medium",
10+
"high",
11+
"xhigh",
12+
]);
13+
});
14+
15+
test("returns 5 levels for gpt-5.1-codex-max with version suffix", () => {
16+
expect(getThinkingPolicyForModel("openai:gpt-5.1-codex-max-2025-12-01")).toEqual([
17+
"off",
18+
"low",
19+
"medium",
20+
"high",
21+
"xhigh",
22+
]);
23+
});
24+
25+
test("returns 5 levels for gpt-5.1-codex-max with whitespace after colon", () => {
26+
expect(getThinkingPolicyForModel("openai: gpt-5.1-codex-max")).toEqual([
27+
"off",
28+
"low",
29+
"medium",
30+
"high",
31+
"xhigh",
32+
]);
33+
});
34+
535
test("returns single HIGH for gpt-5-pro base model", () => {
636
expect(getThinkingPolicyForModel("openai:gpt-5-pro")).toEqual(["high"]);
737
});
@@ -111,6 +141,32 @@ describe("enforceThinkingPolicy", () => {
111141
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off");
112142
});
113143
});
144+
145+
describe("GPT-5.1-Codex-Max (5 levels including xhigh)", () => {
146+
test("allows all 5 levels including xhigh", () => {
147+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "off")).toBe("off");
148+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "low")).toBe("low");
149+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "medium")).toBe("medium");
150+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "high")).toBe("high");
151+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "xhigh")).toBe("xhigh");
152+
});
153+
154+
test("allows xhigh for versioned model", () => {
155+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max-2025-12-01", "xhigh")).toBe("xhigh");
156+
});
157+
});
158+
159+
describe("xhigh fallback for non-codex-max models", () => {
160+
test("falls back to medium when xhigh requested on standard model", () => {
161+
// Standard models don't support xhigh, so fall back to medium (preferred fallback)
162+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "xhigh")).toBe("medium");
163+
});
164+
165+
test("falls back to high when xhigh requested on gpt-5-pro", () => {
166+
// gpt-5-pro only supports high, so xhigh falls back to high
167+
expect(enforceThinkingPolicy("openai:gpt-5-pro", "xhigh")).toBe("high");
168+
});
169+
});
114170
});
115171

116172
// Note: Tests for invalid levels removed - TypeScript type system prevents invalid

src/browser/utils/thinking/policy.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,21 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
2424
* Returns the thinking policy for a given model.
2525
*
2626
* Rules:
27+
* - openai:gpt-5.1-codex-max → ["off", "low", "medium", "high", "xhigh"] (5 levels including xhigh)
2728
* - openai:gpt-5-pro → ["high"] (only supported level)
2829
* - gemini-3 → ["low", "high"] (thinking level only)
29-
* - default → ["off", "low", "medium", "high"] (all levels selectable)
30+
* - default → ["off", "low", "medium", "high"] (standard 4 levels)
3031
*
3132
* Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
3233
* Does NOT match gpt-5-pro-mini (uses negative lookahead).
3334
*/
3435
export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
36+
// GPT-5.1-Codex-Max supports 5 reasoning levels including xhigh (Extra High)
37+
// Match "openai:" followed by optional whitespace and "gpt-5.1-codex-max"
38+
if (/^openai:\s*gpt-5\.1-codex-max/.test(modelString)) {
39+
return ["off", "low", "medium", "high", "xhigh"];
40+
}
41+
3542
// Match "openai:" followed by optional whitespace and "gpt-5-pro"
3643
// Allow version suffixes like "-2025-10-06" but NOT "-mini" or other text suffixes
3744
if (/^openai:\s*gpt-5-pro(?!-[a-z])/.test(modelString)) {
@@ -43,7 +50,7 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
4350
return ["low", "high"];
4451
}
4552

46-
// Default policy: all levels selectable
53+
// Default policy: standard 4 levels (xhigh only for codex-max)
4754
return ["off", "low", "medium", "high"];
4855
}
4956

src/common/constants/knownModels.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,13 @@ const MODEL_DEFINITIONS = {
7070
providerModelId: "gpt-5.1-codex-mini",
7171
aliases: ["codex-mini"],
7272
},
73+
GPT_CODEX_MAX: {
74+
provider: "openai",
75+
providerModelId: "gpt-5.1-codex-max",
76+
aliases: ["codex-max"],
77+
warm: true,
78+
tokenizerOverride: "openai/gpt-5",
79+
},
7380
GEMINI_3_PRO: {
7481
provider: "google",
7582
providerModelId: "gemini-3-pro-preview",

src/common/orpc/schemas/stream.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ export const ToolPolicySchema = z.array(ToolPolicyFilterSchema).meta({
313313
// SendMessage options
314314
export const SendMessageOptionsSchema = z.object({
315315
editMessageId: z.string().optional(),
316-
thinkingLevel: z.enum(["off", "low", "medium", "high"]).optional(),
316+
thinkingLevel: z.enum(["off", "low", "medium", "high", "xhigh"]).optional(),
317317
model: z.string("No model specified"),
318318
toolPolicy: ToolPolicySchema.optional(),
319319
additionalSystemInstructions: z.string().optional(),

src/common/types/thinking.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* different AI providers (Anthropic, OpenAI, etc.)
66
*/
77

8-
export type ThinkingLevel = "off" | "low" | "medium" | "high";
8+
export type ThinkingLevel = "off" | "low" | "medium" | "high" | "xhigh";
99

1010
/**
1111
* Active thinking levels (excludes "off")
@@ -30,6 +30,7 @@ export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
3030
low: 4000,
3131
medium: 10000,
3232
high: 20000,
33+
xhigh: 20000, // Same as high - Anthropic doesn't support xhigh
3334
};
3435

3536
/**
@@ -47,6 +48,7 @@ export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high">
4748
low: "low",
4849
medium: "medium",
4950
high: "high",
51+
xhigh: "high", // Fallback to high - Anthropic doesn't support xhigh
5052
};
5153

5254
/**
@@ -66,6 +68,7 @@ export const OPENAI_REASONING_EFFORT: Record<ThinkingLevel, string | undefined>
6668
low: "low",
6769
medium: "medium",
6870
high: "high",
71+
xhigh: "xhigh", // Extra High - only supported by gpt-5.1-codex-max
6972
};
7073

7174
/**
@@ -83,6 +86,7 @@ export const GEMINI_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
8386
low: 2048,
8487
medium: 8192,
8588
high: 16384, // Conservative max (some models go to 32k)
89+
xhigh: 16384, // Same as high - Gemini doesn't support xhigh
8690
} as const;
8791
export const OPENROUTER_REASONING_EFFORT: Record<
8892
ThinkingLevel,
@@ -92,4 +96,5 @@ export const OPENROUTER_REASONING_EFFORT: Record<
9296
low: "low",
9397
medium: "medium",
9498
high: "high",
99+
xhigh: "high", // Fallback to high - OpenRouter doesn't support xhigh
95100
};

src/common/utils/ai/providerOptions.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,11 @@ export function buildProviderOptions(
254254
};
255255

256256
if (isGemini3) {
257-
// Gemini 3 uses thinkingLevel (low/high)
258-
thinkingConfig.thinkingLevel = effectiveThinking === "medium" ? "low" : effectiveThinking;
257+
// Gemini 3 uses thinkingLevel (low/high) - map medium/xhigh to supported values
258+
thinkingConfig.thinkingLevel =
259+
effectiveThinking === "medium" || effectiveThinking === "xhigh"
260+
? "high"
261+
: effectiveThinking;
259262
} else {
260263
// Gemini 2.5 uses thinkingBudget
261264
const budget = GEMINI_THINKING_BUDGETS[effectiveThinking];

src/common/utils/tokens/models-extra.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,22 @@ export const modelsExtra: Record<string, ModelData> = {
8888
supports_reasoning: true,
8989
supports_response_schema: true,
9090
},
91+
92+
// GPT-5.1-Codex-Max - Extended reasoning model with xhigh support
93+
// Pricing TBD - using estimated values based on Codex pricing pattern
94+
// Supports 5 reasoning levels: off, low, medium, high, xhigh
95+
"gpt-5.1-codex-max": {
96+
max_input_tokens: 400000, // Estimated based on compaction capability
97+
max_output_tokens: 272000, // Same as gpt-5-pro
98+
input_cost_per_token: 0.00002, // $20/M - placeholder estimate
99+
output_cost_per_token: 0.00008, // $80/M - placeholder estimate
100+
litellm_provider: "openai",
101+
mode: "chat",
102+
supports_function_calling: true,
103+
supports_vision: true,
104+
supports_reasoning: true,
105+
supports_response_schema: true,
106+
knowledge_cutoff: "2025-06-30", // Estimated
107+
supported_endpoints: ["/v1/responses"],
108+
},
91109
};

0 commit comments

Comments (0)