diff --git a/bun.lock b/bun.lock index 8cfd4bb451..7b3e933865 100644 --- a/bun.lock +++ b/bun.lock @@ -1,16 +1,15 @@ { "lockfileVersion": 1, - "configVersion": 0, "workspaces": { "": { "name": "@coder/cmux", "dependencies": { - "@ai-sdk/anthropic": "^2.0.44", - "@ai-sdk/google": "^2.0.38", - "@ai-sdk/openai": "^2.0.66", - "@ai-sdk/xai": "^2.0.33", + "@ai-sdk/anthropic": "^2.0.47", + "@ai-sdk/google": "^2.0.43", + "@ai-sdk/openai": "^2.0.72", + "@ai-sdk/xai": "^2.0.36", "@lydell/node-pty": "1.1.0", - "@openrouter/ai-sdk-provider": "^1.2.2", + "@openrouter/ai-sdk-provider": "^1.2.5", "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.16", @@ -22,7 +21,7 @@ "@radix-ui/react-tabs": "^1.1.13", "@radix-ui/react-toggle-group": "^1.1.11", "@radix-ui/react-tooltip": "^1.2.8", - "ai": "^5.0.93", + "ai": "^5.0.101", "ai-tokenizer": "^1.0.4", "chalk": "^5.6.2", "cors": "^2.8.5", @@ -137,13 +136,13 @@ "@adobe/css-tools": ["@adobe/css-tools@4.4.4", "", {}, "sha512-Elp+iwUx5rN5+Y8xLt5/GRoG20WGoDCQ/1Fb+1LiGtvwbDavuSk0jhD/eZdckHAuzcDzccnkv+rEjyWfRx18gg=="], - "@ai-sdk/anthropic": ["@ai-sdk/anthropic@2.0.44", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-o8TfNXRzO/KZkBrcx+CL9LQsPhx7PHyqzUGjza3TJaF9WxfH1S5UQLAmEw8F7lQoHNLU0IX03WT8o8R/4JbUxQ=="], + "@ai-sdk/anthropic": ["@ai-sdk/anthropic@2.0.47", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-YioBDTTQ6z2fijcOByG6Gj7me0ITqaJACprHROis7fXFzYIBzyAwxhsCnOrXO+oXv+9Ixddgy/Cahdmu84uRvQ=="], - "@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.10", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17", "@vercel/oidc": "3.0.3" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-c++qOKfjKokTPAJ+vP9UXXNuTQ819yEDCZVXBhpZbgRly1P4fHTJbIAwuh+Qxxe9Bmtu8PEta0JGYZxc+hm7/Q=="], + "@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.15", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17", "@vercel/oidc": "3.0.5" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-i1YVKzC1dg9LGvt+GthhD7NlRhz9J4+ZRj3KELU14IZ/MHPsOBiFeEoCCIDLR+3tqT8/+5nIsK3eZ7DFRfMfdw=="], - "@ai-sdk/google": ["@ai-sdk/google@2.0.38", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-z+RFCxRA/dSd3eCkGBlnk79nz3jv8vwaW42gVc+qDuMofNfvjRz19rjnkFNuYQ6cEUcPKCo0P1rD/JLeTN2Z5A=="], + "@ai-sdk/google": ["@ai-sdk/google@2.0.43", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-qO6giuoYCX/SdZScP/3VO5Xnbd392zm3HrTkhab/efocZU8J/VVEAcAUE1KJh0qOIAYllofRtpJIUGkRK8Q5rw=="], - "@ai-sdk/openai": ["@ai-sdk/openai@2.0.68", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-qUSLFkqgUoFArzBwttu0KWVAZYjbsdZGOklSJXpfZ2nDC61yseHxtcnuG8u6tqKnGXDh4eakEgREDWU2sRht7A=="], + "@ai-sdk/openai": ["@ai-sdk/openai@2.0.72", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-9j8Gdt9gFiUGFdQIjjynbC7+w8YQxkXje6dwAq1v2Pj17wmB3U0Td3lnEe/a+EnEysY3mdkc8dHPYc5BNev9NQ=="], "@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@1.0.27", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-bpYruxVLhrTbVH6CCq48zMJNeHu6FmHtEedl9FXckEgcIEAi036idFhJlcRwC1jNCwlacbzb8dPD7OAH1EKJaQ=="], @@ -151,7 +150,7 @@ "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.17", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-TR3Gs4I3Tym4Ll+EPdzRdvo/rc8Js6c4nVhFLuvGLX/Y4V9ZcQMa/HTiYsHEgmYrf1zVi6Q145UEZUfleOwOjw=="], - "@ai-sdk/xai": ["@ai-sdk/xai@2.0.33", "", { "dependencies": { "@ai-sdk/openai-compatible": "1.0.27", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-0+S+hxbAj8dA8/3dYQsmgkVkPcs8yptO1ueLWtJpa6PYjrdyliDcPSCZREL8aE76vHGvFsYlRABFfH9Ps2M8tg=="], + "@ai-sdk/xai": ["@ai-sdk/xai@2.0.36", "", { "dependencies": { "@ai-sdk/openai-compatible": "1.0.27", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-tQuCDVNK4W4fiom59r2UnU7u9SAz58fpl5yKYoS9IbMOrDRO3fzQGWmj2p8MUvz9LzXf6hiyUkVNFGzzx+uZcw=="], "@antfu/install-pkg": ["@antfu/install-pkg@1.1.0", "", { "dependencies": { "package-manager-detector": "^1.3.0", "tinyexec": "^1.0.1" } }, "sha512-MGQsmw10ZyI+EJo45CdSER4zEb+p31LpDAFp2Z3gkSd1yqVZGi0Ebx++YTEMonJy4oChEMLsxZ64j8FH6sSqtQ=="], @@ -523,7 +522,7 @@ "@npmcli/move-file": ["@npmcli/move-file@2.0.1", "", { "dependencies": { "mkdirp": "^1.0.4", "rimraf": "^3.0.2" } }, "sha512-mJd2Z5TjYWq/ttPLLGqArdtnC74J6bOzg4rMDnN+p1xTacZ2yPRCk2y0oSWQtygLR9YVQXgOcONrwtnk3JupxQ=="], - "@openrouter/ai-sdk-provider": ["@openrouter/ai-sdk-provider@1.2.3", "", { "dependencies": { "@openrouter/sdk": "^0.1.8" }, "peerDependencies": { "ai": "^5.0.0", "zod": "^3.24.1 || ^v4" } }, "sha512-a6Nc8dPRHakRH9966YJ/HZJhLOds7DuPTscNZDoAr+Aw+tEFUlacSJMvb/b3gukn74mgbuaJRji9YOn62ipfVg=="], + "@openrouter/ai-sdk-provider": ["@openrouter/ai-sdk-provider@1.2.5", "", { "dependencies": { "@openrouter/sdk": "^0.1.8" }, "peerDependencies": { "ai": "^5.0.0", "zod": "^3.24.1 || ^v4" } }, "sha512-NrvJFPvdEUo6DYUQIVWPGfhafuZ2PAIX7+CUMKGknv8TcTNVo0TyP1y5SU7Bgjf/Wup9/74UFKUB07icOhVZjQ=="], "@openrouter/sdk": ["@openrouter/sdk@0.1.11", "", { "dependencies": { "zod": "^3.25.0 || ^4.0.0" }, "peerDependencies": { "@tanstack/react-query": "^5", "react": "^18 || ^19", "react-dom": "^18 || ^19" }, "optionalPeers": ["@tanstack/react-query", "react", "react-dom"] }, "sha512-OuPc8qqidL/PUM8+9WgrOfSR9+b6rKIWiezGcUJ54iPTdh+Gye5Qjut6hrLWlOCMZE7Z853gN90r1ft4iChj7Q=="], @@ -1085,7 +1084,7 @@ "@unrs/resolver-binding-win32-x64-msvc": ["@unrs/resolver-binding-win32-x64-msvc@1.11.1", "", { "os": "win32", "cpu": "x64" }, "sha512-lrW200hZdbfRtztbygyaq/6jP6AKE8qQN2KvPcJ+x7wiD038YtnYtZ82IMNJ69GJibV7bwL3y9FgK+5w/pYt6g=="], - "@vercel/oidc": ["@vercel/oidc@3.0.3", "", {}, "sha512-yNEQvPcVrK9sIe637+I0jD6leluPxzwJKx/Haw6F4H77CdDsszUn5V3o96LPziXkSNE2B83+Z3mjqGKBK/R6Gg=="], + "@vercel/oidc": ["@vercel/oidc@3.0.5", "", {}, "sha512-fnYhv671l+eTTp48gB4zEsTW/YtRgRPnkI2nT7x6qw5rkI1Lq2hTmQIpHPgyThI0znLK+vX2n9XxKdXZ7BUbbw=="], "@vitejs/plugin-react": ["@vitejs/plugin-react@4.7.0", "", { "dependencies": { "@babel/core": "^7.28.0", "@babel/plugin-transform-react-jsx-self": "^7.27.1", "@babel/plugin-transform-react-jsx-source": "^7.27.1", "@rolldown/pluginutils": "1.0.0-beta.27", "@types/babel__core": "^7.20.5", "react-refresh": "^0.17.0" }, "peerDependencies": { "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA=="], @@ -1115,7 +1114,7 @@ "aggregate-error": ["aggregate-error@3.1.0", "", { "dependencies": { "clean-stack": "^2.0.0", "indent-string": "^4.0.0" } }, "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA=="], - "ai": ["ai@5.0.94", "", { "dependencies": { "@ai-sdk/gateway": "2.0.10", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-PQs3H8xDhORG/L6eyR+SxAKUsa0ORO4ENvRovzGgPmPGCIlwle6UD5VIMQFtj1gvZk+BKBUVEFFwtkTeJTAURw=="], + "ai": ["ai@5.0.101", "", { "dependencies": { "@ai-sdk/gateway": "2.0.15", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.17", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-/P4fgs2PGYTBaZi192YkPikOudsl9vccA65F7J7LvoNTOoP5kh1yAsJPsKAy6FXU32bAngai7ft1UDyC3u7z5g=="], "ai-tokenizer": ["ai-tokenizer@1.0.4", "", { "peerDependencies": { "ai": "^5.0.0" }, "optionalPeers": ["ai"] }, "sha512-BHOUljsmH0SEO9bULQL3sz6pJ4jv00r+NHxX3kR6tn1suAAj6DDN4njSk+sqCOI5Cm6FqizUhDfoYZ0R+5/WVQ=="], diff --git a/package.json b/package.json index b6e01fd514..191c49eaf8 100644 --- a/package.json +++ b/package.json @@ -45,12 +45,12 @@ "postinstall": "sh scripts/postinstall.sh" }, "dependencies": { - "@ai-sdk/anthropic": "^2.0.44", - "@ai-sdk/google": "^2.0.38", - "@ai-sdk/openai": "^2.0.66", - "@ai-sdk/xai": "^2.0.33", + "@ai-sdk/anthropic": "^2.0.47", + "@ai-sdk/google": "^2.0.43", + "@ai-sdk/openai": "^2.0.72", + "@ai-sdk/xai": "^2.0.36", "@lydell/node-pty": "1.1.0", - "@openrouter/ai-sdk-provider": "^1.2.2", + "@openrouter/ai-sdk-provider": "^1.2.5", "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.16", @@ -62,7 +62,7 @@ "@radix-ui/react-tabs": "^1.1.13", "@radix-ui/react-toggle-group": "^1.1.11", "@radix-ui/react-tooltip": "^1.2.8", - "ai": "^5.0.93", + "ai": "^5.0.101", "ai-tokenizer": "^1.0.4", "chalk": "^5.6.2", "cors": "^2.8.5", diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index b1e9f55dbc..d343c49499 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -19,6 +19,7 @@ import { import { log } from "@/node/services/log"; import type { MuxMessage } from "@/common/types/message"; import { enforceThinkingPolicy } from "@/browser/utils/thinking/policy"; +import { getModelStats } from "@/common/utils/tokens/modelStats"; /** * OpenRouter reasoning options @@ -277,3 +278,77 @@ export function buildProviderOptions( log.debug("buildProviderOptions: Unsupported provider", provider); return {}; } + +/** + * Calculate the effective maxOutputTokens for a model based on its limits and thinking budget + * + * For Anthropic models with extended thinking, the AI SDK adds thinkingBudget to maxOutputTokens + * internally. We need to ensure the sum doesn't exceed the model's max_output_tokens limit. + * + * For example, Claude Opus 4 has max_output_tokens=32000. If we use: + * - thinkingBudget=20000 (high) + * - maxOutputTokens=32000 + * Then total=52000 which exceeds 32000 → SDK shows warning and caps output + * + * Solution: Reduce maxOutputTokens so that maxOutputTokens + thinkingBudget <= model limit + * + * @param modelString - Full model string (e.g., "anthropic:claude-opus-4-1") + * @param thinkingLevel - Current thinking level + * @param requestedMaxOutputTokens - Optional user-requested maxOutputTokens + * @returns Effective maxOutputTokens that respects model limits with thinking budget + */ +export function calculateEffectiveMaxOutputTokens( + modelString: string, + thinkingLevel: ThinkingLevel, + requestedMaxOutputTokens?: number +): number | undefined { + const [provider] = modelString.split(":"); + + // Only apply this adjustment for Anthropic models + if (provider !== "anthropic") { + return requestedMaxOutputTokens; + } + + // Get the actual thinking level after policy enforcement + const effectiveThinking = enforceThinkingPolicy(modelString, thinkingLevel); + const thinkingBudget = ANTHROPIC_THINKING_BUDGETS[effectiveThinking]; + + // Get model's max output tokens from models.json + const modelStats = getModelStats(modelString); + const modelMaxOutput = modelStats?.max_output_tokens; + + // If we don't know the model's max output, return requested value + if (!modelMaxOutput) { + log.debug("calculateEffectiveMaxOutputTokens: Unknown model max output, using requested", { + modelString, + requestedMaxOutputTokens, + }); + return requestedMaxOutputTokens; + } + + // Calculate the maximum safe maxOutputTokens + // The SDK will add thinkingBudget to maxOutputTokens, so we need room for both + const maxSafeOutput = modelMaxOutput - thinkingBudget; + + // If user didn't request specific tokens, use the max safe value + const targetOutput = requestedMaxOutputTokens ?? modelMaxOutput; + + // Cap at the safe maximum + const effectiveOutput = Math.min(targetOutput, maxSafeOutput); + + // Ensure we don't go below a reasonable minimum (1000 tokens) + const finalOutput = Math.max(effectiveOutput, 1000); + + log.debug("calculateEffectiveMaxOutputTokens", { + modelString, + thinkingLevel, + effectiveThinking, + thinkingBudget, + modelMaxOutput, + requestedMaxOutputTokens, + maxSafeOutput, + finalOutput, + }); + + return finalOutput; +} diff --git a/src/common/utils/tokens/modelStats.ts b/src/common/utils/tokens/modelStats.ts index 664b7db593..4e0dd6bc38 100644 --- a/src/common/utils/tokens/modelStats.ts +++ b/src/common/utils/tokens/modelStats.ts @@ -3,6 +3,7 @@ import { modelsExtra } from "./models-extra"; export interface ModelStats { max_input_tokens: number; + max_output_tokens?: number; input_cost_per_token: number; output_cost_per_token: number; cache_creation_input_token_cost?: number; @@ -11,6 +12,7 @@ export interface ModelStats { interface RawModelData { max_input_tokens?: number | string; + max_output_tokens?: number | string; input_cost_per_token?: number; output_cost_per_token?: number; cache_creation_input_token_cost?: number; @@ -37,6 +39,8 @@ function extractModelStats(data: RawModelData): ModelStats { /* eslint-disable @typescript-eslint/non-nullable-type-assertion-style */ return { max_input_tokens: data.max_input_tokens as number, + max_output_tokens: + typeof data.max_output_tokens === "number" ? data.max_output_tokens : undefined, input_cost_per_token: data.input_cost_per_token as number, output_cost_per_token: data.output_cost_per_token as number, cache_creation_input_token_cost: diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index edefced357..6e496c6ced 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -22,15 +22,16 @@ interface ModelData { } export const modelsExtra: Record = { - // Claude Opus 4.5 - Released November 2025 - // $15/M input, $75/M output (same pricing as Opus 4.1) + // Claude Opus 4.5 - Released November 24, 2025 + // $5/M input, $25/M output (price drop from Opus 4.1's $15/$75) + // 64K max output tokens (matches Sonnet 4.5) "claude-opus-4-5": { max_input_tokens: 200000, - max_output_tokens: 32000, - input_cost_per_token: 0.000015, // $15 per million input tokens - output_cost_per_token: 0.000075, // $75 per million output tokens - cache_creation_input_token_cost: 0.00001875, // $18.75 per million tokens - cache_read_input_token_cost: 0.0000015, // $1.50 per million tokens + max_output_tokens: 64000, + input_cost_per_token: 0.000005, // $5 per million input tokens + output_cost_per_token: 0.000025, // $25 per million output tokens + cache_creation_input_token_cost: 0.00000625, // $6.25 per million tokens (estimated) + cache_read_input_token_cost: 0.0000005, // $0.50 per million tokens (estimated) litellm_provider: "anthropic", mode: "chat", supports_function_calling: true, diff --git a/src/common/utils/tokens/models.json b/src/common/utils/tokens/models.json index a0142b393b..855e0ae542 100644 --- a/src/common/utils/tokens/models.json +++ b/src/common/utils/tokens/models.json @@ -436,39 +436,45 @@ "cache_creation_input_token_cost": 1.25e-6, "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 1e-6, - "litellm_provider": "bedrock", + "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 5e-6, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, + "supports_computer_use": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 }, "anthropic.claude-haiku-4-5@20251001": { "cache_creation_input_token_cost": 1.25e-6, "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 1e-6, - "litellm_provider": "bedrock", + "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 5e-6, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, + "supports_computer_use": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-6, @@ -676,6 +682,36 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "anthropic.claude-v1": { "input_cost_per_token": 8e-6, "litellm_provider": "bedrock", @@ -904,20 +940,23 @@ "cache_creation_input_token_cost": 1.375e-6, "cache_read_input_token_cost": 1.1e-7, "input_cost_per_token": 1.1e-6, - "litellm_provider": "bedrock", + "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 5.5e-6, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, + "supports_computer_use": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 }, "apac.anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 3e-6, @@ -1224,6 +1263,98 @@ "supports_tool_choice": true, "supports_vision": true }, + "azure/eu/gpt-5.1": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.75e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2.2e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, "azure/eu/gpt-5-nano-2025-08-07": { "cache_read_input_token_cost": 5.5e-9, "input_cost_per_token": 5.5e-8, @@ -1389,6 +1520,98 @@ "supports_tool_choice": true, "supports_vision": true }, + "azure/global/gpt-5.1": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, "azure/gpt-3.5-turbo": { "input_cost_per_token": 5e-7, "litellm_provider": "azure", @@ -1851,7 +2074,7 @@ "supports_tool_choice": true, "supports_vision": true }, - "azure/gpt-4o-audio-preview-2024-12-17": { + "azure/gpt-audio-2025-08-28": { "input_cost_per_audio_token": 4e-5, "input_cost_per_token": 2.5e-6, "litellm_provider": "azure", @@ -1874,26 +2097,72 @@ "supports_tool_choice": true, "supports_vision": false }, - "azure/gpt-4o-mini": { - "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_token": 1.65e-7, + "azure/gpt-audio-mini-2025-10-06": { + "input_cost_per_audio_token": 1e-5, + "input_cost_per_token": 6e-7, "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, "mode": "chat", - "output_cost_per_token": 6.6e-7, + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 2.4e-6, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_function_calling": true, + "supports_native_streaming": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true, - "supports_response_schema": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": false }, - "azure/gpt-4o-mini-2024-07-18": { - "cache_read_input_token_cost": 7.5e-8, - "input_cost_per_token": 1.65e-7, - "litellm_provider": "azure", + "azure/gpt-4o-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 4e-5, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-4o-mini": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_token": 1.65e-7, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6.6e-7, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 7.5e-8, + "input_cost_per_token": 1.65e-7, + "litellm_provider": "azure", "max_input_tokens": 128000, "max_output_tokens": 16384, "max_tokens": 16384, @@ -1948,6 +2217,52 @@ "supports_system_messages": true, "supports_tool_choice": true }, + "azure/gpt-realtime-2025-08-28": { + "cache_creation_input_audio_token_cost": 4e-6, + "cache_read_input_token_cost": 4e-6, + "input_cost_per_audio_token": 3.2e-5, + "input_cost_per_image": 5e-6, + "input_cost_per_token": 4e-6, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-5, + "output_cost_per_token": 1.6e-5, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-realtime-mini-2025-10-06": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_token_cost": 6e-8, + "input_cost_per_audio_token": 1e-5, + "input_cost_per_image": 8e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-5, + "output_cost_per_token": 2.4e-6, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "azure/gpt-4o-mini-transcribe": { "input_cost_per_audio_token": 3e-6, "input_cost_per_token": 1.25e-6, @@ -2018,6 +2333,121 @@ "output_cost_per_token": 1e-5, "supported_endpoints": ["/v1/audio/transcriptions"] }, + "azure/gpt-4o-transcribe-diarize": { + "input_cost_per_audio_token": 6e-6, + "input_cost_per_token": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/audio/transcriptions"] + }, + "azure/gpt-5.1-2025-11-13": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "azure/gpt-5.1-chat-2025-11-13": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-5, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "azure/gpt-5.1-codex-2025-11-13": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 1.25e-6, + "input_cost_per_token_priority": 2.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + "output_cost_per_token_priority": 2e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex-mini-2025-11-13": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_priority": 4.5e-8, + "input_cost_per_token": 2.5e-7, + "input_cost_per_token_priority": 4.5e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-6, + "output_cost_per_token_priority": 3.6e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, "azure/gpt-5": { "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 1.25e-6, @@ -2248,6 +2678,98 @@ "supports_tool_choice": true, "supports_vision": true }, + "azure/gpt-5.1": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.25e-7, + "input_cost_per_token": 1.25e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-5, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.5e-8, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-6, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, "azure/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", @@ -2919,32 +3441,124 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true + }, + "azure/us/gpt-4o-realtime-preview-2024-12-17": { + "cache_read_input_audio_token_cost": 2.5e-6, + "cache_read_input_token_cost": 2.75e-6, + "input_cost_per_audio_token": 4.4e-5, + "input_cost_per_token": 5.5e-6, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-5, + "output_cost_per_token": 2.2e-5, + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/us/gpt-5-2025-08-07": { + "cache_read_input_token_cost": 1.375e-7, + "input_cost_per_token": 1.375e-6, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5-mini-2025-08-07": { + "cache_read_input_token_cost": 2.75e-8, + "input_cost_per_token": 2.75e-7, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.2e-6, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5-nano-2025-08-07": { + "cache_read_input_token_cost": 5.5e-9, + "input_cost_per_token": 5.5e-8, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4.4e-7, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true }, - "azure/us/gpt-4o-realtime-preview-2024-12-17": { - "cache_read_input_audio_token_cost": 2.5e-6, - "cache_read_input_token_cost": 2.75e-6, - "input_cost_per_audio_token": 4.4e-5, - "input_cost_per_token": 5.5e-6, + "azure/us/gpt-5.1": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, "litellm_provider": "azure", - "max_input_tokens": 128000, - "max_output_tokens": 4096, - "max_tokens": 4096, + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, "mode": "chat", - "output_cost_per_audio_token": 8e-5, - "output_cost_per_token": 2.2e-5, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], - "supports_audio_input": true, - "supports_audio_output": true, + "output_cost_per_token": 1.1e-5, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, + "supports_native_streaming": true, "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, "supports_system_messages": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, - "azure/us/gpt-5-2025-08-07": { - "cache_read_input_token_cost": 1.375e-7, - "input_cost_per_token": 1.375e-6, + "azure/us/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, @@ -2953,7 +3567,7 @@ "output_cost_per_token": 1.1e-5, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2965,16 +3579,16 @@ "supports_tool_choice": true, "supports_vision": true }, - "azure/us/gpt-5-mini-2025-08-07": { - "cache_read_input_token_cost": 2.75e-8, - "input_cost_per_token": 2.75e-7, + "azure/us/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.4e-7, + "input_cost_per_token": 1.38e-6, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, - "mode": "chat", - "output_cost_per_token": 2.2e-6, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "mode": "responses", + "output_cost_per_token": 1.1e-5, + "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supports_function_calling": true, @@ -2984,20 +3598,20 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_system_messages": true, + "supports_system_messages": false, "supports_tool_choice": true, "supports_vision": true }, - "azure/us/gpt-5-nano-2025-08-07": { - "cache_read_input_token_cost": 5.5e-9, - "input_cost_per_token": 5.5e-8, + "azure/us/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.8e-8, + "input_cost_per_token": 2.75e-7, "litellm_provider": "azure", "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, - "mode": "chat", - "output_cost_per_token": 4.4e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "mode": "responses", + "output_cost_per_token": 2.2e-6, + "supported_endpoints": ["/v1/responses"], "supported_modalities": ["text", "image"], "supported_output_modalities": ["text"], "supports_function_calling": true, @@ -3007,7 +3621,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_system_messages": true, + "supports_system_messages": false, "supports_tool_choice": true, "supports_vision": true }, @@ -4359,6 +4973,24 @@ "supports_tool_choice": true, "supports_vision": true }, + "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": { + "input_cost_per_token": 3.3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.65e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 2.65e-6, "litellm_provider": "bedrock", @@ -4486,6 +5118,24 @@ "supports_tool_choice": true, "supports_vision": true }, + "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": { + "input_cost_per_token": 3.3e-6, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.65e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 2.65e-6, "litellm_provider": "bedrock", @@ -4692,7 +5342,7 @@ "supports_function_calling": true, "supports_tool_choice": true }, - "cerebras/openai/gpt-oss-120b": { + "cerebras/gpt-oss-120b": { "input_cost_per_token": 2.5e-7, "litellm_provider": "cerebras", "max_input_tokens": 131072, @@ -5216,6 +5866,31 @@ "supports_web_search": true, "tool_use_system_prompt_tokens": 346 }, + "claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 3.75e-6, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_200k_tokens": 6e-6, + "output_cost_per_token_above_200k_tokens": 2.25e-5, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-6, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "claude-opus-4-1": { "cache_creation_input_token_cost": 1.875e-5, "cache_creation_input_token_cost_above_1hr": 3e-5, @@ -7954,20 +8629,23 @@ "cache_read_input_token_cost": 1.1e-7, "input_cost_per_token": 1.1e-6, "deprecation_date": "2026-10-15", - "litellm_provider": "bedrock", + "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 5.5e-6, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, + "supports_computer_use": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 }, "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-6, @@ -8424,6 +9102,18 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3p1-terminus": { + "input_cost_per_token": 5.6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.68e-6, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": true + }, "fireworks_ai/accounts/fireworks/models/firefunction-v2": { "input_cost_per_token": 9e-7, "litellm_provider": "fireworks_ai", @@ -8502,6 +9192,20 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "fireworks_ai/accounts/fireworks/models/kimi-k2-thinking": { + "input_cost_per_token": 6e-7, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-6, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, "fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct": { "input_cost_per_token": 3e-6, "litellm_provider": "fireworks_ai", @@ -9765,6 +10469,29 @@ "supports_web_search": true, "tpm": 8000000 }, + "gemini-3-pro-image-preview": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 65536, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_batches": 6e-6, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true + }, "gemini-2.5-flash-lite": { "cache_read_input_token_cost": 2.5e-8, "input_cost_per_audio_token": 5e-7, @@ -10073,10 +10800,12 @@ "supports_web_search": true }, "gemini-3-pro-preview": { - "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, "input_cost_per_token": 2e-6, "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -10090,8 +10819,46 @@ "mode": "chat", "output_cost_per_token": 1.2e-5, "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, + "vertex_ai/gemini-3-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], "supported_output_modalities": ["text"], "supports_audio_input": true, @@ -11152,6 +11919,31 @@ "supports_web_search": true, "tpm": 8000000 }, + "gemini/gemini-3-pro-image-preview": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "gemini", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 65536, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_token": 1.2e-5, + "rpm": 1000, + "tpm": 4000000, + "output_cost_per_token_batches": 6e-6, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true + }, "gemini/gemini-2.5-flash-lite": { "cache_read_input_token_cost": 2.5e-8, "input_cost_per_audio_token": 5e-7, @@ -11506,9 +12298,11 @@ "tpm": 800000 }, "gemini/gemini-3-pro-preview": { - "cache_read_input_token_cost": 3.125e-7, + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, "input_cost_per_token": 2e-6, "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -11522,9 +12316,10 @@ "mode": "chat", "output_cost_per_token": 1.2e-5, "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], "supported_modalities": ["text", "image", "audio", "video"], "supported_output_modalities": ["text"], "supports_audio_input": true, @@ -11848,7 +12643,7 @@ "supports_audio_output": false, "supports_function_calling": true, "supports_response_schema": true, - "supports_system_messages": true, + "supports_system_messages": false, "supports_tool_choice": true, "supports_vision": true }, @@ -12037,12 +12832,13 @@ "input_cost_per_token": 1.1e-6, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 5.5e-6, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, + "supports_computer_use": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, @@ -14513,20 +15309,23 @@ "cache_creation_input_token_cost": 1.375e-6, "cache_read_input_token_cost": 1.1e-7, "input_cost_per_token": 1.1e-6, - "litellm_provider": "bedrock", - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 5.5e-6, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, + "supports_computer_use": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 }, "lambda_ai/deepseek-llama3.3-70b": { "input_cost_per_token": 2e-7, @@ -17439,6 +18238,42 @@ "supports_tool_choice": true, "supports_vision": true }, + "openrouter/google/gemini-3-pro-preview": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-7, + "input_cost_per_token": 2e-6, + "input_cost_per_token_above_200k_tokens": 4e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_above_200k_tokens": 1.8e-5, + "output_cost_per_token_batches": 6e-6, + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, "openrouter/google/gemini-pro-1.5": { "input_cost_per_image": 0.00265, "input_cost_per_token": 2.5e-6, @@ -19157,6 +19992,7 @@ "supports_reasoning": true, "source": "https://cloud.sambanova.ai/plans/pricing" }, + "snowflake/claude-3-5-sonnet": { "litellm_provider": "snowflake", "max_input_tokens": 18000, @@ -19998,6 +20834,20 @@ "supports_parallel_function_calling": true, "supports_tool_choice": true }, + "together_ai/zai-org/GLM-4.6": { + "input_cost_per_token": 0.6e-6, + "litellm_provider": "together_ai", + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 2.2e-6, + "source": "https://www.together.ai/models/glm-4-6", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "together_ai/moonshotai/Kimi-K2-Instruct-0905": { "input_cost_per_token": 1e-6, "litellm_provider": "together_ai", @@ -20118,20 +20968,23 @@ "cache_creation_input_token_cost": 1.375e-6, "cache_read_input_token_cost": 1.1e-7, "input_cost_per_token": 1.1e-6, - "litellm_provider": "bedrock", + "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 5.5e-6, "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", "supports_assistant_prefill": true, + "supports_computer_use": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 }, "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-6, @@ -20289,14 +21142,16 @@ "input_cost_per_token": 1.1e-6, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_output_tokens": 64000, + "max_tokens": 64000, "mode": "chat", "output_cost_per_token": 5.5e-6, "supports_assistant_prefill": true, + "supports_computer_use": true, "supports_function_calling": true, "supports_pdf_input": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, @@ -21909,6 +22764,56 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "vertex_ai/gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-8, + "input_cost_per_audio_token": 1e-6, + "input_cost_per_token": 3e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_reasoning_token": 2.5e-6, + "output_cost_per_token": 2.5e-6, + "rpm": 100000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation#edit-an-image", + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": false, + "tpm": 8000000 + }, + "vertex_ai/gemini-3-pro-image-preview": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-6, + "input_cost_per_token_batches": 1e-6, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 65536, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_token": 1.2e-5, + "output_cost_per_token_batches": 6e-6, + "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image" + }, "vertex_ai/imagegeneration@006": { "litellm_provider": "vertex_ai-image-models", "mode": "image_generation", @@ -21933,6 +22838,12 @@ "output_cost_per_image": 0.04, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, + "vertex_ai/imagen-3.0-capability-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/image/edit-insert-objects" + }, "vertex_ai/imagen-4.0-fast-generate-001": { "litellm_provider": "vertex_ai-image-models", "mode": "image_generation", @@ -22398,7 +23309,7 @@ "max_input_tokens": 1024, "max_tokens": 1024, "mode": "video_generation", - "output_cost_per_second": 0.4, + "output_cost_per_second": 0.15, "source": "https://ai.google.dev/gemini-api/docs/video", "supported_modalities": ["text"], "supported_output_modalities": ["video"] @@ -22408,7 +23319,27 @@ "max_input_tokens": 1024, "max_tokens": 1024, "mode": "video_generation", - "output_cost_per_second": 0.75, + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.0-fast-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "vertex_ai/veo-3.0-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", "supported_modalities": ["text"], "supported_output_modalities": ["video"] @@ -23323,83 +24254,148 @@ "supports_tool_choice": true, "supports_web_search": true }, - - "xai/grok-4-1-fast-reasoning": { + "xai/grok-4-fast-non-reasoning": { "litellm_provider": "xai", "max_input_tokens": 2e6, "max_output_tokens": 2e6, + "cache_read_input_token_cost": 0.05e-6, "max_tokens": 2e6, "mode": "chat", "input_cost_per_token": 0.2e-6, "input_cost_per_token_above_128k_tokens": 0.4e-6, "output_cost_per_token": 0.5e-6, "output_cost_per_token_above_128k_tokens": 1e-6, - "cache_read_input_token_cost": 0.05e-6, - "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-0709": { + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_128k_tokens": 6e-6, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "output_cost_per_token_above_128k_tokens": 30e-6, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-latest": { + "input_cost_per_token": 3e-6, + "input_cost_per_token_above_128k_tokens": 6e-6, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-5, + "output_cost_per_token_above_128k_tokens": 30e-6, + "source": "https://docs.x.ai/docs/models", "supports_function_calling": true, "supports_tool_choice": true, "supports_web_search": true }, "xai/grok-4-1-fast": { + "cache_read_input_token_cost": 0.05e-6, + "input_cost_per_token": 0.2e-6, + "input_cost_per_token_above_128k_tokens": 0.4e-6, "litellm_provider": "xai", "max_input_tokens": 2e6, "max_output_tokens": 2e6, - "cache_read_input_token_cost": 0.05e-6, "max_tokens": 2e6, "mode": "chat", - "input_cost_per_token": 0.2e-6, - "input_cost_per_token_above_128k_tokens": 0.4e-6, "output_cost_per_token": 0.5e-6, "output_cost_per_token_above_128k_tokens": 1e-6, "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "supports_audio_input": true, "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, "supports_tool_choice": true, + "supports_vision": true, "supports_web_search": true }, - "xai/grok-4-fast-non-reasoning": { + "xai/grok-4-1-fast-reasoning": { + "cache_read_input_token_cost": 0.05e-6, + "input_cost_per_token": 0.2e-6, + "input_cost_per_token_above_128k_tokens": 0.4e-6, "litellm_provider": "xai", "max_input_tokens": 2e6, "max_output_tokens": 2e6, - "cache_read_input_token_cost": 0.05e-6, "max_tokens": 2e6, "mode": "chat", + "output_cost_per_token": 0.5e-6, + "output_cost_per_token_above_128k_tokens": 1e-6, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast-reasoning-latest": { + "cache_read_input_token_cost": 0.05e-6, "input_cost_per_token": 0.2e-6, "input_cost_per_token_above_128k_tokens": 0.4e-6, + "litellm_provider": "xai", + "max_input_tokens": 2e6, + "max_output_tokens": 2e6, + "max_tokens": 2e6, + "mode": "chat", "output_cost_per_token": 0.5e-6, "output_cost_per_token_above_128k_tokens": 1e-6, - "source": "https://docs.x.ai/docs/models", + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "supports_audio_input": true, "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, "supports_tool_choice": true, + "supports_vision": true, "supports_web_search": true }, - "xai/grok-4-0709": { - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_128k_tokens": 6e-6, + "xai/grok-4-1-fast-non-reasoning": { + "cache_read_input_token_cost": 0.05e-6, + "input_cost_per_token": 0.2e-6, + "input_cost_per_token_above_128k_tokens": 0.4e-6, "litellm_provider": "xai", - "max_input_tokens": 256000, - "max_output_tokens": 256000, - "max_tokens": 256000, + "max_input_tokens": 2e6, + "max_output_tokens": 2e6, + "max_tokens": 2e6, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_above_128k_tokens": 30e-6, - "source": "https://docs.x.ai/docs/models", + "output_cost_per_token": 0.5e-6, + "output_cost_per_token_above_128k_tokens": 1e-6, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", + "supports_audio_input": true, "supports_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true, + "supports_vision": true, "supports_web_search": true }, - "xai/grok-4-latest": { - "input_cost_per_token": 3e-6, - "input_cost_per_token_above_128k_tokens": 6e-6, + "xai/grok-4-1-fast-non-reasoning-latest": { + "cache_read_input_token_cost": 0.05e-6, + "input_cost_per_token": 0.2e-6, + "input_cost_per_token_above_128k_tokens": 0.4e-6, "litellm_provider": "xai", - "max_input_tokens": 256000, - "max_output_tokens": 256000, - "max_tokens": 256000, + "max_input_tokens": 2e6, + "max_output_tokens": 2e6, + "max_tokens": 2e6, "mode": "chat", - "output_cost_per_token": 1.5e-5, - "output_cost_per_token_above_128k_tokens": 30e-6, - "source": "https://docs.x.ai/docs/models", + "output_cost_per_token": 0.5e-6, + "output_cost_per_token_above_128k_tokens": 1e-6, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", + "supports_audio_input": true, "supports_function_calling": true, + "supports_response_schema": true, "supports_tool_choice": true, + "supports_vision": true, "supports_web_search": true }, "xai/grok-beta": { diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 6cb748f913..81222beb1e 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -33,7 +33,10 @@ import type { HistoryService } from "./historyService"; import type { PartialService } from "./partialService"; import { buildSystemMessage, readToolInstructions } from "./systemMessage"; import { getTokenizerForModel } from "@/node/utils/main/tokenizer"; -import { buildProviderOptions } from "@/common/utils/ai/providerOptions"; +import { + buildProviderOptions, + calculateEffectiveMaxOutputTokens, +} from "@/common/utils/ai/providerOptions"; import type { ThinkingLevel } from "@/common/types/thinking"; import type { StreamAbortEvent, @@ -926,6 +929,15 @@ export class AIService extends EventEmitter { effectiveMuxProviderOptions ); + // Calculate effective maxOutputTokens that accounts for thinking budget + // For Anthropic models with extended thinking, the SDK adds thinkingBudget to maxOutputTokens + // so we need to ensure the sum doesn't exceed the model's max_output_tokens limit + const effectiveMaxOutputTokens = calculateEffectiveMaxOutputTokens( + effectiveModelString, + thinkingLevel ?? "off", + maxOutputTokens + ); + // Delegate to StreamManager with model instance, system message, tools, historySequence, and initial metadata const streamResult = await this.streamManager.startStream( workspaceId, @@ -943,7 +955,7 @@ export class AIService extends EventEmitter { mode, // Pass mode so it persists in final history entry }, providerOptions, - maxOutputTokens, + effectiveMaxOutputTokens, toolPolicy, streamToken // Pass the pre-generated stream token );