diff --git a/runner/codegen/ai-sdk/ai-sdk-model-options.ts b/runner/codegen/ai-sdk/ai-sdk-model-options.ts new file mode 100644 index 0000000..e2761f0 --- /dev/null +++ b/runner/codegen/ai-sdk/ai-sdk-model-options.ts @@ -0,0 +1,12 @@ +import {AnthropicProviderOptions} from '@ai-sdk/anthropic'; +import {GoogleGenerativeAIProviderOptions} from '@ai-sdk/google'; +import {OpenAIResponsesProviderOptions} from '@ai-sdk/openai'; +import {LanguageModel} from 'ai'; + +export type ModelOptions = { + model: LanguageModel; + providerOptions: + | {anthropic: AnthropicProviderOptions} + | {google: GoogleGenerativeAIProviderOptions} + | {openai: OpenAIResponsesProviderOptions}; +}; diff --git a/runner/codegen/ai-sdk-runner.ts b/runner/codegen/ai-sdk/ai-sdk-runner.ts similarity index 56% rename from runner/codegen/ai-sdk-runner.ts rename to runner/codegen/ai-sdk/ai-sdk-runner.ts index 14ad02f..8b207d9 100644 --- a/runner/codegen/ai-sdk-runner.ts +++ b/runner/codegen/ai-sdk/ai-sdk-runner.ts @@ -1,13 +1,6 @@ -import { - LlmRunner, - LocalLlmConstrainedOutputGenerateRequestOptions, - LocalLlmConstrainedOutputGenerateResponse, - LocalLlmGenerateFilesRequestOptions, - LocalLlmGenerateFilesResponse, - LocalLlmGenerateTextRequestOptions, - LocalLlmGenerateTextResponse, - PromptDataMessage, -} from './llm-runner.js'; +import {AnthropicProviderOptions} from '@ai-sdk/anthropic'; +import {GoogleGenerativeAIProviderOptions} from '@ai-sdk/google'; +import {OpenAIResponsesProviderOptions} from '@ai-sdk/openai'; import { FilePart, generateObject, @@ -16,43 +9,31 @@ import { ModelMessage, SystemModelMessage, TextPart, - wrapLanguageModel, } from 'ai'; -import {google, GoogleGenerativeAIProviderOptions} from '@ai-sdk/google'; -import {anthropic, AnthropicProviderOptions} from '@ai-sdk/anthropic'; -import {openai, OpenAIResponsesProviderOptions} from '@ai-sdk/openai'; import z from 'zod'; -import {callWithTimeout} from '../utils/timeout.js'; -import {combineAbortSignals} from 
'../utils/abort-signal.js'; -import {anthropicThinkingWithStructuredResponseMiddleware} from './ai-sdk-claude-thinking-patch.js'; +import {combineAbortSignals} from '../../utils/abort-signal.js'; +import {callWithTimeout} from '../../utils/timeout.js'; +import { + LlmRunner, + LocalLlmConstrainedOutputGenerateRequestOptions, + LocalLlmConstrainedOutputGenerateResponse, + LocalLlmGenerateFilesRequestOptions, + LocalLlmGenerateFilesResponse, + LocalLlmGenerateTextRequestOptions, + LocalLlmGenerateTextResponse, + PromptDataMessage, +} from '../llm-runner.js'; +import {ANTHROPIC_MODELS, getAiSdkModelOptionsForAnthropic} from './anthropic.js'; +import {getAiSdkModelOptionsForGoogle, GOOGLE_MODELS} from './google.js'; +import {getAiSdkModelOptionsForOpenAI, OPENAI_MODELS} from './openai.js'; -const SUPPORTED_MODELS = [ - 'claude-opus-4.1-no-thinking', - 'claude-opus-4.1-with-thinking-16k', - 'claude-opus-4.1-with-thinking-32k', - 'claude-sonnet-4.5-no-thinking', - 'claude-sonnet-4.5-with-thinking-16k', - 'claude-sonnet-4.5-with-thinking-32k', - 'gemini-2.5-flash-lite', - 'gemini-2.5-flash', - 'gemini-2.5-flash-with-thinking-dynamic', - 'gemini-2.5-flash-with-thinking-16k', - 'gemini-2.5-flash-with-thinking-24k', - 'gemini-2.5-pro', - 'gemini-3-pro-preview', - 'gpt-5.1-no-thinking', - 'gpt-5.1-thinking-low', - 'gpt-5.1-thinking-high', - 'gpt-5.1-thinking-medium', -] as const; +const SUPPORTED_MODELS = [...GOOGLE_MODELS, ...ANTHROPIC_MODELS, ...OPENAI_MODELS] as const; // Increased to a very high value as we rely on an actual timeout // that aborts stuck LLM requests. WCS is targeting stability here; // even if it involves many exponential backoff-waiting. 
const DEFAULT_MAX_RETRIES = 100000; -const claude16kThinkingTokenBudget = 16_000; -const claude32kThinkingTokenBudget = 32_000; export class AiSDKRunner implements LlmRunner { displayName = 'AI SDK'; id = 'ai-sdk'; @@ -164,100 +145,14 @@ export class AiSDKRunner implements LlmRunner { | {google: GoogleGenerativeAIProviderOptions} | {openai: OpenAIResponsesProviderOptions}; }> { - const modelName = request.model as (typeof SUPPORTED_MODELS)[number]; - switch (modelName) { - case 'claude-opus-4.1-no-thinking': - case 'claude-opus-4.1-with-thinking-16k': - case 'claude-opus-4.1-with-thinking-32k': - case 'claude-sonnet-4.5-no-thinking': - case 'claude-sonnet-4.5-with-thinking-16k': - case 'claude-sonnet-4.5-with-thinking-32k': { - const thinkingEnabled = modelName.includes('-with-thinking'); - const thinkingBudget = !thinkingEnabled - ? undefined - : modelName.endsWith('-32k') - ? claude32kThinkingTokenBudget - : claude16kThinkingTokenBudget; - const isOpus4_1Model = modelName.includes('opus-4.1'); - const model = anthropic(isOpus4_1Model ? 'claude-opus-4-1' : 'claude-sonnet-4-5'); - return { - model: thinkingEnabled - ? wrapLanguageModel({ - model, - middleware: anthropicThinkingWithStructuredResponseMiddleware, - }) - : model, - providerOptions: { - anthropic: { - sendReasoning: thinkingEnabled, - thinking: { - type: thinkingEnabled ? 
'enabled' : 'disabled', - budgetTokens: thinkingBudget, - }, - } satisfies AnthropicProviderOptions, - }, - }; - } - case 'gemini-2.5-flash-lite': - case 'gemini-2.5-flash': - case 'gemini-2.5-pro': - case 'gemini-3-pro-preview': - return { - model: google(modelName), - providerOptions: { - google: { - thinkingConfig: { - includeThoughts: request.thinkingConfig?.includeThoughts, - }, - } satisfies GoogleGenerativeAIProviderOptions, - }, - }; - case 'gemini-2.5-flash-with-thinking-dynamic': - case 'gemini-2.5-flash-with-thinking-16k': - case 'gemini-2.5-flash-with-thinking-24k': - // -1 means "dynamic thinking budget": - // https://ai.google.dev/gemini-api/docs/thinking#set-budget. - let thinkingBudget = -1; - if (modelName.endsWith('-16k')) { - thinkingBudget = 16_000; - } else if (modelName.endsWith('-24k')) { - thinkingBudget = 24_000; - } - return { - model: google('gemini-2.5-flash'), - providerOptions: { - google: { - thinkingConfig: { - thinkingBudget: thinkingBudget, - includeThoughts: true, - }, - } satisfies GoogleGenerativeAIProviderOptions, - }, - }; - case 'gpt-5.1-no-thinking': - case 'gpt-5.1-thinking-low': - case 'gpt-5.1-thinking-medium': - case 'gpt-5.1-thinking-high': - let reasoningEffort: string = 'none'; - if (modelName === 'gpt-5.1-thinking-high') { - reasoningEffort = 'high'; - } else if (modelName === 'gpt-5.1-thinking-medium') { - reasoningEffort = 'medium'; - } else if (modelName === 'gpt-5.1-thinking-low') { - reasoningEffort = 'low'; - } - return { - model: openai('gpt-5.1'), - providerOptions: { - openai: { - reasoningEffort, - reasoningSummary: 'detailed', - } satisfies OpenAIResponsesProviderOptions, - }, - }; - default: - throw new Error(`Unexpected model in AI SDK runner: ${request.model}.`); + const result = + (await getAiSdkModelOptionsForGoogle(request.model)) ?? + (await getAiSdkModelOptionsForAnthropic(request.model)) ?? 
+ (await getAiSdkModelOptionsForOpenAI(request.model)); + if (result === null) { + throw new Error(`Unexpected unsupported model: ${request.model}`); } + return result; } private _convertRequestToMessagesList( diff --git a/runner/codegen/ai-sdk/anthropic.ts b/runner/codegen/ai-sdk/anthropic.ts new file mode 100644 index 0000000..f4262e3 --- /dev/null +++ b/runner/codegen/ai-sdk/anthropic.ts @@ -0,0 +1,56 @@ +import {anthropic, AnthropicProviderOptions} from '@ai-sdk/anthropic'; +import {wrapLanguageModel} from 'ai'; +import {anthropicThinkingWithStructuredResponseMiddleware} from './anthropic_thinking_patch.js'; +import {ModelOptions} from './ai-sdk-model-options.js'; + +export const ANTHROPIC_MODELS = [ + 'claude-opus-4.1-no-thinking', + 'claude-opus-4.1-with-thinking-16k', + 'claude-opus-4.1-with-thinking-32k', + 'claude-sonnet-4.5-no-thinking', + 'claude-sonnet-4.5-with-thinking-16k', + 'claude-sonnet-4.5-with-thinking-32k', +] as const; + +export async function getAiSdkModelOptionsForAnthropic( + rawModelName: string, +): Promise<ModelOptions | null> { + const modelName = rawModelName as (typeof ANTHROPIC_MODELS)[number]; + + switch (modelName) { + case 'claude-opus-4.1-no-thinking': + case 'claude-opus-4.1-with-thinking-16k': + case 'claude-opus-4.1-with-thinking-32k': + case 'claude-sonnet-4.5-no-thinking': + case 'claude-sonnet-4.5-with-thinking-16k': + case 'claude-sonnet-4.5-with-thinking-32k': { + const thinkingEnabled = modelName.includes('-with-thinking'); + const thinkingBudget = !thinkingEnabled + ? undefined + : modelName.endsWith('-32k') + ? 32_000 + : 16_000; + const isOpus4_1Model = modelName.includes('opus-4.1'); + const model = anthropic(isOpus4_1Model ? 'claude-opus-4-1' : 'claude-sonnet-4-5'); + return { + model: thinkingEnabled + ? wrapLanguageModel({ + model, + middleware: anthropicThinkingWithStructuredResponseMiddleware, + }) + : model, + providerOptions: { + anthropic: { + sendReasoning: thinkingEnabled, + thinking: { + type: thinkingEnabled ? 
'enabled' : 'disabled', + budgetTokens: thinkingBudget, + }, + } satisfies AnthropicProviderOptions, + }, + }; + } + default: + return null; + } +} diff --git a/runner/codegen/ai-sdk-claude-thinking-patch.ts b/runner/codegen/ai-sdk/anthropic_thinking_patch.ts similarity index 100% rename from runner/codegen/ai-sdk-claude-thinking-patch.ts rename to runner/codegen/ai-sdk/anthropic_thinking_patch.ts diff --git a/runner/codegen/ai-sdk/google.ts b/runner/codegen/ai-sdk/google.ts new file mode 100644 index 0000000..5c2595d --- /dev/null +++ b/runner/codegen/ai-sdk/google.ts @@ -0,0 +1,71 @@ +import {google, GoogleGenerativeAIProviderOptions} from '@ai-sdk/google'; +import {ModelOptions} from './ai-sdk-model-options.js'; + +export const GOOGLE_MODELS = [ + 'gemini-2.5-flash-lite', + 'gemini-2.5-flash', + 'gemini-2.5-flash-no-thinking', + 'gemini-2.5-flash-with-thinking-16k', + 'gemini-2.5-flash-with-thinking-24k', + 'gemini-2.5-pro', + 'gemini-3-pro-preview', +] as const; + +export async function getAiSdkModelOptionsForGoogle( + rawModelName: string, +): Promise<ModelOptions | null> { + const modelName = rawModelName as (typeof GOOGLE_MODELS)[number]; + + switch (modelName) { + case 'gemini-2.5-flash-lite': + case 'gemini-2.5-flash': + case 'gemini-2.5-pro': + case 'gemini-3-pro-preview': + return { + model: google(modelName), + providerOptions: { + google: { + thinkingConfig: { + includeThoughts: true, + }, + } satisfies GoogleGenerativeAIProviderOptions, + }, + }; + case 'gemini-2.5-flash-no-thinking': { + return { + model: google('gemini-2.5-flash'), + providerOptions: { + google: { + thinkingConfig: { + thinkingBudget: 0, + }, + }, + }, + }; + } + case 'gemini-2.5-flash-with-thinking-16k': + case 'gemini-2.5-flash-with-thinking-24k': + let thinkingBudget: number; + if (modelName.endsWith('-16k')) { + thinkingBudget = 16_000; + } else if (modelName.endsWith('-24k')) { + thinkingBudget = 24_000; + } else { + throw new Error(`Unexpected model: ${modelName}`); + } + + return { + model: 
google('gemini-2.5-flash'), + providerOptions: { + google: { + thinkingConfig: { + thinkingBudget: thinkingBudget, + includeThoughts: true, + }, + } satisfies GoogleGenerativeAIProviderOptions, + }, + }; + default: + return null; + } +} diff --git a/runner/codegen/ai-sdk/openai.ts b/runner/codegen/ai-sdk/openai.ts new file mode 100644 index 0000000..e419410 --- /dev/null +++ b/runner/codegen/ai-sdk/openai.ts @@ -0,0 +1,41 @@ +import {openai, OpenAIResponsesProviderOptions} from '@ai-sdk/openai'; +import {ModelOptions} from './ai-sdk-model-options.js'; + +export const OPENAI_MODELS = [ + 'gpt-5.1-no-thinking', + 'gpt-5.1-thinking-low', + 'gpt-5.1-thinking-high', + 'gpt-5.1-thinking-medium', +] as const; + +export async function getAiSdkModelOptionsForOpenAI( + rawModelName: string, +): Promise<ModelOptions | null> { + const modelName = rawModelName as (typeof OPENAI_MODELS)[number]; + + switch (modelName) { + case 'gpt-5.1-no-thinking': + case 'gpt-5.1-thinking-low': + case 'gpt-5.1-thinking-medium': + case 'gpt-5.1-thinking-high': + let reasoningEffort: string = 'none'; + if (modelName === 'gpt-5.1-thinking-high') { + reasoningEffort = 'high'; + } else if (modelName === 'gpt-5.1-thinking-medium') { + reasoningEffort = 'medium'; + } else if (modelName === 'gpt-5.1-thinking-low') { + reasoningEffort = 'low'; + } + return { + model: openai('gpt-5.1'), + providerOptions: { + openai: { + reasoningEffort, + reasoningSummary: 'detailed', + } satisfies OpenAIResponsesProviderOptions, + }, + }; + default: + return null; + } +} diff --git a/runner/codegen/runner-creation.ts b/runner/codegen/runner-creation.ts index 58fae61..ef1a493 100644 --- a/runner/codegen/runner-creation.ts +++ b/runner/codegen/runner-creation.ts @@ -4,7 +4,7 @@ import type {ClaudeCodeRunner} from './claude-code-runner.js'; import type {GenkitRunner} from './genkit/genkit-runner.js'; import type {CodexRunner} from './codex-runner.js'; import type {NoopUnimplementedRunner} from './noop-unimplemented-runner.js'; -import 
{AiSDKRunner} from './ai-sdk-runner.js'; +import {AiSDKRunner} from './ai-sdk/ai-sdk-runner.js'; interface AvailableRunners { genkit: GenkitRunner; @@ -30,7 +30,9 @@ export async function getRunnerByName<T extends keyof AvailableRunners>(name: T): Promise<AvailableRunners[T]> { new m.GenkitRunner() as AvailableRunners[T], ); case 'ai-sdk': - return import('./ai-sdk-runner.js').then(m => new m.AiSDKRunner() as AvailableRunners[T]); + return import('./ai-sdk/ai-sdk-runner.js').then( m => new m.AiSDKRunner() as AvailableRunners[T], ); case 'gemini-cli': return import('./gemini-cli-runner.js').then( m => new m.GeminiCliRunner() as AvailableRunners[T],