2 changes: 1 addition & 1 deletion README.md
@@ -88,7 +88,7 @@ You can customize the `web-codegen-scorer eval` script with the following flags:
- Example: `web-codegen-scorer eval --model=gemini-2.5-flash --autorater-model=gemini-2.5-flash --env=<config path>`

- `--runner=<name>`: Specifies the runner to use to execute the eval. Supported runners are
`genkit` (default), `ai-sdk`, `gemini-cli`, `claude-code` or `codex`.
`ai-sdk` (default), `gemini-cli`, `claude-code` or `codex`.
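- Example: `web-codegen-scorer eval --runner=claude-code --env=<config path>`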

- `--local`: Runs the script in local mode for the initial code generation request. Instead of
calling the LLM, it will attempt to read the initial code from a corresponding file in the
9 changes: 4 additions & 5 deletions docs/model-setup.md
@@ -3,9 +3,8 @@
If you want to test out a model that isn't yet available in the runner, you can add
support for it by following these steps:

1. Ensure that the provider of the model is supported by [Genkit](https://genkit.dev/).
2. Find the provider for the model in `runner/codegen/genkit/providers`. If the provider hasn't been
implemented yet, do so by creating a new `GenkitModelProvider` and adding it to the
`MODEL_PROVIDERS` in `runner/genkit/models.ts`.
3. Add your model to the `GenkitModelProvider` configs.
1. Ensure that the provider of the model is supported by [AI SDK](https://ai-sdk.dev/).
2. Find the provider for the model in `runner/codegen/ai-sdk`. If the provider doesn't exist,
implement it by following the pattern from the existing providers (see the sketch after this list).
3. Add your model to the provider's model list so it's included in the `SUPPORTED_MODELS` array
in `runner/codegen/ai-sdk/ai-sdk-runner.ts`.
4. Done! 🎉 You can now run your model by passing `--model=<your model ID>`.
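As a rough illustration of step 2, a new provider module might look like the sketch below. The `acme` provider, its package name, model IDs, and environment variable are all hypothetical; the shape mirrors the existing `google.ts` and `openai.ts` providers in this PR:

```ts
// runner/codegen/ai-sdk/acme.ts — hypothetical provider sketch, not part of this PR.
import {createAcme} from '@ai-sdk/acme'; // assumed package; substitute a real AI SDK provider
import {AiSdkModelOptions} from './ai-sdk-model-options.js';

export const ACME_MODELS = ['acme-small', 'acme-large'] as const;

export async function getAiSdkModelOptionsForAcme(
  rawModelName: string,
): Promise<AiSdkModelOptions | null> {
  // Return null for unknown models so the runner can fall through to the next provider.
  if (!(ACME_MODELS as readonly string[]).includes(rawModelName)) {
    return null;
  }
  const provideModel = createAcme({apiKey: process.env['ACME_API_KEY']});
  return {model: provideModel(rawModelName)};
}
```

The runner would then spread `ACME_MODELS` into `SUPPORTED_MODELS` and chain `getAiSdkModelOptionsForAcme` after the existing provider lookups in `ai-sdk-runner.ts`.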
7 changes: 2 additions & 5 deletions package.json
@@ -53,13 +53,12 @@
"dependencies": {
"@ai-sdk/anthropic": "3.0.12",
"@ai-sdk/google": "3.0.7",
"@ai-sdk/mcp": "1.0.10",
"@ai-sdk/openai": "3.0.9",
"@ai-sdk/provider": "3.0.2",
"@ai-sdk/xai": "^3.0.26",
"@anthropic-ai/sdk": "^0.68.0",
"@axe-core/puppeteer": "^4.11.0",
"@genkit-ai/compat-oai": "1.23.0",
"@genkit-ai/googleai": "1.22.0",
"@genkit-ai/mcp": "1.22.0",
"@google/genai": "1.29.1",
"@inquirer/prompts": "^8.2.0",
"@safety-web/runner": "0.4.0-alpha.14",
@@ -73,8 +72,6 @@
"chalk": "^5.6.2",
"cli-progress": "^3.12.0",
"file-type": "^21.3.0",
"genkit": "^1.27.0",
"genkitx-anthropic": "0.25.0",
"handlebars": "^4.7.8",
"lighthouse": "^13.0.1",
"limiter": "^3.0.0",
2,416 changes: 128 additions & 2,288 deletions pnpm-lock.yaml

Large diffs are not rendered by default.

5 changes: 0 additions & 5 deletions report-app/angular.json
@@ -20,13 +20,8 @@
"@firebase/app",
"@firebase/firestore",
"tiktoken",
"genkit",
"@genkit-ai/compat-oai",
"@genkit-ai/googleai",
"@genkit-ai/mcp",
"@google/genai",
"@google/generative-ai",
"genkitx-anthropic",
"node-fetch"
],
"allowedCommonJsDependencies": [
Binary file added report-app/public/ai-sdk.png
17 changes: 8 additions & 9 deletions report-app/report-server.ts
@@ -14,14 +14,13 @@ import {
AiChatRequest,
AIConfigState,
AssessmentResultFromReportServer,
IndividualAssessmentState,
RunInfo,
RunInfoFromReportServer,
} from '../runner/shared-interfaces';

// Importing this eagerly pulls in a lot of code and would slow down serving,
// so it's loaded lazily below.
import {type GenkitRunner} from '../runner/codegen/genkit/genkit-runner';
import {type AiSdkRunner} from '../runner/codegen/ai-sdk/ai-sdk-runner';

const app = express();
const reportsLoader = await getReportLoader();
@@ -89,11 +88,11 @@ app.get('/api/reports/:id', async (req, res) => {
res.json(result ?? []);
});

let llm: Promise<GenkitRunner> | null = null;
let llm: Promise<AiSdkRunner> | null = null;

/** Lazily initializes and returns the genkit runner. */
async function getOrCreateGenkitLlmRunner() {
const llm = new (await import('../runner/codegen/genkit/genkit-runner')).GenkitRunner();
/** Lazily initializes and returns the LLM runner. */
async function getOrCreateRunner() {
const llm = new (await import('../runner/codegen/ai-sdk/ai-sdk-runner')).AiSdkRunner();
// Gracefully shut down the runner on exit.
process.on('SIGINT', () => llm!.dispose());
process.on('SIGTERM', () => llm!.dispose());
@@ -116,7 +115,7 @@ app.post('/api/reports/:id/chat', async (req, res) => {

const abortController = new AbortController();
const summary = await chatWithReportAI(
await (llm ?? getOrCreateGenkitLlmRunner()),
await (llm ?? getOrCreateRunner()),
prompt,
abortController.signal,
allAssessments,
@@ -138,9 +137,9 @@

app.get('/api/ai-config-state', async (req, res) => {
try {
const llm = await getOrCreateGenkitLlmRunner();
const llm = await getOrCreateRunner();
return res.json({
configuredModels: llm.getSupportedModelsWithAPIKey(),
configuredModels: llm.getSupportedModels(),
} satisfies AIConfigState);
} catch (e) {
console.error('Could not instantiate LLM instance. Error:', e);
1 change: 1 addition & 0 deletions report-app/src/app/shared/provider-label.ts
@@ -9,6 +9,7 @@ const exactMatches: Record<string, string> = {
'gemini-cli': 'gemini.webp',
genkit: 'genkit.png',
codex: 'open-ai.png',
'ai-sdk': 'ai-sdk.png',
};

@Component({
2 changes: 2 additions & 0 deletions runner/codegen/ai-sdk/ai-sdk-model-options.ts
@@ -1,5 +1,6 @@
import {AnthropicProviderOptions} from '@ai-sdk/anthropic';
import {GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
import {XaiProviderOptions} from '@ai-sdk/xai';
import {OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
import {LanguageModelV3, SharedV3ProviderOptions} from '@ai-sdk/provider';

@@ -9,6 +10,7 @@ export type AiSdkModelOptions = {
| {anthropic: AnthropicProviderOptions}
| {google: GoogleGenerativeAIProviderOptions}
| {openai: OpenAIResponsesProviderOptions}
| {xai: XaiProviderOptions}
// This supports extensions of `AISdkRunner` for custom model providers.
| SharedV3ProviderOptions;
};
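For reference, a resolved `AiSdkModelOptions` value for one of the newly added xAI models might look like the sketch below. It assumes the full type (partly off-screen in this diff) pairs a `model` with optional `providerOptions`, as the provider modules later in this PR suggest; the model ID and the empty options object are placeholders:

```ts
import {createXai} from '@ai-sdk/xai';
import {AiSdkModelOptions} from './ai-sdk-model-options.js';

// Sketch only: model ID and option values are placeholders.
const provideModel = createXai({apiKey: process.env['XAI_API_KEY']});

const options: AiSdkModelOptions = {
  model: provideModel('grok-4'),
  providerOptions: {
    xai: {}, // provider-specific settings would go here
  },
};
```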
80 changes: 70 additions & 10 deletions runner/codegen/ai-sdk/ai-sdk-runner.ts
@@ -1,15 +1,14 @@
import {AnthropicProviderOptions} from '@ai-sdk/anthropic';
import {GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
import {OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
import {
FilePart,
generateText,
LanguageModel,
ModelMessage,
Output,
SystemModelMessage,
TextPart,
ToolSet,
} from 'ai';
import {createMCPClient, MCPClient} from '@ai-sdk/mcp';
import {Experimental_StdioMCPTransport as StdioClientTransport} from '@ai-sdk/mcp/mcp-stdio';
import z from 'zod';
import {combineAbortSignals} from '../../utils/abort-signal.js';
import {callWithTimeout} from '../../utils/timeout.js';
@@ -21,24 +20,33 @@ import {
LocalLlmGenerateFilesResponse,
LocalLlmGenerateTextRequestOptions,
LocalLlmGenerateTextResponse,
McpServerDetails,
McpServerOptions,
PromptDataMessage,
} from '../llm-runner.js';
import {ANTHROPIC_MODELS, getAiSdkModelOptionsForAnthropic} from './anthropic.js';
import {getAiSdkModelOptionsForGoogle, GOOGLE_MODELS} from './google.js';
import {getAiSdkModelOptionsForOpenAI, OPENAI_MODELS} from './openai.js';
import {AiSdkModelOptions} from './ai-sdk-model-options.js';
import {getAiSdkModelOptionsForXai, XAI_MODELS} from './xai.js';

const SUPPORTED_MODELS = [...GOOGLE_MODELS, ...ANTHROPIC_MODELS, ...OPENAI_MODELS] as const;
const SUPPORTED_MODELS = [
...GOOGLE_MODELS,
...ANTHROPIC_MODELS,
...OPENAI_MODELS,
...XAI_MODELS,
] as const;

// Increased to a very high value because we rely on an actual timeout
// that aborts stuck LLM requests. WCS targets stability here, even if
// that involves many rounds of exponential backoff.
const DEFAULT_MAX_RETRIES = 100000;

export class AiSdkRunner implements LlmRunner {
displayName = 'AI SDK';
id = 'ai-sdk';
hasBuiltInRepairLoop = true;
readonly displayName = 'AI SDK';
readonly id = 'ai-sdk';
readonly hasBuiltInRepairLoop = true;
private mcpClients: MCPClient[] | null = null;

async generateText(
options: LocalLlmGenerateTextRequestOptions,
@@ -49,6 +57,7 @@
abortSignal: abortSignal,
messages: this.convertRequestToMessagesList(options),
maxRetries: DEFAULT_MAX_RETRIES,
tools: await this.getTools(),
}),
);

@@ -75,6 +84,7 @@
output: Output.object<z.infer<T>>({schema: options.schema}),
abortSignal: abortSignal,
maxRetries: DEFAULT_MAX_RETRIES,
tools: await this.getTools(),
}),
);

@@ -120,7 +130,42 @@
return [...SUPPORTED_MODELS];
}

async dispose(): Promise<void> {}
async dispose(): Promise<void> {
if (this.mcpClients) {
for (const client of this.mcpClients) {
try {
await client.close();
} catch (error) {
console.error(`Failed to close MCP client`, error);
}
}
}
}

/** Spawns the given MCP servers over stdio and tracks their clients for later tool lookup. */
async startMcpServerHost(
_hostName: string,
servers: McpServerOptions[],
): Promise<McpServerDetails> {
const details: McpServerDetails = {resources: [], tools: []};

for (const server of servers) {
const client = await createMCPClient({
transport: new StdioClientTransport({
command: server.command,
args: server.args,
env: server.env,
}),
});

const [resources, tools] = await Promise.all([client.listResources(), client.tools()]);
resources.resources.forEach(r => details.resources.push(r.name));
details.tools.push(...Object.keys(tools));
this.mcpClients ??= [];
this.mcpClients.push(client);
}

return details;
}

private async _wrapRequestWithTimeoutAndRateLimiting<T>(
request: LocalLlmGenerateTextRequestOptions | LocalLlmConstrainedOutputGenerateRequestOptions,
@@ -145,7 +190,8 @@
const result =
(await getAiSdkModelOptionsForGoogle(request.model)) ??
(await getAiSdkModelOptionsForAnthropic(request.model)) ??
(await getAiSdkModelOptionsForOpenAI(request.model));
(await getAiSdkModelOptionsForOpenAI(request.model)) ??
(await getAiSdkModelOptionsForXai(request.model));
if (result === null) {
throw new Error(`Unexpected unsupported model: ${request.model}`);
}
@@ -198,4 +244,18 @@
}
return result;
}

/** Aggregates the tools exposed by all connected MCP clients into a single `ToolSet`. */
private async getTools(): Promise<ToolSet | undefined> {
let tools: ToolSet | undefined;

if (this.mcpClients) {
for (const client of this.mcpClients) {
const clientTools = (await client.tools()) as ToolSet;
tools ??= {};
Object.keys(clientTools).forEach(name => (tools![name] = clientTools[name]));
}
}

return tools;
}
}
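As a usage note, the MCP support added here is a two-step flow: `startMcpServerHost` spawns each configured server over stdio and keeps its client, and every later `generateText`/structured-output call passes the aggregated tools in via `getTools()`. A minimal sketch of that flow, with a placeholder server command and arguments:

```ts
// Sketch: wiring an MCP server into the runner (command/args are placeholders).
const runner = new AiSdkRunner();

const details = await runner.startMcpServerHost('eval-host', [
  {command: 'node', args: ['./tools/my-mcp-server.js'], env: {}},
]);
console.log(details.tools); // names of the tools now exposed to the model

// Subsequent generateText() calls pick these tools up automatically.

await runner.dispose(); // closes every MCP client
```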
7 changes: 4 additions & 3 deletions runner/codegen/ai-sdk/anthropic.ts
@@ -1,4 +1,4 @@
import {anthropic, AnthropicProviderOptions} from '@ai-sdk/anthropic';
import {createAnthropic, AnthropicProviderOptions} from '@ai-sdk/anthropic';
import {wrapLanguageModel} from 'ai';
import {anthropicThinkingWithStructuredResponseMiddleware} from './anthropic_thinking_patch.js';
import {AiSdkModelOptions} from './ai-sdk-model-options.js';
@@ -19,6 +19,7 @@ export async function getAiSdkModelOptionsForAnthropic(
rawModelName: string,
): Promise<AiSdkModelOptions | null> {
const modelName = rawModelName as (typeof ANTHROPIC_MODELS)[number];
const provideModel = createAnthropic({apiKey: process.env['ANTHROPIC_API_KEY']});

switch (modelName) {
case 'claude-opus-4.1-no-thinking':
@@ -36,13 +37,13 @@
: modelName.endsWith('-32k')
? 32_000
: 16_000;
let apiModelName: Parameters<typeof anthropic>[0] = 'claude-sonnet-4-5';
let apiModelName = 'claude-sonnet-4-5';
if (modelName.includes('opus-4.1')) {
apiModelName = 'claude-opus-4-1';
} else if (modelName.includes('opus-4.5')) {
apiModelName = 'claude-opus-4-5';
}
const model = anthropic(apiModelName);
const model = provideModel(apiModelName);
return {
model: thinkingEnabled
? wrapLanguageModel({
9 changes: 5 additions & 4 deletions runner/codegen/ai-sdk/google.ts
@@ -1,4 +1,4 @@
import {google, GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
import {createGoogleGenerativeAI, GoogleGenerativeAIProviderOptions} from '@ai-sdk/google';
import {AiSdkModelOptions} from './ai-sdk-model-options.js';

export const GOOGLE_MODELS = [
@@ -15,14 +15,15 @@ export async function getAiSdkModelOptionsForGoogle(
rawModelName: string,
): Promise<AiSdkModelOptions | null> {
const modelName = rawModelName as (typeof GOOGLE_MODELS)[number];
const provideModel = createGoogleGenerativeAI({apiKey: process.env['GEMINI_API_KEY']});

switch (modelName) {
case 'gemini-2.5-flash-lite':
case 'gemini-2.5-flash':
case 'gemini-2.5-pro':
case 'gemini-3-pro-preview':
return {
model: google(modelName),
model: provideModel(modelName),
providerOptions: {
google: {
thinkingConfig: {
@@ -33,7 +34,7 @@ };
};
case 'gemini-2.5-flash-no-thinking': {
return {
model: google('gemini-2.5-flash'),
model: provideModel('gemini-2.5-flash'),
providerOptions: {
google: {
thinkingConfig: {
@@ -55,7 +56,7 @@ }
}

return {
model: google('gemini-2.5-flash'),
model: provideModel('gemini-2.5-flash'),
providerOptions: {
google: {
thinkingConfig: {
5 changes: 3 additions & 2 deletions runner/codegen/ai-sdk/openai.ts
@@ -1,4 +1,4 @@
import {openai, OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
import {createOpenAI, OpenAIResponsesProviderOptions} from '@ai-sdk/openai';
import {AiSdkModelOptions} from './ai-sdk-model-options.js';

export const OPENAI_MODELS = [
@@ -11,6 +11,7 @@ export const OPENAI_MODELS = [
export async function getAiSdkModelOptionsForOpenAI(
rawModelName: string,
): Promise<AiSdkModelOptions | null> {
const provideModel = createOpenAI({apiKey: process.env['OPENAI_API_KEY']});
const modelName = rawModelName as (typeof OPENAI_MODELS)[number];

switch (modelName) {
@@ -27,7 +28,7 @@
reasoningEffort = 'low';
}
return {
model: openai('gpt-5.1'),
model: provideModel('gpt-5.1'),
providerOptions: {
openai: {
reasoningEffort,