Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/nightly-terminal-bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
id: set-models
run: |
if [ "${{ inputs.models }}" = "all" ] || [ -z "${{ inputs.models }}" ]; then
echo 'models=["anthropic:claude-sonnet-4-5","openai:gpt-5-codex"]' >> $GITHUB_OUTPUT
echo 'models=["anthropic:claude-sonnet-4-5","openai:gpt-5.1-codex"]' >> $GITHUB_OUTPUT
else
# Convert comma-separated to JSON array
models="${{ inputs.models }}"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/terminal-bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ on:
required: false
type: string
model_name:
description: 'Model to use (e.g., anthropic:claude-sonnet-4-5, openai:gpt-5-codex)'
description: 'Model to use (e.g., anthropic:claude-sonnet-4-5, openai:gpt-5.1-codex)'
required: false
type: string
thinking_level:
Expand Down
18 changes: 14 additions & 4 deletions src/constants/knownModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,30 @@ const MODEL_DEFINITIONS = {
GPT_MINI: {
provider: "openai",
providerModelId: "gpt-5.1-codex-mini",
aliases: ["codex-mini"],
},
} as const satisfies Record<string, KnownModelDefinition>;

export type KnownModelKey = keyof typeof MODEL_DEFINITIONS;
const MODEL_DEFINITION_ENTRIES = Object.entries(MODEL_DEFINITIONS) as Array<
[KnownModelKey, KnownModelDefinition]
>;

export const KNOWN_MODELS = Object.fromEntries(
Object.entries(MODEL_DEFINITIONS).map(([key, definition]) => [
MODEL_DEFINITION_ENTRIES.map(([key, definition]) => toKnownModelEntry(key, definition))
);
function toKnownModelEntry<K extends KnownModelKey>(
key: K,
definition: KnownModelDefinition
): [K, KnownModel] {
return [
key,
{
...definition,
id: `${definition.provider}:${definition.providerModelId}` as `${ModelProvider}:${string}`,
id: `${definition.provider}:${definition.providerModelId}`,
},
])
) as Record<KnownModelKey, KnownModel>;
];
}

export function getKnownModel(key: KnownModelKey): KnownModel {
return KNOWN_MODELS[key];
Expand Down
7 changes: 3 additions & 4 deletions tests/ipcMain/anthropic1MContext.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
createEventCollector,
assertStreamSuccess,
buildLargeHistory,
modelString,
} from "./helpers";

// Skip all tests if TEST_INTEGRATION is not set
Expand Down Expand Up @@ -42,8 +43,7 @@ describeIntegration("IpcMain anthropic 1M context integration tests", () => {
env.mockIpcRenderer,
workspaceId,
"Summarize the context above in one word.",
"anthropic",
"claude-sonnet-4-5",
modelString("anthropic", "claude-sonnet-4-5"),
{
providerOptions: {
anthropic: {
Expand Down Expand Up @@ -76,8 +76,7 @@ describeIntegration("IpcMain anthropic 1M context integration tests", () => {
env.mockIpcRenderer,
workspaceId,
"Summarize the context above in one word.",
"anthropic",
"claude-sonnet-4-5",
modelString("anthropic", "claude-sonnet-4-5"),
{
providerOptions: {
anthropic: {
Expand Down
19 changes: 7 additions & 12 deletions tests/ipcMain/forkWorkspace.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
createEventCollector,
assertStreamSuccess,
waitFor,
modelString,
} from "./helpers";
import { detectDefaultTrunkBranch } from "../../src/git";
import { HistoryService } from "../../src/services/historyService";
Expand Down Expand Up @@ -100,8 +101,7 @@ describeIntegration("IpcMain fork workspace integration tests", () => {
env.mockIpcRenderer,
forkedWorkspaceId,
"What is 2+2? Answer with just the number.",
"anthropic",
"claude-sonnet-4-5"
modelString("anthropic", "claude-sonnet-4-5")
);
expect(sendResult.success).toBe(true);

Expand Down Expand Up @@ -154,8 +154,7 @@ describeIntegration("IpcMain fork workspace integration tests", () => {
env.mockIpcRenderer,
forkedWorkspaceId,
"What word did I ask you to remember? Reply with just the word.",
"anthropic",
"claude-sonnet-4-5"
modelString("anthropic", "claude-sonnet-4-5")
);
expect(sendResult.success).toBe(true);

Expand Down Expand Up @@ -206,15 +205,13 @@ describeIntegration("IpcMain fork workspace integration tests", () => {
env.mockIpcRenderer,
sourceWorkspaceId,
"What is 5+5? Answer with just the number.",
"anthropic",
"claude-sonnet-4-5"
modelString("anthropic", "claude-sonnet-4-5")
),
sendMessageWithModel(
env.mockIpcRenderer,
forkedWorkspaceId,
"What is 3+3? Answer with just the number.",
"anthropic",
"claude-sonnet-4-5"
modelString("anthropic", "claude-sonnet-4-5")
),
]);

Expand Down Expand Up @@ -253,8 +250,7 @@ describeIntegration("IpcMain fork workspace integration tests", () => {
env.mockIpcRenderer,
sourceWorkspaceId,
"Count from 1 to 10, one number per line. Then say 'Done counting.'",
"anthropic",
"claude-sonnet-4-5"
modelString("anthropic", "claude-sonnet-4-5")
);

// Wait for stream to start and produce some content
Expand Down Expand Up @@ -286,8 +282,7 @@ describeIntegration("IpcMain fork workspace integration tests", () => {
env.mockIpcRenderer,
forkedWorkspaceId,
"What is 7+3? Answer with just the number.",
"anthropic",
"claude-sonnet-4-5"
modelString("anthropic", "claude-sonnet-4-5")
);
expect(forkedSendResult.success).toBe(true);

Expand Down
19 changes: 14 additions & 5 deletions tests/ipcMain/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import * as os from "os";
import { detectDefaultTrunkBranch } from "../../src/git";
import type { TestEnvironment } from "./setup";
import type { RuntimeConfig } from "../../src/types/runtime";
import { KNOWN_MODELS } from "../../src/constants/knownModels";
import type { ToolPolicy } from "../../src/utils/tools/toolPolicy";

// Test constants - centralized for consistency across all tests
Expand Down Expand Up @@ -46,6 +47,13 @@ export function modelString(provider: string, model: string): string {
/**
* Send a message via IPC
*/
type SendMessageWithModelOptions = Omit<SendMessageOptions, "model"> & {
imageParts?: Array<{ url: string; mediaType: string }>;
};

const DEFAULT_MODEL_ID = KNOWN_MODELS.SONNET.id;
const DEFAULT_PROVIDER = KNOWN_MODELS.SONNET.provider;

export async function sendMessage(
mockIpcRenderer: IpcRenderer,
workspaceId: string,
Expand All @@ -61,19 +69,20 @@ export async function sendMessage(
}

/**
* Send a message with a provider and model (convenience wrapper)
* Send a message with an explicit model id (defaults to SONNET).
* An id that contains no ":" is combined with the default provider via modelString().
*/
export async function sendMessageWithModel(
mockIpcRenderer: IpcRenderer,
workspaceId: string,
message: string,
provider = "anthropic",
model = "claude-sonnet-4-5",
options?: Omit<SendMessageOptions, "model">
modelId: string = DEFAULT_MODEL_ID,
options?: SendMessageWithModelOptions
): Promise<Result<void, SendMessageError>> {
const resolvedModel = modelId.includes(":") ? modelId : modelString(DEFAULT_PROVIDER, modelId);

return sendMessage(mockIpcRenderer, workspaceId, message, {
...options,
model: modelString(provider, model),
model: resolvedModel,
});
}

Expand Down
8 changes: 3 additions & 5 deletions tests/ipcMain/modelNotFound.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { setupWorkspace, shouldRunIntegrationTests, validateApiKeys } from "./setup";
import { sendMessageWithModel, createEventCollector, waitFor } from "./helpers";
import { sendMessageWithModel, createEventCollector, waitFor, modelString } from "./helpers";
import { IPC_CHANNELS } from "../../src/constants/ipc-constants";
import type { Result } from "../../src/types/result";
import type { SendMessageError } from "../../src/types/errors";
Expand Down Expand Up @@ -30,8 +30,7 @@ describeIntegration("IpcMain model_not_found error handling", () => {
env.mockIpcRenderer,
workspaceId,
"Hello",
"anthropic",
"invalid-model-that-does-not-exist-xyz123"
modelString("anthropic", "invalid-model-that-does-not-exist-xyz123")
);

// Collect events to verify error classification
Expand Down Expand Up @@ -69,8 +68,7 @@ describeIntegration("IpcMain model_not_found error handling", () => {
env.mockIpcRenderer,
workspaceId,
"Hello",
"openai",
"gpt-nonexistent-model-xyz123"
modelString("openai", "gpt-nonexistent-model-xyz123")
);

// Collect events to verify error classification
Expand Down
13 changes: 5 additions & 8 deletions tests/ipcMain/ollama.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
createEventCollector,
assertStreamSuccess,
extractTextFromEvents,
modelString,
} from "./helpers";
import { spawn } from "child_process";

Expand Down Expand Up @@ -105,8 +106,7 @@ describeOllama("IpcMain Ollama integration tests", () => {
env.mockIpcRenderer,
workspaceId,
"Say 'hello' and nothing else",
"ollama",
OLLAMA_MODEL
modelString("ollama", OLLAMA_MODEL)
);

// Verify the IPC call succeeded
Expand Down Expand Up @@ -139,8 +139,7 @@ describeOllama("IpcMain Ollama integration tests", () => {
env.mockIpcRenderer,
workspaceId,
"What is the current date and time? Use the bash tool to find out.",
"ollama",
OLLAMA_MODEL
modelString("ollama", OLLAMA_MODEL)
);

expect(result.success).toBe(true);
Expand Down Expand Up @@ -178,8 +177,7 @@ describeOllama("IpcMain Ollama integration tests", () => {
env.mockIpcRenderer,
workspaceId,
"Read the README.md file and tell me what the first heading says.",
"ollama",
OLLAMA_MODEL
modelString("ollama", OLLAMA_MODEL)
);

expect(result.success).toBe(true);
Expand Down Expand Up @@ -216,8 +214,7 @@ describeOllama("IpcMain Ollama integration tests", () => {
env.mockIpcRenderer,
workspaceId,
"This should fail",
"ollama",
OLLAMA_MODEL,
modelString("ollama", OLLAMA_MODEL),
{
providerOptions: {
ollama: {},
Expand Down
10 changes: 7 additions & 3 deletions tests/ipcMain/openai-web-search.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import { setupWorkspace, shouldRunIntegrationTests, validateApiKeys } from "./setup";
import { sendMessageWithModel, createEventCollector, assertStreamSuccess } from "./helpers";
import {
sendMessageWithModel,
createEventCollector,
assertStreamSuccess,
modelString,
} from "./helpers";

// Skip all tests if TEST_INTEGRATION is not set
const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip;
Expand Down Expand Up @@ -32,8 +37,7 @@ describeIntegration("OpenAI web_search integration tests", () => {
workspaceId,
"Use web search to find the current weather in San Francisco. " +
"Then tell me if it's a good day for a picnic.",
"openai",
"gpt-5-codex",
modelString("openai", "gpt-5.1-codex-mini"),
{
thinkingLevel: "medium", // Ensure reasoning without excessive deliberation
}
Expand Down
5 changes: 2 additions & 3 deletions tests/ipcMain/resumeStream.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { setupWorkspace, shouldRunIntegrationTests, validateApiKeys } from "./setup";
import { sendMessageWithModel, createEventCollector, waitFor } from "./helpers";
import { sendMessageWithModel, createEventCollector, waitFor, modelString } from "./helpers";
import { IPC_CHANNELS } from "../../src/constants/ipc-constants";
import type { Result } from "../../src/types/result";
import type { SendMessageError } from "../../src/types/errors";
Expand Down Expand Up @@ -31,8 +31,7 @@ describeIntegration("IpcMain resumeStream integration tests", () => {
env.mockIpcRenderer,
workspaceId,
`Run this bash command: for i in 1 2 3; do sleep 0.5; done && echo '${expectedWord}'`,
"anthropic",
"claude-sonnet-4-5"
modelString("anthropic", "claude-sonnet-4-5")
);

// Wait for stream to start
Expand Down
Loading