Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions tests/ipcMain/openai-web-search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ describeIntegration("OpenAI web_search integration tests", () => {
"Then tell me if it's a good day for a picnic.",
modelString("openai", "gpt-5.1-codex-mini"),
{
thinkingLevel: "medium", // Ensure reasoning without excessive deliberation
thinkingLevel: "low", // Ensure reasoning without excessive deliberation
}
);

Expand All @@ -48,8 +48,8 @@ describeIntegration("OpenAI web_search integration tests", () => {
// Collect and verify stream events
const collector = createEventCollector(env.sentEvents, workspaceId);

// Wait for stream to complete (90s should be enough for simple weather + analysis)
const streamEnd = await collector.waitForEvent("stream-end", 90000);
// Wait for stream to complete (150s should be enough for simple weather + analysis)
const streamEnd = await collector.waitForEvent("stream-end", 150000);
expect(streamEnd).toBeDefined();

// Verify no errors occurred - this is the KEY test
Expand Down Expand Up @@ -84,6 +84,6 @@ describeIntegration("OpenAI web_search integration tests", () => {
await cleanup();
}
},
120000 // 120 second timeout - reasoning + web_search should complete faster with simpler task
180000 // 180 second timeout - reasoning + web_search should complete faster with simpler task
);
});
43 changes: 33 additions & 10 deletions tests/ipcMain/runtimeExecuteBash.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,27 @@ function collectToolOutputs(events: WorkspaceChatMessage[], toolName: string): s
.join("\n");
}

/**
 * Helper to calculate tool execution duration from captured events.
 *
 * Scans `env.sentEvents` in array order for the first `tool-call-start` event
 * whose payload names `toolName`, then for the first matching `tool-call-end`
 * event that appears after it, and returns the difference between their
 * timestamps.
 *
 * NOTE(review): this assumes `sentEvents` is appended to in emission order, so
 * that array order reflects time order — confirm against the event capture code.
 *
 * @param env - Test environment exposing the captured events.
 * @param toolName - Name of the tool whose start/end events should be paired.
 * @returns `end.timestamp - start.timestamp`, or `-1` when no start event or no
 *   subsequent end event for the tool was captured.
 */
function getToolDuration(
  env: { sentEvents: Array<{ channel: string; data: unknown; timestamp: number }> },
  toolName: string
): number {
  // Payloads are `unknown`: reject anything that is not a non-null object before
  // reading fields, so unrelated events with null/undefined data cannot throw.
  const isToolEvent = (data: unknown, type: string): boolean => {
    if (typeof data !== "object" || data === null) {
      return false;
    }
    const msg = data as { type?: unknown; toolName?: unknown };
    return msg.type === type && msg.toolName === toolName;
  };

  let startTimestamp: number | undefined;
  for (const event of env.sentEvents) {
    if (startTimestamp === undefined) {
      if (isToolEvent(event.data, "tool-call-start")) {
        startTimestamp = event.timestamp;
      }
    } else if (isToolEvent(event.data, "tool-call-end")) {
      // Only an end event seen after the start is paired with it; a stray end
      // event earlier in the list would otherwise produce a bogus duration.
      return event.timestamp - startTimestamp;
    }
  }

  return -1;
}

// Skip all tests if TEST_INTEGRATION is not set
// Resolves to `describe` when shouldRunIntegrationTests() returns true and to
// `describe.skip` otherwise, so suites declared through this alias are skipped
// instead of executed when integration tests are disabled.
const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip;

Expand Down Expand Up @@ -259,16 +280,17 @@ describeIntegration("Runtime Bash Execution", () => {
// Test command that pipes file through stdin-reading command (grep)
// This would hang forever if stdin.close() was used instead of stdin.abort()
// Regression test for: https://github.com/coder/mux/issues/503
const startTime = Date.now();
const events = await sendMessageAndWait(
env,
workspaceId,
"Run bash: cat /tmp/test.json | grep test",
HAIKU_MODEL,
BASH_ONLY,
10000 // 10s timeout - should complete in ~4s per API call
30000 // Relaxed timeout for CI stability (was 10s)
);
const duration = Date.now() - startTime;

// Calculate actual tool execution duration
const toolDuration = getToolDuration(env, "bash");

// Extract response text
const responseText = extractTextFromEvents(events);
Expand All @@ -279,10 +301,9 @@ describeIntegration("Runtime Bash Execution", () => {
expect(bashOutput).toContain('"test": "data"');

// Verify command completed quickly (not hanging until timeout)
// With tokenizer preloading, both local and SSH complete in ~8s total
// Actual hangs would hit bash tool's 180s timeout
expect(toolDuration).toBeGreaterThan(0);
const maxDuration = 10000;
expect(duration).toBeLessThan(maxDuration);
expect(toolDuration).toBeLessThan(maxDuration);

// Verify bash tool was called
const toolCallStarts = events.filter((e: any) => e.type === "tool-call-start");
Expand Down Expand Up @@ -337,16 +358,17 @@ describeIntegration("Runtime Bash Execution", () => {

// Test grep | head pattern - this historically hangs over SSH
// This is a regression test for the bash hang issue
const startTime = Date.now();
const events = await sendMessageAndWait(
env,
workspaceId,
'Run bash: grep -n "terminal bench" testfile.txt | head -n 200',
HAIKU_MODEL,
BASH_ONLY,
15000 // 15s timeout - should complete quickly
30000 // Relaxed timeout for CI stability (was 15s)
);
const duration = Date.now() - startTime;

// Calculate actual tool execution duration
const toolDuration = getToolDuration(env, "bash");

// Extract response text
const responseText = extractTextFromEvents(events);
Expand All @@ -356,8 +378,9 @@ describeIntegration("Runtime Bash Execution", () => {

// Verify command completed quickly (not hanging until timeout)
// SSH runtime should complete in <10s even with high latency
expect(toolDuration).toBeGreaterThan(0);
const maxDuration = 15000;
expect(duration).toBeLessThan(maxDuration);
expect(toolDuration).toBeLessThan(maxDuration);

// Verify bash tool was called
const toolCallStarts = events.filter((e: any) => e.type === "tool-call-start");
Expand Down