🤖 ci: make grep|head test assertion deterministic (#1261)

ammar-agent · web-flow · commit 4e342d520cc7 · 2025-12-20T20:34:32.000-06:00
The test was flaky because it checked if the LLM response text contained
`terminal bench`. LLMs sometimes summarize command output instead of
quoting it verbatim.

The assertion now verifies that the bash tool completed by checking for
at least one `tool-call-end` event whose `toolName` is `bash`. This is
deterministic and directly tests what we care about (the command
completed without hanging).

---
_Generated with `mux` • Model: `anthropic:claude-opus-4-5` • Thinking:
`high`_
diff --git a/tests/ipc/runtimeExecuteBash.test.ts b/tests/ipc/runtimeExecuteBash.test.ts
@@ -387,11 +387,16 @@ describeIntegration("Runtime Bash Execution", () => {
               // Calculate actual tool execution duration
               const toolDuration = getToolDuration(events, "bash");
 
-              // Extract response text
-              const responseText = extractTextFromEvents(events);
-
               // Verify command completed successfully (not timeout)
-              expect(responseText).toContain("terminal bench");
+              // Check that the bash tool completed (tool-call-end events exist)
+              const toolCallEnds = events.filter(
+                (e) =>
+                  "type" in e &&
+                  e.type === "tool-call-end" &&
+                  "toolName" in e &&
+                  e.toolName === "bash"
+              );
+              expect(toolCallEnds.length).toBeGreaterThan(0);
 
               // Verify command completed quickly (not hanging until timeout)
               // SSH runtime should complete in <10s even with high latency