From 9e0d72487b74a8f461923a4887ce6412e97839e3 Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Wed, 3 Dec 2025 11:11:28 -0600
Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20robust=20Electron?=
 =?UTF-8?q?=20E2E=20tests=20for=20regression=20prevention?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add E2E tests targeting recent regression patterns:
- Window lifecycle: startup, workspace loading, rapid navigation, IPC stability
- Streaming behavior: modal interruption, mode switching, error handling
- Persistence: chat history survives reload and settings navigation

Also:
- Add mock error scenarios (rate limit, server error, network error)
- Fix captureStreamTimeline to handle stream-error events
- Run E2E tests on macOS (depot runner) for window lifecycle coverage
- Fix macOS CI compatibility (grep -oE, conditional install-deps)

_Generated with mux_
---
 .github/actions/setup-playwright/action.yml   |  7 +-
 .github/workflows/ci.yml                      | 26 ++++-
 src/node/services/mock/scenarios.ts           |  2 +
 .../services/mock/scenarios/errorScenarios.ts | 97 +++++++++++++++++++
 tests/e2e/scenarios/persistence.spec.ts       | 41 ++++++++
 tests/e2e/scenarios/streamingBehavior.spec.ts | 77 +++++++++++++++
 tests/e2e/scenarios/windowLifecycle.spec.ts   | 61 ++++++++++++
 tests/e2e/utils/ui.ts                         |  5 +-
 8 files changed, 308 insertions(+), 8 deletions(-)
 create mode 100644 src/node/services/mock/scenarios/errorScenarios.ts
 create mode 100644 tests/e2e/scenarios/persistence.spec.ts
 create mode 100644 tests/e2e/scenarios/streamingBehavior.spec.ts
 create mode 100644 tests/e2e/scenarios/windowLifecycle.spec.ts

diff --git a/.github/actions/setup-playwright/action.yml b/.github/actions/setup-playwright/action.yml
index d64bb82ae1..2c0df71897 100644
--- a/.github/actions/setup-playwright/action.yml
+++ b/.github/actions/setup-playwright/action.yml
@@ -12,8 +12,8 @@ runs:
       id: playwright-version
       shell: bash
       run: |
-        # Extract Playwright version from bun.lock
-        VERSION=$(grep -A1 '"playwright":' bun.lock | grep -oP '"\K[0-9]+\.[0-9]+\.[0-9]+' | head -1)
+        # Extract Playwright version from bun.lock (macOS-compatible regex)
+        VERSION=$(grep -A1 '"playwright":' bun.lock | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
         echo "version=$VERSION" >> $GITHUB_OUTPUT
         echo "Playwright version: $VERSION"
 
@@ -31,6 +31,7 @@ runs:
       shell: bash
       run: bun x playwright install ${{ inputs.browsers }}
 
-    - name: Install Playwright system dependencies
+    - name: Install Playwright system dependencies (Linux)
+      if: runner.os == 'Linux'
       shell: bash
       run: bun x playwright install-deps ${{ inputs.browsers }}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1249877578..fc13a60377 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -148,9 +148,21 @@ jobs:
         run: make test-storybook
 
   e2e-test:
-    name: End-to-End Tests
-    runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }}
+    name: E2E Tests (${{ matrix.os }})
     if: github.event.inputs.test_filter == ''
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # Linux: comprehensive E2E tests
+          - os: linux
+            runner: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }}
+            test_scope: "all"
+          # macOS: window lifecycle and platform-dependent tests only
+          - os: macos
+            runner: ${{ github.repository_owner == 'coder' && 'depot-macos-latest' || 'macos-latest' }}
+            test_scope: "window-lifecycle"
+    runs-on: ${{ matrix.runner }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -159,18 +171,24 @@ jobs:
 
       - uses: ./.github/actions/setup-mux
 
-      - name: Install xvfb
+      - name: Install xvfb (Linux)
+        if: matrix.os == 'linux'
         run: |
           sudo apt-get update
           sudo apt-get install -y xvfb
 
       - uses: ./.github/actions/setup-playwright
 
-      - name: Run e2e tests
+      - name: Run comprehensive e2e tests (Linux)
+        if: matrix.os == 'linux'
         run: xvfb-run -a make test-e2e
         env:
           ELECTRON_DISABLE_SANDBOX: 1
 
+      - name: Run window lifecycle e2e tests (macOS)
+        if: matrix.os == 'macos'
+        run: make test-e2e PLAYWRIGHT_ARGS="tests/e2e/scenarios/windowLifecycle.spec.ts"
+
   docker-smoke-test:
     name: Docker Smoke Test
     runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }}
diff --git a/src/node/services/mock/scenarios.ts b/src/node/services/mock/scenarios.ts
index 5c2e25546f..8ce58f5758 100644
--- a/src/node/services/mock/scenarios.ts
+++ b/src/node/services/mock/scenarios.ts
@@ -3,6 +3,7 @@ import * as review from "./scenarios/review";
 import * as toolFlows from "./scenarios/toolFlows";
 import * as slashCommands from "./scenarios/slashCommands";
 import * as permissionModes from "./scenarios/permissionModes";
+import * as errorScenarios from "./scenarios/errorScenarios";
 import type { ScenarioTurn } from "./scenarioTypes";
 
 export const allScenarios: ScenarioTurn[] = [
@@ -11,4 +12,5 @@ export const allScenarios: ScenarioTurn[] = [
   ...toolFlows.scenarios,
   ...slashCommands.scenarios,
   ...permissionModes.scenarios,
+  ...errorScenarios.scenarios,
 ];
diff --git a/src/node/services/mock/scenarios/errorScenarios.ts b/src/node/services/mock/scenarios/errorScenarios.ts
new file mode 100644
index 0000000000..0ce9b2fbfc
--- /dev/null
+++ b/src/node/services/mock/scenarios/errorScenarios.ts
@@ -0,0 +1,97 @@
+import type { ScenarioTurn } from "@/node/services/mock/scenarioTypes";
+import { STREAM_BASE_DELAY } from "@/node/services/mock/scenarioTypes";
+import { KNOWN_MODELS } from "@/common/constants/knownModels";
+
+export const ERROR_PROMPTS = {
+  TRIGGER_RATE_LIMIT: "Trigger rate limit error",
+  TRIGGER_API_ERROR: "Trigger API error",
+  TRIGGER_NETWORK_ERROR: "Trigger network error",
+} as const;
+
+export const ERROR_MESSAGES = {
+  RATE_LIMIT: "Rate limit exceeded. Please retry after 60 seconds.",
+  API_ERROR: "Internal server error occurred while processing the request.",
+  NETWORK_ERROR: "Network connection lost. Please check your internet connection.",
+} as const;
+
+const rateLimitErrorTurn: ScenarioTurn = {
+  user: {
+    text: ERROR_PROMPTS.TRIGGER_RATE_LIMIT,
+    thinkingLevel: "low",
+    mode: "exec",
+  },
+  assistant: {
+    messageId: "msg-error-ratelimit",
+    events: [
+      {
+        kind: "stream-start",
+        delay: 0,
+        messageId: "msg-error-ratelimit",
+        model: KNOWN_MODELS.GPT.id,
+      },
+      {
+        kind: "stream-delta",
+        delay: STREAM_BASE_DELAY,
+        text: "Processing your request...",
+      },
+      {
+        kind: "stream-error",
+        delay: STREAM_BASE_DELAY * 2,
+        error: ERROR_MESSAGES.RATE_LIMIT,
+        errorType: "rate_limit",
+      },
+    ],
+  },
+};
+
+const apiErrorTurn: ScenarioTurn = {
+  user: {
+    text: ERROR_PROMPTS.TRIGGER_API_ERROR,
+    thinkingLevel: "low",
+    mode: "exec",
+  },
+  assistant: {
+    messageId: "msg-error-api",
+    events: [
+      {
+        kind: "stream-start",
+        delay: 0,
+        messageId: "msg-error-api",
+        model: KNOWN_MODELS.GPT.id,
+      },
+      {
+        kind: "stream-error",
+        delay: STREAM_BASE_DELAY,
+        error: ERROR_MESSAGES.API_ERROR,
+        errorType: "server_error",
+      },
+    ],
+  },
+};
+
+const networkErrorTurn: ScenarioTurn = {
+  user: {
+    text: ERROR_PROMPTS.TRIGGER_NETWORK_ERROR,
+    thinkingLevel: "low",
+    mode: "exec",
+  },
+  assistant: {
+    messageId: "msg-error-network",
+    events: [
+      {
+        kind: "stream-start",
+        delay: 0,
+        messageId: "msg-error-network",
+        model: KNOWN_MODELS.GPT.id,
+      },
+      {
+        kind: "stream-error",
+        delay: STREAM_BASE_DELAY,
+        error: ERROR_MESSAGES.NETWORK_ERROR,
+        errorType: "network",
+      },
+    ],
+  },
+};
+
+export const scenarios: ScenarioTurn[] = [rateLimitErrorTurn, apiErrorTurn, networkErrorTurn];
diff --git a/tests/e2e/scenarios/persistence.spec.ts b/tests/e2e/scenarios/persistence.spec.ts
new file mode 100644
index 0000000000..37d1c09d86
--- /dev/null
+++ b/tests/e2e/scenarios/persistence.spec.ts
@@ -0,0 +1,41 @@
+import { electronTest as test, electronExpect as expect } from "../electronTest";
+import { LIST_PROGRAMMING_LANGUAGES } from "@/node/services/mock/scenarios/basicChat";
+
+test.skip(
+  ({ browserName }) => browserName !== "chromium",
+  "Electron scenario runs on chromium only"
+);
+
+test.describe("persistence", () => {
+  test("chat history persists across page reload", async ({ ui, page }) => {
+    await ui.projects.openFirstWorkspace();
+
+    await ui.chat.captureStreamTimeline(async () => {
+      await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES);
+    });
+    await ui.chat.expectTranscriptContains("Python");
+
+    await page.reload();
+    await page.waitForLoadState("domcontentloaded");
+    await ui.projects.openFirstWorkspace();
+
+    await ui.chat.expectTranscriptContains("Python");
+  });
+
+  test("chat history survives settings navigation", async ({ ui }) => {
+    await ui.projects.openFirstWorkspace();
+
+    await ui.chat.captureStreamTimeline(async () => {
+      await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES);
+    });
+
+    // Navigate through settings (potential state corruption points)
+    await ui.settings.open();
+    await ui.settings.selectSection("Models");
+    await ui.settings.selectSection("Providers");
+    await ui.settings.close();
+
+    await ui.chat.expectTranscriptContains("Python");
+    await ui.chat.expectTranscriptContains("JavaScript");
+  });
+});
diff --git a/tests/e2e/scenarios/streamingBehavior.spec.ts b/tests/e2e/scenarios/streamingBehavior.spec.ts
new file mode 100644
index 0000000000..70838a465e
--- /dev/null
+++ b/tests/e2e/scenarios/streamingBehavior.spec.ts
@@ -0,0 +1,77 @@
+import { electronTest as test, electronExpect as expect } from "../electronTest";
+import { LIST_PROGRAMMING_LANGUAGES } from "@/node/services/mock/scenarios/basicChat";
+import { ERROR_PROMPTS, ERROR_MESSAGES } from "@/node/services/mock/scenarios/errorScenarios";
+
+test.skip(
+  ({ browserName }) => browserName !== "chromium",
+  "Electron scenario runs on chromium only"
+);
+
+test.describe("streaming behavior", () => {
+  test("stream continues after settings modal opens", async ({ ui, page }) => {
+    await ui.projects.openFirstWorkspace();
+
+    const streamPromise = ui.chat.captureStreamTimeline(async () => {
+      await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES);
+    });
+
+    await page.waitForTimeout(50);
+    await ui.settings.open();
+    const timeline = await streamPromise;
+    await ui.settings.close();
+
+    expect(timeline.events.some((e) => e.type === "stream-end")).toBe(true);
+    await ui.chat.expectTranscriptContains("Python");
+  });
+
+  test("mode switching doesn't break streaming", async ({ ui }) => {
+    await ui.projects.openFirstWorkspace();
+
+    await ui.chat.setMode("Exec");
+    await ui.chat.setMode("Plan");
+
+    const timeline = await ui.chat.captureStreamTimeline(async () => {
+      await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES);
+    });
+
+    expect(timeline.events.some((e) => e.type === "stream-end")).toBe(true);
+    await ui.chat.expectTranscriptContains("Python");
+  });
+
+  // Consolidate error tests using parameterization
+  for (const [errorType, prompt, expectedMessage] of [
+    ["rate limit", ERROR_PROMPTS.TRIGGER_RATE_LIMIT, ERROR_MESSAGES.RATE_LIMIT],
+    ["server", ERROR_PROMPTS.TRIGGER_API_ERROR, ERROR_MESSAGES.API_ERROR],
+    ["network", ERROR_PROMPTS.TRIGGER_NETWORK_ERROR, ERROR_MESSAGES.NETWORK_ERROR],
+  ] as const) {
+    test(`${errorType} error displays in transcript`, async ({ ui, page }) => {
+      await ui.projects.openFirstWorkspace();
+      await ui.chat.setMode("Exec");
+
+      const timeline = await ui.chat.captureStreamTimeline(async () => {
+        await ui.chat.sendMessage(prompt);
+      });
+
+      expect(timeline.events.some((e) => e.type === "stream-error")).toBe(true);
+      const transcript = page.getByRole("log", { name: "Conversation transcript" });
+      await expect(transcript.getByText(expectedMessage)).toBeVisible();
+    });
+  }
+
+  test("app recovers after error", async ({ ui }) => {
+    await ui.projects.openFirstWorkspace();
+    await ui.chat.setMode("Exec");
+
+    await ui.chat.captureStreamTimeline(async () => {
+      await ui.chat.sendMessage(ERROR_PROMPTS.TRIGGER_API_ERROR);
+    });
+
+    await ui.chat.setMode("Plan");
+    const timeline = await ui.chat.captureStreamTimeline(async () => {
+      await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES);
+    });
+
+    expect(timeline.events.some((e) => e.type === "stream-end")).toBe(true);
+    await ui.chat.expectTranscriptContains("Python");
+  });
+});
diff --git a/tests/e2e/scenarios/windowLifecycle.spec.ts b/tests/e2e/scenarios/windowLifecycle.spec.ts
new file mode 100644
index 0000000000..01ceffc574
--- /dev/null
+++ b/tests/e2e/scenarios/windowLifecycle.spec.ts
@@ -0,0 +1,61 @@
+import { electronTest as test, electronExpect as expect } from "../electronTest";
+import { LIST_PROGRAMMING_LANGUAGES } from "@/node/services/mock/scenarios/basicChat";
+
+test.skip(
+  ({ browserName }) => browserName !== "chromium",
+  "Electron scenario runs on chromium only"
+);
+
+test.describe("window lifecycle", () => {
+  test("window opens with expected structure", async ({ page }) => {
+    await expect(page.getByRole("navigation", { name: "Projects" })).toBeVisible();
+    await expect(page.locator("main, #root, .app-container").first()).toBeVisible();
+    await expect(page.getByRole("dialog", { name: /error/i })).not.toBeVisible();
+  });
+
+  test("workspace content loads correctly", async ({ ui, page }) => {
+    await ui.projects.openFirstWorkspace();
+    await expect(page.getByRole("log", { name: "Conversation transcript" })).toBeVisible();
+    await expect(page.getByRole("textbox", { name: /message/i })).toBeVisible();
+  });
+
+  test("survives rapid settings navigation", async ({ ui, page }) => {
+    await ui.projects.openFirstWorkspace();
+
+    // Stress test settings modal with rapid open/close/navigate
+    for (let i = 0; i < 3; i++) {
+      await ui.settings.open();
+      await ui.settings.selectSection("Providers");
+      await ui.settings.selectSection("Models");
+      await ui.settings.close();
+    }
+
+    // Verify app remains functional
+    await expect(page.getByRole("navigation", { name: "Projects" })).toBeVisible();
+    const chatInput = page.getByRole("textbox", { name: /message/i });
+    await expect(chatInput).toBeVisible();
+    await chatInput.click();
+    await expect(chatInput).toBeFocused();
+  });
+
+  // Exercises IPC handler stability under heavy use (regression: #851 duplicate handler registration)
+  test("IPC stable after heavy operations", async ({ ui, page }) => {
+    await ui.projects.openFirstWorkspace();
+
+    // Many IPC calls: stream + mode switches + settings navigation
+    const timeline = await ui.chat.captureStreamTimeline(async () => {
+      await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES);
+    });
+    expect(timeline.events.some((e) => e.type === "stream-end")).toBe(true);
+
+    await ui.chat.setMode("Exec");
+    await ui.chat.setMode("Plan");
+    await ui.settings.open();
+    await ui.settings.selectSection("Providers");
+    await ui.settings.close();
+
+    // Verify app remains functional after all IPC calls
+    await expect(page.getByRole("navigation", { name: "Projects" })).toBeVisible();
+    await ui.chat.expectTranscriptContains("Python");
+  });
+});
diff --git a/tests/e2e/utils/ui.ts b/tests/e2e/utils/ui.ts
index d275551b90..9acb2f2d1d 100644
--- a/tests/e2e/utils/ui.ts
+++ b/tests/e2e/utils/ui.ts
@@ -345,7 +345,10 @@ export function createWorkspaceUI(page: Page, context: DemoProjectConfig): Works
             if (!capture) {
               return false;
             }
-            return capture.events.some((event) => event.type === "stream-end");
+            // Wait for either stream-end or stream-error to complete the capture
+            return capture.events.some(
+              (event) => event.type === "stream-end" || event.type === "stream-error"
+            );
           },
           workspaceId,
           { timeout: timeoutMs }