From 9e0d72487b74a8f461923a4887ce6412e97839e3 Mon Sep 17 00:00:00 2001 From: Ammar Date: Wed, 3 Dec 2025 11:11:28 -0600 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20robust=20Electron?= =?UTF-8?q?=20E2E=20tests=20for=20regression=20prevention?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add E2E tests targeting recent regression patterns: - Window lifecycle: startup, workspace loading, rapid navigation, IPC stability - Streaming behavior: modal interruption, mode switching, error handling - Persistence: chat history survives reload and settings navigation Also: - Add mock error scenarios (rate limit, server error, network error) - Fix captureStreamTimeline to handle stream-error events - Run E2E tests on macOS (depot runner) for window lifecycle coverage - Fix macOS CI compatibility (grep -oE, conditional install-deps) _Generated with mux_ --- .github/actions/setup-playwright/action.yml | 7 +- .github/workflows/ci.yml | 26 ++++- src/node/services/mock/scenarios.ts | 2 + .../services/mock/scenarios/errorScenarios.ts | 97 +++++++++++++++++++ tests/e2e/scenarios/persistence.spec.ts | 41 ++++++++ tests/e2e/scenarios/streamingBehavior.spec.ts | 77 +++++++++++++++ tests/e2e/scenarios/windowLifecycle.spec.ts | 61 ++++++++++++ tests/e2e/utils/ui.ts | 5 +- 8 files changed, 308 insertions(+), 8 deletions(-) create mode 100644 src/node/services/mock/scenarios/errorScenarios.ts create mode 100644 tests/e2e/scenarios/persistence.spec.ts create mode 100644 tests/e2e/scenarios/streamingBehavior.spec.ts create mode 100644 tests/e2e/scenarios/windowLifecycle.spec.ts diff --git a/.github/actions/setup-playwright/action.yml b/.github/actions/setup-playwright/action.yml index d64bb82ae1..2c0df71897 100644 --- a/.github/actions/setup-playwright/action.yml +++ b/.github/actions/setup-playwright/action.yml @@ -12,8 +12,8 @@ runs: id: playwright-version shell: bash run: | - # Extract Playwright version from bun.lock - VERSION=$(grep -A1 '"playwright":' bun.lock | grep -oP '"\K[0-9]+\.[0-9]+\.[0-9]+' | head -1) + # Extract Playwright version from bun.lock (macOS-compatible regex) + VERSION=$(grep -A1 '"playwright":' bun.lock | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1) echo "version=$VERSION" >> $GITHUB_OUTPUT echo "Playwright version: $VERSION" @@ -31,6 +31,7 @@ runs: shell: bash run: bun x playwright install ${{ inputs.browsers }} - - name: Install Playwright system dependencies + - name: Install Playwright system dependencies (Linux) + if: runner.os == 'Linux' shell: bash run: bun x playwright install-deps ${{ inputs.browsers }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1249877578..fc13a60377 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -148,9 +148,21 @@ jobs: run: make test-storybook e2e-test: - name: End-to-End Tests - runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} + name: E2E Tests (${{ matrix.os }}) if: github.event.inputs.test_filter == '' + strategy: + fail-fast: false + matrix: + include: + # Linux: comprehensive E2E tests + - os: linux + runner: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} + test_scope: "all" + # macOS: window lifecycle and platform-dependent tests only + - os: macos + runner: ${{ github.repository_owner == 'coder' && 'depot-macos-latest' || 'macos-latest' }} + test_scope: "window-lifecycle" + runs-on: ${{ matrix.runner }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -159,18 +171,24 @@ jobs: - uses: ./.github/actions/setup-mux - - name: Install xvfb + - name: Install xvfb (Linux) + if: matrix.os == 'linux' run: | sudo apt-get update sudo apt-get install -y xvfb - uses: ./.github/actions/setup-playwright - - name: Run e2e tests + - name: Run comprehensive e2e tests (Linux) + if: matrix.os == 'linux' run: xvfb-run -a make test-e2e env: ELECTRON_DISABLE_SANDBOX: 1 + - name: Run window lifecycle e2e tests (macOS) + if: matrix.os == 'macos' + run: make test-e2e PLAYWRIGHT_ARGS="tests/e2e/scenarios/windowLifecycle.spec.ts" + docker-smoke-test: name: Docker Smoke Test runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }} diff --git a/src/node/services/mock/scenarios.ts b/src/node/services/mock/scenarios.ts index 5c2e25546f..8ce58f5758 100644 --- a/src/node/services/mock/scenarios.ts +++ b/src/node/services/mock/scenarios.ts @@ -3,6 +3,7 @@ import * as review from "./scenarios/review"; import * as toolFlows from "./scenarios/toolFlows"; import * as slashCommands from "./scenarios/slashCommands"; import * as permissionModes from "./scenarios/permissionModes"; +import * as errorScenarios from "./scenarios/errorScenarios"; import type { ScenarioTurn } from "./scenarioTypes"; export const allScenarios: ScenarioTurn[] = [ @@ -11,4 +12,5 @@ export const allScenarios: ScenarioTurn[] = [ ...toolFlows.scenarios, ...slashCommands.scenarios, ...permissionModes.scenarios, + ...errorScenarios.scenarios, ]; diff --git a/src/node/services/mock/scenarios/errorScenarios.ts b/src/node/services/mock/scenarios/errorScenarios.ts new file mode 100644 index 0000000000..0ce9b2fbfc --- /dev/null +++ b/src/node/services/mock/scenarios/errorScenarios.ts @@ -0,0 +1,97 @@ +import type { ScenarioTurn } from "@/node/services/mock/scenarioTypes"; +import { STREAM_BASE_DELAY } from "@/node/services/mock/scenarioTypes"; +import { KNOWN_MODELS } from "@/common/constants/knownModels"; + +export const ERROR_PROMPTS = { + TRIGGER_RATE_LIMIT: "Trigger rate limit error", + TRIGGER_API_ERROR: "Trigger API error", + TRIGGER_NETWORK_ERROR: "Trigger network error", +} as const; + +export const ERROR_MESSAGES = { + RATE_LIMIT: "Rate limit exceeded. Please retry after 60 seconds.", + API_ERROR: "Internal server error occurred while processing the request.", + NETWORK_ERROR: "Network connection lost. Please check your internet connection.", +} as const; + +const rateLimitErrorTurn: ScenarioTurn = { + user: { + text: ERROR_PROMPTS.TRIGGER_RATE_LIMIT, + thinkingLevel: "low", + mode: "exec", + }, + assistant: { + messageId: "msg-error-ratelimit", + events: [ + { + kind: "stream-start", + delay: 0, + messageId: "msg-error-ratelimit", + model: KNOWN_MODELS.GPT.id, + }, + { + kind: "stream-delta", + delay: STREAM_BASE_DELAY, + text: "Processing your request...", + }, + { + kind: "stream-error", + delay: STREAM_BASE_DELAY * 2, + error: ERROR_MESSAGES.RATE_LIMIT, + errorType: "rate_limit", + }, + ], + }, +}; + +const apiErrorTurn: ScenarioTurn = { + user: { + text: ERROR_PROMPTS.TRIGGER_API_ERROR, + thinkingLevel: "low", + mode: "exec", + }, + assistant: { + messageId: "msg-error-api", + events: [ + { + kind: "stream-start", + delay: 0, + messageId: "msg-error-api", + model: KNOWN_MODELS.GPT.id, + }, + { + kind: "stream-error", + delay: STREAM_BASE_DELAY, + error: ERROR_MESSAGES.API_ERROR, + errorType: "server_error", + }, + ], + }, +}; + +const networkErrorTurn: ScenarioTurn = { + user: { + text: ERROR_PROMPTS.TRIGGER_NETWORK_ERROR, + thinkingLevel: "low", + mode: "exec", + }, + assistant: { + messageId: "msg-error-network", + events: [ + { + kind: "stream-start", + delay: 0, + messageId: "msg-error-network", + model: KNOWN_MODELS.GPT.id, + }, + { + kind: "stream-error", + delay: STREAM_BASE_DELAY, + error: ERROR_MESSAGES.NETWORK_ERROR, + errorType: "network", + }, + ], + }, +}; + +export const scenarios: ScenarioTurn[] = [rateLimitErrorTurn, apiErrorTurn, networkErrorTurn]; diff --git a/tests/e2e/scenarios/persistence.spec.ts b/tests/e2e/scenarios/persistence.spec.ts new file mode 100644 index 0000000000..37d1c09d86 --- /dev/null +++ b/tests/e2e/scenarios/persistence.spec.ts @@ -0,0 +1,41 @@ +import { electronTest as test, electronExpect as expect } from "../electronTest"; +import { LIST_PROGRAMMING_LANGUAGES } from "@/node/services/mock/scenarios/basicChat"; + +test.skip( + ({ browserName }) => browserName !== "chromium", + "Electron scenario runs on chromium only" +); + +test.describe("persistence", () => { + test("chat history persists across page reload", async ({ ui, page }) => { + await ui.projects.openFirstWorkspace(); + + await ui.chat.captureStreamTimeline(async () => { + await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES); + }); + await ui.chat.expectTranscriptContains("Python"); + + await page.reload(); + await page.waitForLoadState("domcontentloaded"); + await ui.projects.openFirstWorkspace(); + + await ui.chat.expectTranscriptContains("Python"); + }); + + test("chat history survives settings navigation", async ({ ui }) => { + await ui.projects.openFirstWorkspace(); + + await ui.chat.captureStreamTimeline(async () => { + await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES); + }); + + // Navigate through settings (potential state corruption points) + await ui.settings.open(); + await ui.settings.selectSection("Models"); + await ui.settings.selectSection("Providers"); + await ui.settings.close(); + + await ui.chat.expectTranscriptContains("Python"); + await ui.chat.expectTranscriptContains("JavaScript"); + }); +}); diff --git a/tests/e2e/scenarios/streamingBehavior.spec.ts b/tests/e2e/scenarios/streamingBehavior.spec.ts new file mode 100644 index 0000000000..70838a465e --- /dev/null +++ b/tests/e2e/scenarios/streamingBehavior.spec.ts @@ -0,0 +1,77 @@ +import { electronTest as test, electronExpect as expect } from "../electronTest"; +import { LIST_PROGRAMMING_LANGUAGES } from "@/node/services/mock/scenarios/basicChat"; +import { ERROR_PROMPTS, ERROR_MESSAGES } from "@/node/services/mock/scenarios/errorScenarios"; + +test.skip( + ({ browserName }) => browserName !== "chromium", + "Electron scenario runs on chromium only" +); + +test.describe("streaming behavior", () => { + test("stream continues after settings modal opens", async ({ ui, page }) => { + await ui.projects.openFirstWorkspace(); + + const streamPromise = ui.chat.captureStreamTimeline(async () => { + await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES); + }); + + await page.waitForTimeout(50); + await ui.settings.open(); + const timeline = await streamPromise; + await ui.settings.close(); + + expect(timeline.events.some((e) => e.type === "stream-end")).toBe(true); + await ui.chat.expectTranscriptContains("Python"); + }); + + test("mode switching doesn't break streaming", async ({ ui }) => { + await ui.projects.openFirstWorkspace(); + + await ui.chat.setMode("Exec"); + await ui.chat.setMode("Plan"); + + const timeline = await ui.chat.captureStreamTimeline(async () => { + await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES); + }); + + expect(timeline.events.some((e) => e.type === "stream-end")).toBe(true); + await ui.chat.expectTranscriptContains("Python"); + }); + + // Consolidate error tests using parameterization + for (const [errorType, prompt, expectedMessage] of [ + ["rate limit", ERROR_PROMPTS.TRIGGER_RATE_LIMIT, ERROR_MESSAGES.RATE_LIMIT], + ["server", ERROR_PROMPTS.TRIGGER_API_ERROR, ERROR_MESSAGES.API_ERROR], + ["network", ERROR_PROMPTS.TRIGGER_NETWORK_ERROR, ERROR_MESSAGES.NETWORK_ERROR], + ] as const) { + test(`${errorType} error displays in transcript`, async ({ ui, page }) => { + await ui.projects.openFirstWorkspace(); + await ui.chat.setMode("Exec"); + + const timeline = await ui.chat.captureStreamTimeline(async () => { + await ui.chat.sendMessage(prompt); + }); + + expect(timeline.events.some((e) => e.type === "stream-error")).toBe(true); + const transcript = page.getByRole("log", { name: "Conversation transcript" }); + await expect(transcript.getByText(expectedMessage)).toBeVisible(); + }); + } + + test("app recovers after error", async ({ ui }) => { + await ui.projects.openFirstWorkspace(); + await ui.chat.setMode("Exec"); + + await ui.chat.captureStreamTimeline(async () => { + await ui.chat.sendMessage(ERROR_PROMPTS.TRIGGER_API_ERROR); + }); + + await ui.chat.setMode("Plan"); + const timeline = await ui.chat.captureStreamTimeline(async () => { + await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES); + }); + + expect(timeline.events.some((e) => e.type === "stream-end")).toBe(true); + await ui.chat.expectTranscriptContains("Python"); + }); +}); diff --git a/tests/e2e/scenarios/windowLifecycle.spec.ts b/tests/e2e/scenarios/windowLifecycle.spec.ts new file mode 100644 index 0000000000..01ceffc574 --- /dev/null +++ b/tests/e2e/scenarios/windowLifecycle.spec.ts @@ -0,0 +1,61 @@ +import { electronTest as test, electronExpect as expect } from "../electronTest"; +import { LIST_PROGRAMMING_LANGUAGES } from "@/node/services/mock/scenarios/basicChat"; + +test.skip( + ({ browserName }) => browserName !== "chromium", + "Electron scenario runs on chromium only" +); + +test.describe("window lifecycle", () => { + test("window opens with expected structure", async ({ page }) => { + await expect(page.getByRole("navigation", { name: "Projects" })).toBeVisible(); + await expect(page.locator("main, #root, .app-container").first()).toBeVisible(); + await expect(page.getByRole("dialog", { name: /error/i })).not.toBeVisible(); + }); + + test("workspace content loads correctly", async ({ ui, page }) => { + await ui.projects.openFirstWorkspace(); + await expect(page.getByRole("log", { name: "Conversation transcript" })).toBeVisible(); + await expect(page.getByRole("textbox", { name: /message/i })).toBeVisible(); + }); + + test("survives rapid settings navigation", async ({ ui, page }) => { + await ui.projects.openFirstWorkspace(); + + // Stress test settings modal with rapid open/close/navigate + for (let i = 0; i < 3; i++) { + await ui.settings.open(); + await ui.settings.selectSection("Providers"); + await ui.settings.selectSection("Models"); + await ui.settings.close(); + } + + // Verify app remains functional + await expect(page.getByRole("navigation", { name: "Projects" })).toBeVisible(); + const chatInput = page.getByRole("textbox", { name: /message/i }); + await expect(chatInput).toBeVisible(); + await chatInput.click(); + await expect(chatInput).toBeFocused(); + }); + + // Exercises IPC handler stability under heavy use (regression: #851 duplicate handler registration) + test("IPC stable after heavy operations", async ({ ui, page }) => { + await ui.projects.openFirstWorkspace(); + + // Many IPC calls: stream + mode switches + settings navigation + const timeline = await ui.chat.captureStreamTimeline(async () => { + await ui.chat.sendMessage(LIST_PROGRAMMING_LANGUAGES); + }); + expect(timeline.events.some((e) => e.type === "stream-end")).toBe(true); + + await ui.chat.setMode("Exec"); + await ui.chat.setMode("Plan"); + await ui.settings.open(); + await ui.settings.selectSection("Providers"); + await ui.settings.close(); + + // Verify app remains functional after all IPC calls + await expect(page.getByRole("navigation", { name: "Projects" })).toBeVisible(); + await ui.chat.expectTranscriptContains("Python"); + }); +}); diff --git a/tests/e2e/utils/ui.ts b/tests/e2e/utils/ui.ts index d275551b90..9acb2f2d1d 100644 --- a/tests/e2e/utils/ui.ts +++ b/tests/e2e/utils/ui.ts @@ -345,7 +345,10 @@ export function createWorkspaceUI(page: Page, context: DemoProjectConfig): Works if (!capture) { return false; } - return capture.events.some((event) => event.type === "stream-end"); + // Wait for either stream-end or stream-error to complete the capture + return capture.events.some( + (event) => event.type === "stream-end" || event.type === "stream-error" + ); }, workspaceId, { timeout: timeoutMs }