From bee602514187522f7cd39388b7d22d5f538de81f Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Fri, 30 Jan 2026 20:54:45 -0800 Subject: [PATCH 1/3] pass gateway error exception messages --- src/index.ts | 2 +- src/snapshot.ts | 77 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 5 deletions(-) diff --git a/src/index.ts b/src/index.ts index a42f011..81f741a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,7 +3,7 @@ */ export { SentienceBrowser, PermissionPolicy } from './browser'; -export { snapshot, SnapshotOptions } from './snapshot'; +export { snapshot, SnapshotOptions, SnapshotGatewayError } from './snapshot'; export { query, find, parseSelector } from './query'; export { back, diff --git a/src/snapshot.ts b/src/snapshot.ts index 29b45f2..692df4a 100644 --- a/src/snapshot.ts +++ b/src/snapshot.ts @@ -11,6 +11,44 @@ import { BrowserEvaluator } from './utils/browser-evaluator'; // Maximum payload size for API requests (10MB server limit) const MAX_PAYLOAD_BYTES = 10 * 1024 * 1024; +/** + * Structured error for server-side (gateway) snapshot failures. + * + * Keeps HTTP status/URL/response details available to callers for better logging/debugging. + */ +export class SnapshotGatewayError extends Error { + public statusCode?: number; + public url?: string; + public requestId?: string; + public responseText?: string; + public cause?: unknown; + + constructor( + message: string, + opts?: { + statusCode?: number; + url?: string; + requestId?: string; + responseText?: string; + cause?: unknown; + } + ) { + super(message); + this.name = 'SnapshotGatewayError'; + this.statusCode = opts?.statusCode; + this.url = opts?.url; + this.requestId = opts?.requestId; + this.responseText = opts?.responseText; + this.cause = opts?.cause; + } + + static snip(s: string | undefined, n: number = 400): string | undefined { + if (!s) return undefined; + const t = String(s).replace(/\r/g, ' ').replace(/\n/g, ' ').trim(); + return t.slice(0, n); + } +} + export interface SnapshotOptions { screenshot?: boolean | { format: 'png' | 'jpeg'; quality?: number }; limit?: number; @@ -202,6 +240,7 @@ async function snapshotViaApi( if (!page) { throw new Error('Browser not started. Call start() first.'); } + const gatewayUrl = `${apiUrl}/v1/snapshot`; // CRITICAL: Wait for extension injection to complete (CSP-resistant architecture) // Even for API mode, we need the extension to collect raw data locally @@ -278,15 +317,38 @@ async function snapshotViaApi( }; try { - const response = await fetch(`${apiUrl}/v1/snapshot`, { + const response = await fetch(gatewayUrl, { method: 'POST', headers, body: payloadJson, }); if (!response.ok) { - const errorText = await response.text(); - throw new Error(`API request failed: ${response.status} ${errorText}`); + let errorText: string | undefined = undefined; + try { + errorText = await response.text(); + } catch (_e) { + errorText = undefined; + } + const requestId = + response.headers.get('x-request-id') || response.headers.get('x-trace-id') || undefined; + const bodySnip = SnapshotGatewayError.snip(errorText); + + const parts: string[] = []; + parts.push(`status=${response.status}`); + parts.push(`url=${gatewayUrl}`); + if (requestId) parts.push(`request_id=${requestId}`); + if (bodySnip) parts.push(`body=${bodySnip}`); + + throw new SnapshotGatewayError( + `Server-side snapshot API failed: ${parts.join(' ')}. Try using use_api: false to use local extension instead.`, + { + statusCode: response.status, + url: gatewayUrl, + requestId, + responseText: bodySnip, + } + ); } const apiResult = await response.json(); @@ -359,6 +421,13 @@ async function snapshotViaApi( return snapshotData; } catch (e: any) { - throw new Error(`API request failed: ${e.message}`); + if (e instanceof SnapshotGatewayError) { + throw e; + } + const errMsg = e instanceof Error ? `${e.name}: ${e.message}` : String(e); + throw new SnapshotGatewayError( + `Server-side snapshot API failed: url=${gatewayUrl} err=${SnapshotGatewayError.snip(errMsg, 220)}. Try using use_api: false to use local extension instead.`, + { url: gatewayUrl, cause: e } + ); } } From f351d18e2b8f11cd8028b4f7eac54eda3d329944 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Fri, 30 Jan 2026 21:18:27 -0800 Subject: [PATCH 2/3] include network error --- src/snapshot.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/snapshot.ts b/src/snapshot.ts index 692df4a..2e07ac9 100644 --- a/src/snapshot.ts +++ b/src/snapshot.ts @@ -424,9 +424,10 @@ async function snapshotViaApi( if (e instanceof SnapshotGatewayError) { throw e; } + const errType = e instanceof Error ? e.name : typeof e; const errMsg = e instanceof Error ? `${e.name}: ${e.message}` : String(e); throw new SnapshotGatewayError( - `Server-side snapshot API failed: url=${gatewayUrl} err=${SnapshotGatewayError.snip(errMsg, 220)}. Try using use_api: false to use local extension instead.`, + `Server-side snapshot API failed: url=${gatewayUrl} err_type=${SnapshotGatewayError.snip(errType, 80)} err=${SnapshotGatewayError.snip(errMsg, 220)}. Try using use_api: false to use local extension instead.`, { url: gatewayUrl, cause: e } ); } From 687e95be9a4bd789451a1a5b99e6f0a48364bb86 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sat, 31 Jan 2026 20:52:22 -0800 Subject: [PATCH 3/3] hardening validation rule for cloud trace sink fields image --- src/tracing/cloud-sink.ts | 78 +++++++++++++++++++++++---- tests/tracing/cloud-sink.test.ts | 90 ++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 9 deletions(-) diff --git a/src/tracing/cloud-sink.ts b/src/tracing/cloud-sink.ts index bf385c1..d2dc4ff 100644 --- a/src/tracing/cloud-sink.ts +++ b/src/tracing/cloud-sink.ts @@ -773,6 +773,57 @@ export class CloudTraceSink extends TraceSink { }); } + /** + * Normalize screenshot data by extracting base64 from data URL if needed. + * + * Handles both formats: + * - Data URL: "data:image/jpeg;base64,/9j/4AAQ..." + * - Pure base64: "/9j/4AAQ..." + * + * @param screenshotRaw - Raw screenshot data (data URL or base64) + * @param defaultFormat - Default format if not detected from data URL + * @returns Tuple of [base64String, formatString] + */ + private _normalizeScreenshotData( + screenshotRaw: string, + defaultFormat: string = 'jpeg' + ): [string, string] { + if (!screenshotRaw) { + return ['', defaultFormat]; + } + + // Check if it's a data URL + if (screenshotRaw.startsWith('data:image')) { + // Extract format from "data:image/jpeg;base64,..." or "data:image/png;base64,..." + try { + // Split on comma to get the base64 part + if (screenshotRaw.includes(',')) { + const [header, base64Data] = screenshotRaw.split(',', 2); + // Extract format from header: "data:image/jpeg;base64" + if (header.includes('/') && header.includes(';')) { + const formatPart = header.split('/')[1]?.split(';')[0]; + if (formatPart === 'jpeg' || formatPart === 'jpg') { + return [base64Data, 'jpeg']; + } else if (formatPart === 'png') { + return [base64Data, 'png']; + } + } + return [base64Data, defaultFormat]; + } else { + // Malformed data URL - return as-is with warning + this.logger?.warn('Malformed data URL in screenshot_base64 (missing comma)'); + return [screenshotRaw, defaultFormat]; + } + } catch (error: any) { + this.logger?.warn(`Error parsing screenshot data URL: ${error.message}`); + return [screenshotRaw, defaultFormat]; + } + } + + // Already pure base64 + return [screenshotRaw, defaultFormat]; + } + /** * Extract screenshots from trace events. * @@ -798,15 +849,24 @@ export class CloudTraceSink extends TraceSink { // Check if this is a snapshot event with screenshot if (event.type === 'snapshot') { const data = event.data || {}; - const screenshotBase64 = data.screenshot_base64; - - if (screenshotBase64) { - sequence += 1; - screenshots.set(sequence, { - base64: screenshotBase64, - format: data.screenshot_format || 'jpeg', - stepId: event.step_id, - }); + const screenshotRaw = data.screenshot_base64; + + if (screenshotRaw) { + // Normalize: extract base64 from data URL if needed + // Handles both "data:image/jpeg;base64,..." and pure base64 + const [screenshotBase64, screenshotFormat] = this._normalizeScreenshotData( + screenshotRaw, + data.screenshot_format || 'jpeg' + ); + + if (screenshotBase64) { + sequence += 1; + screenshots.set(sequence, { + base64: screenshotBase64, + format: screenshotFormat, + stepId: event.step_id, + }); + } } } } catch { diff --git a/tests/tracing/cloud-sink.test.ts b/tests/tracing/cloud-sink.test.ts index f50c081..8e3a415 100644 --- a/tests/tracing/cloud-sink.test.ts +++ b/tests/tracing/cloud-sink.test.ts @@ -339,6 +339,96 @@ describe('CloudTraceSink', () => { }); }); + describe('Screenshot data URL handling', () => { + it('should normalize screenshot data URLs to pure base64', async () => { + const runId = 'test-run-' + Date.now(); + const sink = new CloudTraceSink(uploadUrl, runId); + + // Test the private _normalizeScreenshotData method via type casting + const sinkAny = sink as any; + + // Test JPEG data URL + const [jpegBase64, jpegFormat] = sinkAny._normalizeScreenshotData( + 'data:image/jpeg;base64,/9j/4AAQSkZJRg...', + 'png' + ); + expect(jpegBase64).toBe('/9j/4AAQSkZJRg...'); + expect(jpegFormat).toBe('jpeg'); + + // Test PNG data URL + const [pngBase64, pngFormat] = sinkAny._normalizeScreenshotData( + 'data:image/png;base64,iVBORw0KGgoAAAA...', + 'jpeg' + ); + expect(pngBase64).toBe('iVBORw0KGgoAAAA...'); + expect(pngFormat).toBe('png'); + + // Test pure base64 (should pass through unchanged) + const [pureBase64, pureFormat] = sinkAny._normalizeScreenshotData( + '/9j/4AAQSkZJRg...', + 'jpeg' + ); + expect(pureBase64).toBe('/9j/4AAQSkZJRg...'); + expect(pureFormat).toBe('jpeg'); + + // Test empty string + const [emptyBase64, emptyFormat] = sinkAny._normalizeScreenshotData('', 'jpeg'); + expect(emptyBase64).toBe(''); + expect(emptyFormat).toBe('jpeg'); + + // Clean up + await sink.close(); + }); + + it('should handle data URL screenshots when extracting from trace', async () => { + const runId = 'test-run-' + Date.now(); + const sink = new CloudTraceSink(uploadUrl, runId); + + // Create test screenshot as a data URL (how some demos send it) + const testImageBase64 = 'iVBORw0KGgoAAAANSUhEUgAAAAE='; + const dataUrl = `data:image/png;base64,${testImageBase64}`; + + // Emit snapshot event with data URL + sink.emit({ + v: 1, + type: 'snapshot', + ts: new Date().toISOString(), + run_id: runId, + seq: 1, + step_id: 'step-1', + data: { + url: 'https://example.com', + element_count: 10, + screenshot_base64: dataUrl, // Data URL, not pure base64 + screenshot_format: 'png', + }, + }); + + // Close the write stream first + const sinkAny = sink as any; + if (sinkAny.writeStream && !sinkAny.writeStream.destroyed) { + await new Promise(resolve => { + sinkAny.writeStream.end(() => resolve()); + }); + } + + // Extract screenshots - should normalize data URL to pure base64 + const screenshots = await sinkAny._extractScreenshotsFromTrace(); + + expect(screenshots.size).toBe(1); + expect(screenshots.has(1)).toBe(true); + + const screenshot = screenshots.get(1); + // Verify the base64 was extracted from data URL (no "data:image" prefix) + expect(screenshot.base64).toBe(testImageBase64); + expect(screenshot.base64.startsWith('data:')).toBe(false); + expect(screenshot.format).toBe('png'); + + // Clean up + await sink.close(); + }); + }); + describe('Index upload', () => { let indexServer: http.Server; let indexServerPort: number;