Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

export { SentienceBrowser, PermissionPolicy } from './browser';
export { snapshot, SnapshotOptions } from './snapshot';
export { snapshot, SnapshotOptions, SnapshotGatewayError } from './snapshot';
export { query, find, parseSelector } from './query';
export {
back,
Expand Down
78 changes: 74 additions & 4 deletions src/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,44 @@
// Maximum payload size for API requests (10MB server limit)
const MAX_PAYLOAD_BYTES = 10 * 1024 * 1024;

/**
 * Structured error for server-side (gateway) snapshot failures.
 *
 * Keeps HTTP status/URL/response details available to callers for better
 * logging/debugging. Every detail field is optional because a failure may
 * occur before any HTTP response exists (for example a network error), in
 * which case only `url` and `cause` are typically populated.
 */
export class SnapshotGatewayError extends Error {
  /** HTTP status code of the gateway response, when a response was received. */
  public statusCode?: number;
  /** Gateway URL the request was sent to. */
  public url?: string;
  /** Request/trace identifier supplied by the caller, if any. */
  public requestId?: string;
  /** Response body text (callers usually pass an already-snipped excerpt). */
  public responseText?: string;
  /** Underlying error or thrown value that led to this failure, if any. */
  public cause?: unknown;

  /**
   * @param message - Human-readable description of the failure.
   * @param opts - Optional diagnostic details copied verbatim onto the instance.
   */
  constructor(
    message: string,
    opts?: {
      statusCode?: number;
      url?: string;
      requestId?: string;
      responseText?: string;
      cause?: unknown;
    }
  ) {
    super(message);
    // Pin the prototype explicitly so `instanceof SnapshotGatewayError` keeps
    // working even when the build down-levels classes (ES5 targets lose the
    // subclass prototype when extending built-ins such as Error).
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'SnapshotGatewayError';
    this.statusCode = opts?.statusCode;
    this.url = opts?.url;
    this.requestId = opts?.requestId;
    this.responseText = opts?.responseText;
    this.cause = opts?.cause;
  }

  /**
   * Produce a short single-line excerpt of `s` suitable for error messages.
   *
   * Carriage returns and line feeds are each replaced by a space, surrounding
   * whitespace is trimmed, and the result is cut to at most `n` characters.
   *
   * @param s - Text to shorten; may be undefined.
   * @param n - Maximum number of characters to keep (default 400).
   * @returns The excerpt, or `undefined` when there is nothing to show
   *   (missing, empty, or whitespace-only input).
   */
  static snip(s: string | undefined, n: number = 400): string | undefined {
    if (!s) return undefined;
    const flattened = s.replace(/[\r\n]/g, ' ').trim();
    // Whitespace-only input carries no information; report it the same way as
    // empty input instead of returning an empty string.
    if (flattened.length === 0) return undefined;
    return flattened.slice(0, n);
  }
}

export interface SnapshotOptions {
screenshot?: boolean | { format: 'png' | 'jpeg'; quality?: number };
limit?: number;
Expand Down Expand Up @@ -202,6 +240,7 @@
if (!page) {
throw new Error('Browser not started. Call start() first.');
}
const gatewayUrl = `${apiUrl}/v1/snapshot`;

// CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
// Even for API mode, we need the extension to collect raw data locally
Expand All @@ -211,7 +250,7 @@
() => typeof (window as any).sentience !== 'undefined',
5000
);
} catch (_e) {

Check warning on line 253 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (macos-latest, 20)

'_e' is defined but never used

Check warning on line 253 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, 20)

'_e' is defined but never used

Check warning on line 253 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (windows-latest, 20)

'_e' is defined but never used
throw new Error(
'Sentience extension failed to inject. Cannot collect raw data for API processing.'
);
Expand Down Expand Up @@ -278,15 +317,38 @@
};

try {
const response = await fetch(`${apiUrl}/v1/snapshot`, {
const response = await fetch(gatewayUrl, {
method: 'POST',
headers,
body: payloadJson,
});

if (!response.ok) {
const errorText = await response.text();
throw new Error(`API request failed: ${response.status} ${errorText}`);
let errorText: string | undefined = undefined;
try {
errorText = await response.text();
} catch (_e) {

Check warning on line 330 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (macos-latest, 20)

'_e' is defined but never used

Check warning on line 330 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, 20)

'_e' is defined but never used

Check warning on line 330 in src/snapshot.ts

View workflow job for this annotation

GitHub Actions / test (windows-latest, 20)

'_e' is defined but never used
errorText = undefined;
}
const requestId =
response.headers.get('x-request-id') || response.headers.get('x-trace-id') || undefined;
const bodySnip = SnapshotGatewayError.snip(errorText);

const parts: string[] = [];
parts.push(`status=${response.status}`);
parts.push(`url=${gatewayUrl}`);
if (requestId) parts.push(`request_id=${requestId}`);
if (bodySnip) parts.push(`body=${bodySnip}`);

throw new SnapshotGatewayError(
`Server-side snapshot API failed: ${parts.join(' ')}. Try using use_api: false to use local extension instead.`,
{
statusCode: response.status,
url: gatewayUrl,
requestId,
responseText: bodySnip,
}
);
}

const apiResult = await response.json();
Expand Down Expand Up @@ -359,6 +421,14 @@

return snapshotData;
} catch (e: any) {
throw new Error(`API request failed: ${e.message}`);
if (e instanceof SnapshotGatewayError) {
throw e;
}
const errType = e instanceof Error ? e.name : typeof e;
const errMsg = e instanceof Error ? `${e.name}: ${e.message}` : String(e);
throw new SnapshotGatewayError(
`Server-side snapshot API failed: url=${gatewayUrl} err_type=${SnapshotGatewayError.snip(errType, 80)} err=${SnapshotGatewayError.snip(errMsg, 220)}. Try using use_api: false to use local extension instead.`,
{ url: gatewayUrl, cause: e }
);
}
}
78 changes: 69 additions & 9 deletions src/tracing/cloud-sink.ts
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,57 @@ export class CloudTraceSink extends TraceSink {
});
}

/**
* Normalize screenshot data by extracting base64 from data URL if needed.
*
* Handles both formats:
* - Data URL: "data:image/jpeg;base64,/9j/4AAQ..."
* - Pure base64: "/9j/4AAQ..."
*
* @param screenshotRaw - Raw screenshot data (data URL or base64)
* @param defaultFormat - Default format if not detected from data URL
* @returns Tuple of [base64String, formatString]
*/
private _normalizeScreenshotData(
  screenshotRaw: string,
  defaultFormat: string = 'jpeg'
): [string, string] {
  // Nothing to normalize: empty payload, caller-supplied format.
  if (!screenshotRaw) {
    return ['', defaultFormat];
  }

  // Input that is not an image data URL is passed through untouched as
  // already-pure base64.
  if (!screenshotRaw.startsWith('data:image')) {
    return [screenshotRaw, defaultFormat];
  }

  // Split at the FIRST comma only. `String.prototype.split(',', 2)` would
  // silently discard everything after a second comma (the limit truncates the
  // result instead of leaving the remainder in the last element), so slice
  // around the first comma to keep the whole payload.
  const commaIndex = screenshotRaw.indexOf(',');
  if (commaIndex === -1) {
    // Malformed data URL - return as-is with warning
    this.logger?.warn('Malformed data URL in screenshot_base64 (missing comma)');
    return [screenshotRaw, defaultFormat];
  }

  const header = screenshotRaw.slice(0, commaIndex);
  const base64Data = screenshotRaw.slice(commaIndex + 1);

  // Header has the form "data:image/jpeg;base64"; the format is the text
  // between the first '/' and the following ';'. Compared case-insensitively
  // so "image/PNG" is recognized as well.
  if (header.includes('/') && header.includes(';')) {
    const formatPart = header.split('/')[1]?.split(';')[0]?.toLowerCase();
    if (formatPart === 'jpeg' || formatPart === 'jpg') {
      return [base64Data, 'jpeg'];
    }
    if (formatPart === 'png') {
      return [base64Data, 'png'];
    }
  }

  // Unrecognized or missing format: keep the payload, fall back to the default.
  return [base64Data, defaultFormat];
}

/**
* Extract screenshots from trace events.
*
Expand All @@ -798,15 +849,24 @@ export class CloudTraceSink extends TraceSink {
// Check if this is a snapshot event with screenshot
if (event.type === 'snapshot') {
const data = event.data || {};
const screenshotBase64 = data.screenshot_base64;

if (screenshotBase64) {
sequence += 1;
screenshots.set(sequence, {
base64: screenshotBase64,
format: data.screenshot_format || 'jpeg',
stepId: event.step_id,
});
const screenshotRaw = data.screenshot_base64;

if (screenshotRaw) {
// Normalize: extract base64 from data URL if needed
// Handles both "data:image/jpeg;base64,..." and pure base64
const [screenshotBase64, screenshotFormat] = this._normalizeScreenshotData(
screenshotRaw,
data.screenshot_format || 'jpeg'
);

if (screenshotBase64) {
sequence += 1;
screenshots.set(sequence, {
base64: screenshotBase64,
format: screenshotFormat,
stepId: event.step_id,
});
}
}
}
} catch {
Expand Down
90 changes: 90 additions & 0 deletions tests/tracing/cloud-sink.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,96 @@ describe('CloudTraceSink', () => {
});
});

describe('Screenshot data URL handling', () => {
it('should normalize screenshot data URLs to pure base64', async () => {
  const runId = 'test-run-' + Date.now();
  const sink = new CloudTraceSink(uploadUrl, runId);

  // Test the private _normalizeScreenshotData method via type casting
  const sinkAny = sink as any;

  // Test JPEG data URL: the header must be stripped and the format detected
  // from the URL itself, overriding the 'png' default passed in.
  const [jpegBase64, jpegFormat] = sinkAny._normalizeScreenshotData(
    'data:image/jpeg;base64,/9j/4AAQSkZJRg...',
    'png'
  );
  expect(jpegBase64).toBe('/9j/4AAQSkZJRg...');
  expect(jpegFormat).toBe('jpeg');

  // Test PNG data URL: same as above, overriding the 'jpeg' default.
  const [pngBase64, pngFormat] = sinkAny._normalizeScreenshotData(
    'data:image/png;base64,iVBORw0KGgoAAAA...',
    'jpeg'
  );
  expect(pngBase64).toBe('iVBORw0KGgoAAAA...');
  expect(pngFormat).toBe('png');

  // Test pure base64 (should pass through unchanged)
  const [pureBase64, pureFormat] = sinkAny._normalizeScreenshotData(
    '/9j/4AAQSkZJRg...',
    'jpeg'
  );
  expect(pureBase64).toBe('/9j/4AAQSkZJRg...');
  expect(pureFormat).toBe('jpeg');

  // Test empty string
  const [emptyBase64, emptyFormat] = sinkAny._normalizeScreenshotData('', 'jpeg');
  expect(emptyBase64).toBe('');
  expect(emptyFormat).toBe('jpeg');

  // Clean up
  await sink.close();
});

it('should handle data URL screenshots when extracting from trace', async () => {
  const runId = 'test-run-' + Date.now();
  const sink = new CloudTraceSink(uploadUrl, runId);
  // Untyped view of the sink, used to reach its private members below.
  const internals = sink as any;

  // Screenshot payload wrapped in a data URL (how some demos send it).
  const rawBase64 = 'iVBORw0KGgoAAAANSUhEUgAAAAE=';

  // Record a snapshot event whose screenshot field holds the data URL
  // rather than pure base64.
  sink.emit({
    v: 1,
    type: 'snapshot',
    ts: new Date().toISOString(),
    run_id: runId,
    seq: 1,
    step_id: 'step-1',
    data: {
      url: 'https://example.com',
      element_count: 10,
      screenshot_base64: `data:image/png;base64,${rawBase64}`,
      screenshot_format: 'png',
    },
  });

  // End the write stream (when one exists and is still live) before the
  // trace is read back.
  const stream = internals.writeStream;
  const streamIsLive = Boolean(stream) && !stream.destroyed;
  if (streamIsLive) {
    await new Promise<void>(done => {
      stream.end(() => done());
    });
  }

  // Extraction is expected to hand back pure base64, not the data URL.
  const extracted = await internals._extractScreenshotsFromTrace();

  expect(extracted.size).toBe(1);
  expect(extracted.has(1)).toBe(true);

  const first = extracted.get(1);
  // The "data:image..." header must be gone and the format preserved.
  expect(first.base64).toBe(rawBase64);
  expect(first.base64.startsWith('data:')).toBe(false);
  expect(first.format).toBe('png');

  // Clean up
  await sink.close();
});
});

describe('Index upload', () => {
let indexServer: http.Server;
let indexServerPort: number;
Expand Down
Loading