Skip to content

Commit cadd192

Browse files
tkattkat and miguelg719 authored
Update screenshot collector (#1373)
# why
- when transitioning to v3, we did not use the latest version of screenshot collector
- screenshot collector currently fails due to not having page.on and page.off support for the load and domcontentloaded events

# what changed
- added latest version of screenshot collector

# test plan
- ran evals in cli with additional logging to also verify everything is working as expected

<!-- This is an auto-generated description by cubic. -->
---
## Summary by cubic
Updated the evals CLI screenshot collector to the latest version, adding image-diff filtering and a V3 event bus that emits agent screenshots. This reduces duplicate screenshots and stabilizes capture on v3 pages where navigation events are disabled.

- **New Features**
  - Skip similar screenshots using MSE/SSIM thresholds with sharp.
  - Event bus integration: agents emit screenshots; collector can ingest them.
  - Non-blocking initial/final captures and safer interval capture with error handling.
- **Dependencies**
  - Added sharp ^0.34.5 for image processing (evals and core).
  - Patch bump via changeset for @browserbasehq/stagehand-evals.

<sup>Written for commit f4e90f8. Summary will update automatically on new commits.</sup>
<!-- End of auto-generated description by cubic. -->

---------

Co-authored-by: miguel <miguelg71921@gmail.com>
Co-authored-by: Miguel <36487034+miguelg719@users.noreply.github.com>
1 parent 30b0e12 commit cadd192

File tree

15 files changed

+547
-96
lines changed

15 files changed

+547
-96
lines changed

.changeset/beige-taxes-punch.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand-evals": patch
3+
---
4+
5+
Update screenshot collector in agent evals cli

.prettierignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,8 @@ packages/core/lib/dom/build/
1212
packages/core/lib/v3/dom/build/
1313
packages/evals/dist/
1414
packages/docs/
15-
*.min.js
15+
*.min.js
16+
.browserbase/
17+
.browserbase/**
18+
**/.browserbase/
19+
**/.browserbase/**

eslint.config.mjs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ export default [
1414
"packages/core/lib/v3/dom/build/**",
1515
"**/*.config.js",
1616
"**/*.config.mjs",
17+
".browserbase/**",
18+
"**/.browserbase/**",
19+
"**/*.json",
1720
],
1821
},
1922
pluginJs.configs.recommended,

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"build": "turbo run build",
88
"build:cli": "turbo run build:cli",
99
"lint": "turbo run lint",
10-
"format": "turbo run format",
10+
"format": "prettier --write .",
1111
"prettier": "prettier --write .",
1212
"eslint": "eslint .",
1313
"test": "turbo run test",

packages/core/lib/utils.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { ModelProvider } from "./v3/types/public/model";
77
import { ZodPathSegments } from "./v3/types/private/internal";
88
import type { StagehandZodSchema } from "./v3/zodCompat";
99
import { isZod4Schema } from "./v3/zodCompat";
10+
import sharp from "sharp";
1011

1112
const ID_PATTERN = /^\d+-\d+$/;
1213

@@ -837,3 +838,21 @@ export function jsonSchemaToZod(schema: JsonSchema): ZodTypeAny {
837838
return z.any();
838839
}
839840
}
841+
842+
export async function imageResize(
843+
img: Buffer,
844+
scaleFactor: number,
845+
): Promise<Buffer> {
846+
const metadata = await sharp(img).metadata();
847+
// calculate new dimensions
848+
const width = Math.round(metadata.width * scaleFactor);
849+
const height = Math.round(metadata.height * scaleFactor);
850+
return await sharp(img)
851+
.resize(width, height, { fit: "inside", kernel: sharp.kernel.lanczos3 })
852+
.png({
853+
compressionLevel: 9,
854+
adaptiveFiltering: true,
855+
palette: true,
856+
})
857+
.toBuffer();
858+
}

packages/core/lib/v3/handlers/v3AgentHandler.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,17 @@ export class V3AgentHandler {
171171
}
172172
}
173173
state.currentPageUrl = (await this.v3.context.awaitActivePage()).url();
174+
175+
// Capture screenshot after tool execution and emit event
176+
try {
177+
await this.captureAndEmitScreenshot();
178+
} catch (e) {
179+
this.logger({
180+
category: "agent",
181+
message: `Warning: Failed to capture screenshot: ${getErrorMessage(e)}`,
182+
level: 1,
183+
});
184+
}
174185
}
175186

176187
if (userCallback) {
@@ -448,4 +459,21 @@ export class V3AgentHandler {
448459
}
449460
return stepCountIs(maxSteps)(result);
450461
}
462+
463+
/**
464+
* Capture a screenshot and emit it via the event bus
465+
*/
466+
private async captureAndEmitScreenshot(): Promise<void> {
467+
try {
468+
const page = await this.v3.context.awaitActivePage();
469+
const screenshot = await page.screenshot({ fullPage: false });
470+
this.v3.bus.emit("agent_screensot_taken_event", screenshot);
471+
} catch (error) {
472+
this.logger({
473+
category: "agent",
474+
message: `Error capturing screenshot: ${getErrorMessage(error)}`,
475+
level: 0,
476+
});
477+
}
478+
}
451479
}

packages/core/lib/v3/handlers/v3CuaAgentHandler.ts

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ export class V3CuaAgentHandler {
2323
private agentClient: AgentClient;
2424
private options: AgentHandlerOptions;
2525
private highlightCursor: boolean;
26-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
27-
private screenshotCollector?: any;
2826

2927
constructor(
3028
v3: V3,
@@ -545,9 +543,8 @@ export class V3CuaAgentHandler {
545543
try {
546544
const page = await this.v3.context.awaitActivePage();
547545
const base64Image = await page.screenshot({ fullPage: false });
548-
if (this.screenshotCollector) {
549-
this.screenshotCollector.addScreenshot(base64Image);
550-
}
546+
// Emit screenshot event via the bus
547+
this.v3.bus.emit("agent_screensot_taken_event", base64Image);
551548
const currentUrl = page.url();
552549
return await this.agentClient.captureScreenshot({
553550
base64Image,
@@ -571,20 +568,4 @@ export class V3CuaAgentHandler {
571568
// Best-effort only
572569
}
573570
}
574-
575-
/**
576-
* Set the screenshot collector for this agent handler
577-
*/
578-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
579-
setScreenshotCollector(collector: any): void {
580-
this.screenshotCollector = collector;
581-
}
582-
583-
/**
584-
* Get the screenshot collector
585-
*/
586-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
587-
getScreenshotCollector(): any {
588-
return this.screenshotCollector;
589-
}
590571
}

packages/core/lib/v3/v3.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import dotenv from "dotenv";
2+
import { EventEmitter } from "events";
23
import fs from "fs";
34
import os from "os";
45
import path from "path";
@@ -140,6 +141,12 @@ export class V3 {
140141
private observeHandler: ObserveHandler | null = null;
141142
private ctx: V3Context | null = null;
142143
public llmClient!: LLMClient;
144+
145+
/**
146+
* Event bus for internal communication.
147+
* Agent handlers emit "agent_screensot_taken_event" (spelled exactly as in the emitters) with the captured screenshot as payload after taking a screenshot during agent execution.
148+
*/
149+
public readonly bus: EventEmitter = new EventEmitter();
143150
private modelName: AvailableModel;
144151
private modelClientOptions: ClientOptions;
145152
private llmProvider: LLMProvider;

packages/core/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
"pino": "^9.6.0",
5858
"pino-pretty": "^13.0.0",
5959
"playwright": "^1.52.0",
60+
"sharp": "^0.34.5",
6061
"ws": "^8.18.0",
6162
"zod-to-json-schema": "^3.25.0"
6263
},

packages/evals/index.eval.ts

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -386,13 +386,7 @@ const generateFilteredTestcases = (): Testcase[] => {
386386
// Pass full EvalInput to the task (data-driven params available via input.params)
387387
let result;
388388
try {
389-
result = await taskFunction({
390-
// ...taskInput,
391-
v3: v3Input?.v3,
392-
v3Agent: v3Input?.agent,
393-
logger: v3Input?.logger,
394-
v3Input,
395-
});
389+
result = await taskFunction({ ...v3Input, input });
396390
// Log result to console
397391
if (result && result._success) {
398392
console.log(`✅ ${input.name}: Passed`);

0 commit comments

Comments
 (0)