From c5de8bdf3f33090471199764370da9b0046ab59b Mon Sep 17 00:00:00 2001 From: Jun Date: Tue, 1 Apr 2025 22:28:22 -0500 Subject: [PATCH 1/3] Add a Puppeteer tool to capture screenshot as raw base64 text --- src/puppeteer/README.md | 8 ++++++ src/puppeteer/index.ts | 55 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/src/puppeteer/README.md b/src/puppeteer/README.md index 4eab314c9a..50f8b82e92 100644 --- a/src/puppeteer/README.md +++ b/src/puppeteer/README.md @@ -21,6 +21,14 @@ A Model Context Protocol server that provides browser automation capabilities us - `width` (number, optional, default: 800): Screenshot width - `height` (number, optional, default: 600): Screenshot height +- **puppeteer_screenshot_encoded** + - Captures a screenshot of the entire page or a specific element and return it as a base64-encoded data URI. + - Inputs: + - `name` (string, required): Name for the screenshot + - `selector` (string, optional): CSS selector for element to screenshot + - `width` (number, optional, default: 800): Screenshot width + - `height` (number, optional, default: 600): Screenshot height + - **puppeteer_click** - Click elements on the page - Input: `selector` (string): CSS selector for element to click diff --git a/src/puppeteer/index.ts b/src/puppeteer/index.ts index 1849c78398..feabec7018 100644 --- a/src/puppeteer/index.ts +++ b/src/puppeteer/index.ts @@ -43,6 +43,20 @@ const TOOLS: Tool[] = [ required: ["name"], }, }, + { + name: "puppeteer_screenshot_encoded", + description: "Take a screenshot of the current page or a specific element and return it as a base64-encoded data URI", + inputSchema: { + type: "object", + properties: { + name: { type: "string", description: "Name for the screenshot" }, + selector: { type: "string", description: "CSS selector for element to screenshot" }, + width: { type: "number", description: "Width in pixels (default: 800)" }, + height: { type: "number", description: "Height in pixels (default: 600)" }, + }, + required: ["name"], + }, + }, { name: "puppeteer_click", description: "Click an element on the page", @@ -265,6 +279,47 @@ async function handleToolCall(name: string, args: any): Promise }; } + case "puppeteer_screenshot_encoded": { + const width = args.width ?? 800; + const height = args.height ?? 600; + await page.setViewport({ width, height }); + + const screenshot = await (args.selector + ? (await page.$(args.selector))?.screenshot({ encoding: "base64" }) + : page.screenshot({ encoding: "base64", fullPage: false })); + + if (!screenshot) { + return { + content: [ + { + type: "text", + text: args.selector ? `Element not found: ${args.selector}` : "Screenshot failed", + }, + ], + isError: true, + }; + } + + screenshots.set(args.name, screenshot as string); + server.notification({ + method: "notifications/resources/list_changed", + }); + + return { + content: [ + { + type: "text", + text: `Screenshot '${args.name}' taken at ${width}x${height}`, + } as TextContent, + { + type: "text", + text: `data:image/png;base64,${screenshot}`, + } as TextContent, + ], + isError: false, + }; + } + case "puppeteer_click": try { await page.click(args.selector); From 51593d9a9a8c5bd5b4c1915d889f3eda961a4c77 Mon Sep 17 00:00:00 2001 From: Jun Date: Tue, 6 May 2025 02:46:41 -0500 Subject: [PATCH 2/3] add optional encoded param to puppeteer_screenshot for base64 output --- src/puppeteer/README.md | 9 +------- src/puppeteer/index.ts | 49 +++++------------------------------------ 2 files changed, 7 insertions(+), 51 deletions(-) diff --git a/src/puppeteer/README.md b/src/puppeteer/README.md index 5b31f4afef..0364643786 100644 --- a/src/puppeteer/README.md +++ b/src/puppeteer/README.md @@ -22,14 +22,7 @@ A Model Context Protocol server that provides browser automation capabilities us - `selector` (string, optional): CSS selector for element to screenshot - `width` (number, optional, default: 800): Screenshot width - `height` (number, optional, default: 600): Screenshot height - -- **puppeteer_screenshot_encoded** - - Captures a screenshot of the entire page or a specific element and return it as a base64-encoded data URI. - - Inputs: - - `name` (string, required): Name for the screenshot - - `selector` (string, optional): CSS selector for element to screenshot - - `width` (number, optional, default: 800): Screenshot width - - `height` (number, optional, default: 600): Screenshot height + - `encoded` (boolean, optional): If true, capture the screenshot as a base64-encoded data URI (as text) instead of binary image content. Default false. - **puppeteer_click** diff --git a/src/puppeteer/index.ts b/src/puppeteer/index.ts index feabec7018..592e0cdc84 100644 --- a/src/puppeteer/index.ts +++ b/src/puppeteer/index.ts @@ -242,6 +242,7 @@ async function handleToolCall(name: string, args: any): Promise case "puppeteer_screenshot": { const width = args.width ?? 800; const height = args.height ?? 600; + const encoded = args.encoded ?? false; await page.setViewport({ width, height }); const screenshot = await (args.selector ? @@ -269,52 +270,14 @@ async function handleToolCall(name: string, args: any): Promise type: "text", text: `Screenshot '${args.name}' taken at ${width}x${height}`, } as TextContent, - { + encoded ? ({ + type: "text", + text: `data:image/png;base64,${screenshot}`, + } as TextContent) : ({ type: "image", data: screenshot, mimeType: "image/png", - } as ImageContent, - ], - isError: false, - }; - } - - case "puppeteer_screenshot_encoded": { - const width = args.width ?? 800; - const height = args.height ?? 600; - await page.setViewport({ width, height }); - - const screenshot = await (args.selector - ? (await page.$(args.selector))?.screenshot({ encoding: "base64" }) - : page.screenshot({ encoding: "base64", fullPage: false })); - - if (!screenshot) { - return { - content: [ - { - type: "text", - text: args.selector ? `Element not found: ${args.selector}` : "Screenshot failed", - }, - ], - isError: true, - }; - } - - screenshots.set(args.name, screenshot as string); - server.notification({ - method: "notifications/resources/list_changed", - }); - - return { - content: [ - { - type: "text", - text: `Screenshot '${args.name}' taken at ${width}x${height}`, - } as TextContent, - { - type: "text", - text: `data:image/png;base64,${screenshot}`, - } as TextContent, + } as ImageContent), ], isError: false, }; From 4bb9ec779e4218250fe2c2b9ef6087abc9ce1750 Mon Sep 17 00:00:00 2001 From: Jun Date: Tue, 6 May 2025 16:18:57 -0500 Subject: [PATCH 3/3] remove redundancy and update puppeteer_screenshot tool description --- src/puppeteer/index.ts | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/puppeteer/index.ts b/src/puppeteer/index.ts index 592e0cdc84..63007799b8 100644 --- a/src/puppeteer/index.ts +++ b/src/puppeteer/index.ts @@ -39,20 +39,7 @@ const TOOLS: Tool[] = [ selector: { type: "string", description: "CSS selector for element to screenshot" }, width: { type: "number", description: "Width in pixels (default: 800)" }, height: { type: "number", description: "Height in pixels (default: 600)" }, - }, - required: ["name"], - }, - }, - { - name: "puppeteer_screenshot_encoded", - description: "Take a screenshot of the current page or a specific element and return it as a base64-encoded data URI", - inputSchema: { - type: "object", - properties: { - name: { type: "string", description: "Name for the screenshot" }, - selector: { type: "string", description: "CSS selector for element to screenshot" }, - width: { type: "number", description: "Width in pixels (default: 800)" }, - height: { type: "number", description: "Height in pixels (default: 600)" }, + encoded: { type: "boolean", description: "If true, capture the screenshot as a base64-encoded data URI (as text) instead of binary image content. Default false." }, }, required: ["name"], },