diff --git a/src/puppeteer/README.md b/src/puppeteer/README.md index f39b8a7bbd..0364643786 100644 --- a/src/puppeteer/README.md +++ b/src/puppeteer/README.md @@ -22,6 +22,7 @@ A Model Context Protocol server that provides browser automation capabilities us - `selector` (string, optional): CSS selector for element to screenshot - `width` (number, optional, default: 800): Screenshot width - `height` (number, optional, default: 600): Screenshot height + - `encoded` (boolean, optional): If true, capture the screenshot as a base64-encoded data URI (as text) instead of binary image content. Default false. - **puppeteer_click** diff --git a/src/puppeteer/index.ts b/src/puppeteer/index.ts index 1849c78398..63007799b8 100644 --- a/src/puppeteer/index.ts +++ b/src/puppeteer/index.ts @@ -39,6 +39,7 @@ const TOOLS: Tool[] = [ selector: { type: "string", description: "CSS selector for element to screenshot" }, width: { type: "number", description: "Width in pixels (default: 800)" }, height: { type: "number", description: "Height in pixels (default: 600)" }, + encoded: { type: "boolean", description: "If true, capture the screenshot as a base64-encoded data URI (as text) instead of binary image content. Default false." }, }, required: ["name"], }, @@ -228,6 +229,7 @@ async function handleToolCall(name: string, args: any): Promise case "puppeteer_screenshot": { const width = args.width ?? 800; const height = args.height ?? 600; + const encoded = args.encoded ?? false; await page.setViewport({ width, height }); const screenshot = await (args.selector ? @@ -255,11 +257,14 @@ async function handleToolCall(name: string, args: any): Promise type: "text", text: `Screenshot '${args.name}' taken at ${width}x${height}`, } as TextContent, - { + encoded ? ({ + type: "text", + text: `data:image/png;base64,${screenshot}`, + } as TextContent) : ({ type: "image", data: screenshot, mimeType: "image/png", - } as ImageContent, + } as ImageContent), ], isError: false, };