Skip to content

Commit 306648b

Browse files
committed
complete integration of pageFilter options.
1 parent 4bacaf3 commit 306648b

20 files changed

+498
-80
lines changed

packages/agent/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,12 @@
4545
"license": "MIT",
4646
"dependencies": {
4747
"@anthropic-ai/sdk": "^0.37",
48+
"@mozilla/readability": "^0.5.0",
4849
"@playwright/test": "^1.50.1",
4950
"@vitest/browser": "^3.0.5",
5051
"chalk": "^5",
5152
"dotenv": "^16",
53+
"jsdom": "^26.0.0",
5254
"playwright": "^1.50.1",
5355
"uuid": "^11",
5456
"zod": "^3",

packages/agent/src/core/executeToolCall.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ export const executeToolCall = async (
2323
customPrefix: tool.logPrefix,
2424
});
2525

26-
const toolContext = {
26+
const toolContext: ToolContext = {
2727
...context,
2828
logger,
2929
};

packages/agent/src/core/toolAgent.respawn.test.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@ import { getTools } from '../../src/tools/getTools.js';
55
import { MockLogger } from '../utils/mockLogger.js';
66

77
import { TokenTracker } from './tokens.js';
8+
import { ToolContext } from './types.js';
89

9-
const toolContext = {
10+
const toolContext: ToolContext = {
1011
logger: new MockLogger(),
1112
headless: true,
1213
workingDirectory: '.',
14+
userSession: false,
15+
pageFilter: 'simple',
1316
tokenTracker: new TokenTracker(),
1417
};
1518
// Mock Anthropic SDK

packages/agent/src/core/toolAgent.test.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@ import { MockLogger } from '../utils/mockLogger.js';
55
import { executeToolCall } from './executeToolCall.js';
66
import { TokenTracker } from './tokens.js';
77
import { toolAgent } from './toolAgent.js';
8-
import { Tool } from './types.js';
8+
import { Tool, ToolContext } from './types.js';
99

10-
const toolContext = {
10+
const toolContext: ToolContext = {
1111
logger: new MockLogger(),
1212
headless: true,
1313
workingDirectory: '.',
14+
userSession: false,
15+
pageFilter: 'simple',
1416
tokenTracker: new TokenTracker(),
1517
};
1618

packages/agent/src/core/types.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@ import { TokenTracker } from './tokens.js';
66

77
export type TokenLevel = 'debug' | 'verbose' | 'info' | 'warn' | 'error';
88

9+
export type pageFilter = 'simple' | 'none' | 'readability';
10+
911
export type ToolContext = {
1012
logger: Logger;
1113
workingDirectory: string;
1214
headless: boolean;
13-
userSession?: boolean;
15+
userSession: boolean;
16+
pageFilter: pageFilter;
1417
tokenTracker: TokenTracker;
1518
};
1619

packages/agent/src/tools/browser/browseMessage.ts

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { Tool } from '../../core/types.js';
55
import { errorToString } from '../../utils/errorToString.js';
66
import { sleep } from '../../utils/sleep.js';
77

8-
import { getRenderedDOM } from './getRenderedDOM.js';
8+
import { filterPageContent } from './filterPageContent.js';
99
import { browserSessions, type BrowserAction, SelectorType } from './types.js';
1010

1111
// Schema for browser action
@@ -73,7 +73,10 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
7373
parameters: zodToJsonSchema(parameterSchema),
7474
returns: zodToJsonSchema(returnSchema),
7575

76-
execute: async ({ instanceId, action }, { logger }): Promise<ReturnType> => {
76+
execute: async (
77+
{ instanceId, action },
78+
{ logger, pageFilter },
79+
): Promise<ReturnType> => {
7780
// Validate action format
7881
if (!action || typeof action !== 'object') {
7982
logger.error('Invalid action format: action must be an object');
@@ -92,6 +95,7 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
9295
}
9396

9497
logger.verbose(`Executing browser action: ${action.actionType}`);
98+
logger.verbose(`Webpage processing mode: ${pageFilter}`);
9599

96100
try {
97101
const session = browserSessions.get(instanceId);
@@ -114,11 +118,12 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
114118
);
115119
await page.goto(action.url, { waitUntil: 'domcontentloaded' });
116120
await sleep(3000);
117-
const content = await getRenderedDOM(page);
121+
const content = await filterPageContent(page, pageFilter);
122+
logger.verbose(`Content: ${content}`);
118123
logger.verbose(
119124
'Navigation completed with domcontentloaded strategy',
120125
);
121-
logger.verbose(`Content: ${content}`);
126+
logger.verbose(`Content length: ${content.length} characters`);
122127
return { status: 'success', content };
123128
} catch (navError) {
124129
// If that fails, try with no waitUntil option
@@ -132,9 +137,9 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
132137
try {
133138
await page.goto(action.url);
134139
await sleep(3000);
135-
const content = await getRenderedDOM(page);
136-
logger.verbose('Navigation completed with basic strategy');
140+
const content = await filterPageContent(page, pageFilter);
137141
logger.verbose(`Content: ${content}`);
142+
logger.verbose('Navigation completed with basic strategy');
138143
return { status: 'success', content };
139144
} catch (innerError) {
140145
logger.error(
@@ -154,7 +159,8 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
154159
action.selectorType,
155160
);
156161
await page.click(clickSelector);
157-
const content = await page.content();
162+
await sleep(1000); // Wait for any content changes after click
163+
const content = await filterPageContent(page, pageFilter);
158164
logger.verbose(
159165
`Click action completed on selector: ${clickSelector}`,
160166
);
@@ -188,8 +194,9 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
188194
}
189195

190196
case 'content': {
191-
const content = await page.content();
197+
const content = await filterPageContent(page, pageFilter);
192198
logger.verbose('Page content retrieved successfully');
199+
logger.verbose(`Content length: ${content.length} characters`);
193200
return { status: 'success', content };
194201
}
195202

@@ -216,9 +223,12 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
216223
}
217224
},
218225

219-
logParameters: ({ action, description }, { logger }) => {
226+
logParameters: (
227+
{ action, description },
228+
{ logger, pageFilter = 'simple' },
229+
) => {
220230
logger.info(
221-
`Performing browser action: ${action.actionType}, ${description}`,
231+
`Performing browser action: ${action.actionType} with ${pageFilter} processing, ${description}`,
222232
);
223233
},
224234

packages/agent/src/tools/browser/browseStart.ts

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@ import { Tool } from '../../core/types.js';
77
import { errorToString } from '../../utils/errorToString.js';
88
import { sleep } from '../../utils/sleep.js';
99

10-
import { getRenderedDOM } from './getRenderedDOM.js';
10+
import { filterPageContent } from './filterPageContent.js';
1111
import { browserSessions } from './types.js';
1212

1313
const parameterSchema = z.object({
1414
url: z.string().url().optional().describe('Initial URL to navigate to'),
15-
1615
timeout: z
1716
.number()
1817
.optional()
@@ -42,12 +41,13 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
4241

4342
execute: async (
4443
{ url, timeout = 30000 },
45-
{ logger, headless = true, userSession = false },
44+
{ logger, headless, userSession, pageFilter },
4645
): Promise<ReturnType> => {
4746
logger.verbose(`Starting browser session${url ? ` at ${url}` : ''}`);
4847
logger.verbose(
4948
`User session mode: ${userSession ? 'enabled' : 'disabled'}`,
5049
);
50+
logger.verbose(`Webpage processing mode: ${pageFilter}`);
5151

5252
try {
5353
const instanceId = uuidv4();
@@ -102,7 +102,8 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
102102
);
103103
await page.goto(url, { waitUntil: 'domcontentloaded', timeout });
104104
await sleep(3000);
105-
content = await getRenderedDOM(page);
105+
content = await filterPageContent(page, pageFilter);
106+
logger.verbose(`Content: ${content}`);
106107
logger.verbose('Navigation completed with domcontentloaded strategy');
107108
} catch (error) {
108109
// If that fails, try with no waitUntil option at all (most basic)
@@ -116,7 +117,8 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
116117
try {
117118
await page.goto(url, { timeout });
118119
await sleep(3000);
119-
content = await getRenderedDOM(page);
120+
content = await filterPageContent(page, pageFilter);
121+
logger.verbose(`Content: ${content}`);
120122
logger.verbose('Navigation completed with basic strategy');
121123
} catch (innerError) {
122124
logger.error(
@@ -128,7 +130,7 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
128130
}
129131

130132
logger.verbose('Browser session started successfully');
131-
logger.verbose(`Content: ${content}`);
133+
logger.verbose(`Content length: ${content.length} characters`);
132134

133135
return {
134136
instanceId,
@@ -145,9 +147,9 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
145147
}
146148
},
147149

148-
logParameters: ({ url, description }, { logger }) => {
150+
logParameters: ({ url, description }, { logger, pageFilter = 'simple' }) => {
149151
logger.info(
150-
`Starting browser session${url ? ` at ${url}` : ''}, ${description}`,
152+
`Starting browser session${url ? ` at ${url}` : ''} with ${pageFilter} processing, ${description}`,
151153
);
152154
},
153155

packages/agent/src/tools/browser/filterPageContent.ts

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import { Readability } from '@mozilla/readability';
2+
import { JSDOM } from 'jsdom';
3+
import { Page } from 'playwright';
4+
5+
/**
 * Pass-through filter: hands back the page's HTML exactly as Playwright
 * serializes it, with no elements removed and no whitespace changes.
 *
 * @param page - Playwright page to read.
 * @returns The unmodified result of `page.content()`.
 */
async function getNoneProcessedDOM(page: Page): Promise<string> {
  const rawHtml = await page.content();
  return rawHtml;
}
11+
12+
/**
 * Processes the page using Mozilla's Readability to extract the main content.
 *
 * The page HTML is parsed into a JSDOM document (using the page URL as the
 * document URL) and handed to Readability. On success the extracted article
 * is returned as a pretty-printed JSON string with the fields `url`, `title`,
 * `content`, `textContent`, `excerpt`, `byline`, `dir`, `siteName` and
 * `length`; missing string fields become `''` and a missing length becomes `0`.
 *
 * Falls back to simple processing when Readability returns no article or when
 * anything in the extraction throws.
 *
 * @param page - Playwright page to read.
 * @returns The JSON-serialized article, or the simple-mode HTML on fallback.
 */
async function getReadabilityProcessedDOM(page: Page): Promise<string> {
  let dom: JSDOM | undefined;

  try {
    const html = await page.content();
    const url = page.url();
    dom = new JSDOM(html, { url });
    const article = new Readability(dom.window.document).parse();

    if (article) {
      // Return a formatted version of the article
      return JSON.stringify(
        {
          url: url,
          title: article.title || '',
          content: article.content || '',
          textContent: article.textContent || '',
          excerpt: article.excerpt || '',
          byline: article.byline || '',
          dir: article.dir || '',
          siteName: article.siteName || '',
          length: article.length || 0,
        },
        null,
        2,
      );
    }

    console.warn(
      'Readability could not parse the page, falling back to simple mode',
    );
  } catch (error) {
    console.error('Error using Readability:', error);
  } finally {
    // Release the parsed document; everything we return is already a plain
    // string, so nothing references the window after this point.
    dom?.window.close();
  }

  // Single fallback path for both "no article" and "extraction threw".
  // Deliberately outside the try block so a failure in simple mode is
  // propagated to the caller instead of being caught and retried.
  return getSimpleProcessedDOM(page);
}
53+
54+
/**
 * Processes the page by removing invisible elements and non-visual tags.
 *
 * An element counts as invisible when its computed style has
 * `display: none`, `visibility: hidden` or `opacity: 0`. Non-visual tags are
 * `noscript`, `iframe`, stylesheet `link`s, `meta`, `svg`, `img`, `symbol`,
 * `path`, `style` and `script`.
 *
 * All removals are applied to a deep clone of the document element, so the
 * live page keeps its scripts, styles and hidden elements and stays usable
 * for later actions on the same page.
 *
 * @param page - Playwright page to read.
 * @returns The filtered HTML with every run of whitespace collapsed to a
 *   single space.
 */
async function getSimpleProcessedDOM(page: Page): Promise<string> {
  const domContent = await page.evaluate(() => {
    const root = document.documentElement;
    // Work on a copy: removing nodes from `root` itself would strip content
    // out of the page the browser session is still using.
    const clone = root.cloneNode(true) as HTMLElement;

    // A deep clone has the same structure as the original, so both lists are
    // in the same document order and index i refers to the same element.
    const liveElements = root.querySelectorAll('*');
    const clonedElements = clone.querySelectorAll('*');

    const elementsToRemove: Element[] = [];
    liveElements.forEach((element, index) => {
      // Computed style must be read from the live element; the detached
      // clone is not rendered and has no meaningful computed style.
      const computedStyle = window.getComputedStyle(element);
      const isVisible =
        computedStyle.display !== 'none' &&
        computedStyle.visibility !== 'hidden' &&
        computedStyle.opacity !== '0';

      const counterpart = clonedElements[index];
      if (!isVisible && counterpart) {
        elementsToRemove.push(counterpart);
      }
    });

    const nonVisualTags = clone.querySelectorAll(
      'noscript, iframe, link[rel="stylesheet"], meta, svg, img, symbol, path, style, script',
    );
    nonVisualTags.forEach((element) => elementsToRemove.push(element));

    elementsToRemove.forEach((element) => element.remove());

    // Runs inside the page, so this is written to the browser console.
    console.log(
      'removing ',
      elementsToRemove.length,
      ' elements out of a total ',
      liveElements.length,
    );

    return clone.outerHTML;
  });

  // Collapse whitespace in one pass. Deleting newlines first would join
  // words that were separated only by a line break ("foo\nbar" -> "foobar").
  return domContent.replace(/\s+/g, ' ');
}
95+
96+
/**
 * Entry point for page content extraction: picks the processing strategy
 * named by `pageFilter` and returns its result.
 *
 * - `'none'` returns the raw page HTML.
 * - `'readability'` returns the Readability article extraction.
 * - `'simple'`, and any value that is not one of the above, returns the
 *   simple filtered DOM.
 *
 * @param page - Playwright page to read.
 * @param pageFilter - Name of the processing mode to apply.
 * @returns The page content produced by the selected mode.
 */
export async function filterPageContent(
  page: Page,
  pageFilter: 'simple' | 'none' | 'readability',
): Promise<string> {
  if (pageFilter === 'none') {
    return getNoneProcessedDOM(page);
  }

  if (pageFilter === 'readability') {
    return getReadabilityProcessedDOM(page);
  }

  // 'simple' is the catch-all mode.
  return getSimpleProcessedDOM(page);
}

packages/agent/src/tools/browser/getRenderedDOM.ts

Lines changed: 0 additions & 45 deletions
This file was deleted.

0 commit comments

Comments
 (0)