Skip to content

Commit fe1d1b1

Browse files
committed
more robust browser usage, allow using user's session.
1 parent 900ef27 commit fe1d1b1

File tree

6 files changed

+105
-9
lines changed

6 files changed

+105
-9
lines changed

packages/agent/src/core/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export type ToolContext = {
1010
logger: Logger;
1111
workingDirectory: string;
1212
headless: boolean;
13+
userSession?: boolean;
1314
tokenTracker: TokenTracker;
1415
};
1516

packages/agent/src/tools/browser/browseMessage.ts

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,23 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
7272
returns: zodToJsonSchema(returnSchema),
7373

7474
execute: async ({ instanceId, action }, { logger }): Promise<ReturnType> => {
75+
// Validate action format
76+
if (!action || typeof action !== 'object') {
77+
logger.error('Invalid action format: action must be an object');
78+
return {
79+
status: 'error',
80+
error: 'Invalid action format: action must be an object',
81+
};
82+
}
83+
84+
if (!action.actionType) {
85+
logger.error('Invalid action format: actionType is required');
86+
return {
87+
status: 'error',
88+
error: 'Invalid action format: actionType is required',
89+
};
90+
}
91+
7592
logger.verbose(`Executing browser action: ${action.actionType}`);
7693

7794
try {
@@ -87,10 +104,41 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
87104
if (!action.url) {
88105
throw new Error('URL required for goto action');
89106
}
90-
await page.goto(action.url, { waitUntil: 'networkidle' });
91-
const content = await page.content();
92-
logger.verbose('Navigation completed successfully');
93-
return { status: 'success', content };
107+
108+
try {
109+
// Try 'domcontentloaded' first, which is more reliable than 'networkidle'
110+
logger.verbose(
111+
`Navigating to ${action.url} with 'domcontentloaded' waitUntil`,
112+
);
113+
await page.goto(action.url, { waitUntil: 'domcontentloaded' });
114+
const content = await page.content();
115+
logger.verbose(
116+
'Navigation completed with domcontentloaded strategy',
117+
);
118+
logger.verbose(`Content: ${content}`);
119+
return { status: 'success', content };
120+
} catch (navError) {
121+
// If that fails, try with no waitUntil option
122+
logger.warn(
123+
`Failed with domcontentloaded strategy: ${errorToString(navError)}`,
124+
);
125+
logger.verbose(
126+
`Retrying navigation to ${action.url} with no waitUntil option`,
127+
);
128+
129+
try {
130+
await page.goto(action.url);
131+
const content = await page.content();
132+
logger.verbose('Navigation completed with basic strategy');
133+
logger.verbose(`Content: ${content}`);
134+
return { status: 'success', content };
135+
} catch (innerError) {
136+
logger.error(
137+
`Failed with basic navigation strategy: ${errorToString(innerError)}`,
138+
);
139+
throw innerError; // Re-throw to be caught by outer catch block
140+
}
141+
}
94142
}
95143

96144
case 'click': {

packages/agent/src/tools/browser/browseStart.ts

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,34 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
4040

4141
execute: async (
4242
{ url, timeout = 30000 },
43-
{ logger, headless = true },
43+
{ logger, headless = true, userSession = false },
4444
): Promise<ReturnType> => {
4545
logger.verbose(`Starting browser session${url ? ` at ${url}` : ''}`);
46+
logger.verbose(`User session mode: ${userSession ? 'enabled' : 'disabled'}`);
4647

4748
try {
4849
const instanceId = uuidv4();
4950

5051
// Launch browser
51-
const browser = await chromium.launch({
52+
const launchOptions = {
5253
headless,
53-
});
54+
};
55+
56+
// Use system Chrome installation if userSession is true
57+
if (userSession) {
58+
logger.verbose('Using system Chrome installation');
59+
// Setting the 'channel' launch option to 'chrome' selects the system Chrome installation
60+
launchOptions['channel'] = 'chrome';
61+
}
62+
63+
const browser = await chromium.launch(launchOptions);
5464

5565
// Create new context with default settings
5666
const context = await browser.newContext({
5767
viewport: null,
5868
userAgent:
5969
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
70+
serviceWorkers: 'block', // Block service workers which can cause continuous network activity
6071
});
6172

6273
// Create new page
@@ -80,11 +91,38 @@ export const browseStartTool: Tool<Parameters, ReturnType> = {
8091
// Navigate to URL if provided
8192
let content = '';
8293
if (url) {
83-
await page.goto(url, { waitUntil: 'networkidle' });
84-
content = await page.content();
94+
try {
95+
// Try 'domcontentloaded' first, which is more reliable than 'networkidle'
96+
logger.verbose(
97+
`Navigating to ${url} with 'domcontentloaded' waitUntil`,
98+
);
99+
await page.goto(url, { waitUntil: 'domcontentloaded', timeout });
100+
content = await page.content();
101+
logger.verbose('Navigation completed with domcontentloaded strategy');
102+
} catch (error) {
103+
// If that fails, try with no waitUntil option at all (most basic)
104+
logger.warn(
105+
`Failed with domcontentloaded strategy: ${errorToString(error)}`,
106+
);
107+
logger.verbose(
108+
`Retrying navigation to ${url} with no waitUntil option`,
109+
);
110+
111+
try {
112+
await page.goto(url, { timeout });
113+
content = await page.content();
114+
logger.verbose('Navigation completed with basic strategy');
115+
} catch (innerError) {
116+
logger.error(
117+
`Failed with basic navigation strategy: ${errorToString(innerError)}`,
118+
);
119+
throw innerError; // Re-throw to be caught by outer catch block
120+
}
121+
}
85122
}
86123

87124
logger.verbose('Browser session started successfully');
125+
logger.verbose(`Content: ${content}`);
88126

89127
return {
90128
instanceId,

packages/cli/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ mycoder --promptFile=your-prompt.txt
5454
- `-f, --file`: Read prompt from a specified file
5555
- `--log`: Set log level (info, verbose, warn, error)
5656
- `--tokenUsage`: Output token usage at info log level
57+
- `--headless`: Run the browser in headless mode, with no UI shown (default: true)
58+
- `--userSession`: Use the user's existing browser session instead of a sandboxed session (default: false)
5759
- `-h, --help`: Show help
5860
- `-V, --version`: Show version
5961

packages/cli/src/commands/$default.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ export const command: CommandModule<SharedOptions, DefaultArgs> = {
128128
const result = await toolAgent(prompt, tools, undefined, {
129129
logger,
130130
headless: argv.headless ?? true,
131+
userSession: argv.userSession ?? false,
131132
workingDirectory: '.',
132133
tokenTracker,
133134
});

packages/cli/src/options.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ export type SharedOptions = {
44
readonly file?: string;
55
readonly tokenUsage?: boolean;
66
readonly headless?: boolean;
7+
readonly userSession?: boolean;
78
};
89

910
export const sharedOptions = {
@@ -35,4 +36,9 @@ export const sharedOptions = {
3536
description: 'Use browser in headless mode with no UI showing',
3637
default: true,
3738
} as const,
39+
userSession: {
40+
type: 'boolean',
41+
description: 'Use user\'s existing browser session instead of sandboxed session',
42+
default: false,
43+
} as const,
3844
};

0 commit comments

Comments
 (0)