diff --git a/.changeset/eleven-apples-yell.md b/.changeset/eleven-apples-yell.md new file mode 100644 index 000000000..57f428f36 --- /dev/null +++ b/.changeset/eleven-apples-yell.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Add support for aborting / stopping an agent run & continuing an agent run using messages from prior runs diff --git a/packages/core/lib/v3/agent/utils/validateExperimentalFeatures.ts b/packages/core/lib/v3/agent/utils/validateExperimentalFeatures.ts new file mode 100644 index 000000000..b8ad53b52 --- /dev/null +++ b/packages/core/lib/v3/agent/utils/validateExperimentalFeatures.ts @@ -0,0 +1,91 @@ +import { + ExperimentalNotConfiguredError, + StagehandInvalidArgumentError, +} from "../../types/public/sdkErrors"; +import type { AgentConfig, AgentExecuteOptionsBase } from "../../types/public"; + +export interface AgentValidationOptions { + /** Whether experimental mode is enabled */ + isExperimental: boolean; + /** Agent config options (integrations, tools, stream, cua, etc.) */ + agentConfig?: Partial<AgentConfig>; + /** Execute options (callbacks, signal, messages, etc.) */ + executeOptions?: + | (Partial<AgentExecuteOptionsBase> & { callbacks?: unknown }) + | null; + /** Whether this is streaming mode (can be derived from agentConfig.stream) */ + isStreaming?: boolean; +} + +/** + * Validates agent configuration and experimental feature usage. + * + * This utility consolidates all validation checks for both CUA and non-CUA agent paths: + * - Invalid argument errors for CUA (streaming, abort signal, message continuation are not supported) + * - Experimental feature checks (integrations and tools for all agents; callbacks, signal, messages, streaming for non-CUA only) + * + * Throws StagehandInvalidArgumentError for invalid/unsupported configurations. + * Throws ExperimentalNotConfiguredError if experimental features are used without experimental mode.
+ */ +export function validateExperimentalFeatures( + options: AgentValidationOptions, +): void { + const { isExperimental, agentConfig, executeOptions, isStreaming } = options; + + // CUA-specific validation: certain features are not available at all + if (agentConfig?.cua) { + const unsupportedFeatures: string[] = []; + + if (agentConfig?.stream) { + unsupportedFeatures.push("streaming"); + } + if (executeOptions?.signal) { + unsupportedFeatures.push("abort signal"); + } + if (executeOptions?.messages) { + unsupportedFeatures.push("message continuation"); + } + + if (unsupportedFeatures.length > 0) { + throw new StagehandInvalidArgumentError( + `${unsupportedFeatures.join(", ")} ${unsupportedFeatures.length === 1 ? "is" : "are"} not supported with CUA (Computer Use Agent) mode.`, + ); + } + } + + // Skip experimental checks if already in experimental mode + if (isExperimental) return; + + const features: string[] = []; + + // Check agent config features (check array length to avoid false positives for empty arrays) + const hasIntegrations = + agentConfig?.integrations && agentConfig.integrations.length > 0; + const hasTools = + agentConfig?.tools && Object.keys(agentConfig.tools).length > 0; + if (hasIntegrations || hasTools) { + features.push("MCP integrations and custom tools"); + } + + // Check streaming mode (either explicit or derived from config) - only for non-CUA + if (!agentConfig?.cua && (isStreaming || agentConfig?.stream)) { + features.push("streaming"); + } + + // Check execute options features - only for non-CUA + if (executeOptions && !agentConfig?.cua) { + if (executeOptions.callbacks) { + features.push("callbacks"); + } + if (executeOptions.signal) { + features.push("abort signal"); + } + if (executeOptions.messages) { + features.push("message continuation"); + } + } + + if (features.length > 0) { + throw new ExperimentalNotConfiguredError(`Agent ${features.join(", ")}`); + } +} diff --git a/packages/core/lib/v3/handlers/v3AgentHandler.ts 
b/packages/core/lib/v3/handlers/v3AgentHandler.ts index 6648256bf..081004a6a 100644 --- a/packages/core/lib/v3/handlers/v3AgentHandler.ts +++ b/packages/core/lib/v3/handlers/v3AgentHandler.ts @@ -28,8 +28,13 @@ import { mapToolResultToActions } from "../agent/utils/actionMapping"; import { MissingLLMConfigurationError, StreamingCallbacksInNonStreamingModeError, + AgentAbortError, } from "../types/public/sdkErrors"; +function getErrorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + export class V3AgentHandler { private v3: V3; private logger: (message: LogLine) => void; @@ -71,9 +76,11 @@ export class V3AgentHandler { ); const tools = this.createTools(); const allTools: ToolSet = { ...tools, ...this.mcpTools }; - const messages: ModelMessage[] = [ - { role: "user", content: options.instruction }, - ]; + + // Use provided messages for continuation, or start fresh with the instruction + const messages: ModelMessage[] = options.messages?.length + ? 
[...options.messages, { role: "user", content: options.instruction }] + : [{ role: "user", content: options.instruction }]; if (!this.llmClient?.getLanguageModel) { throw new MissingLLMConfigurationError(); @@ -174,41 +181,52 @@ export class V3AgentHandler { instructionOrOptions: string | AgentExecuteOptions, ): Promise<AgentResult> { const startTime = Date.now(); - const { - maxSteps, - systemPrompt, - allTools, - messages, - wrappedModel, - initialPageUrl, - } = await this.prepareAgent(instructionOrOptions); - - const callbacks = (instructionOrOptions as AgentExecuteOptions).callbacks; - - if (callbacks) { - const streamingOnlyCallbacks = [ - "onChunk", - "onFinish", - "onError", - "onAbort", - ]; - const invalidCallbacks = streamingOnlyCallbacks.filter( - (name) => callbacks[name as keyof typeof callbacks] != null, - ); - if (invalidCallbacks.length > 0) { - throw new StreamingCallbacksInNonStreamingModeError(invalidCallbacks); - } - } + const signal = + typeof instructionOrOptions === "object" + ?
instructionOrOptions.signal + : undefined; const state: AgentState = { collectedReasoning: [], actions: [], finalMessage: "", completed: false, - currentPageUrl: initialPageUrl, + currentPageUrl: "", }; + let messages: ModelMessage[] = []; + try { + const { + options, + maxSteps, + systemPrompt, + allTools, + messages: preparedMessages, + wrappedModel, + initialPageUrl, + } = await this.prepareAgent(instructionOrOptions); + + messages = preparedMessages; + state.currentPageUrl = initialPageUrl; + + const callbacks = (instructionOrOptions as AgentExecuteOptions).callbacks; + + if (callbacks) { + const streamingOnlyCallbacks = [ + "onChunk", + "onFinish", + "onError", + "onAbort", + ]; + const invalidCallbacks = streamingOnlyCallbacks.filter( + (name) => callbacks[name as keyof typeof callbacks] != null, + ); + if (invalidCallbacks.length > 0) { + throw new StreamingCallbacksInNonStreamingModeError(invalidCallbacks); + } + } + const result = await this.llmClient.generateText({ model: wrappedModel, system: systemPrompt, @@ -219,21 +237,41 @@ export class V3AgentHandler { toolChoice: "auto", prepareStep: callbacks?.prepareStep, onStepFinish: this.createStepHandler(state, callbacks?.onStepFinish), + abortSignal: options.signal, }); - return this.consolidateMetricsAndResult(startTime, state, result); + return this.consolidateMetricsAndResult( + startTime, + state, + messages, + result, + ); } catch (error) { - const errorMessage = error?.message ?? String(error); + // Re-throw validation errors that should propagate to the caller + if (error instanceof StreamingCallbacksInNonStreamingModeError) { + throw error; + } + + // Re-throw abort errors wrapped in AgentAbortError for consistent error typing + if (signal?.aborted) { + const reason = signal.reason ? 
String(signal.reason) : "aborted"; + throw new AgentAbortError(reason); + } + + const errorMessage = getErrorMessage(error); this.logger({ category: "agent", message: `Error executing agent task: ${errorMessage}`, level: 0, }); + + // For non-abort errors, return a failure result instead of throwing return { success: false, actions: state.actions, message: `Failed to execute task: ${errorMessage}`, completed: false, + messages, }; } } @@ -242,6 +280,7 @@ export class V3AgentHandler { instructionOrOptions: string | AgentStreamExecuteOptions, ): Promise { const { + options, maxSteps, systemPrompt, allTools, @@ -269,17 +308,6 @@ export class V3AgentHandler { rejectResult = reject; }); - const handleError = (error: unknown) => { - const errorMessage = - error instanceof Error ? error.message : String(error); - this.logger({ - category: "agent", - message: `Error during streaming: ${errorMessage}`, - level: 0, - }); - rejectResult(error); - }; - const streamResult = this.llmClient.streamText({ model: wrappedModel, system: systemPrompt, @@ -294,24 +322,45 @@ export class V3AgentHandler { if (callbacks?.onError) { callbacks.onError(event); } - handleError(event.error); + // Convert abort errors to AgentAbortError for consistent error typing + if (options.signal?.aborted) { + const reason = options.signal.reason + ? 
String(options.signal.reason) + : "aborted"; + rejectResult(new AgentAbortError(reason)); + } else { + this.logger({ + category: "agent", + message: `Error during streaming: ${getErrorMessage(event.error)}`, + level: 0, + }); + rejectResult(event.error); + } }, onChunk: callbacks?.onChunk, onFinish: (event) => { if (callbacks?.onFinish) { callbacks.onFinish(event); } - try { - const result = this.consolidateMetricsAndResult( - startTime, - state, - event, - ); - resolveResult(result); - } catch (error) { - handleError(error); + const result = this.consolidateMetricsAndResult( + startTime, + state, + messages, + event, + ); + resolveResult(result); + }, + onAbort: (event) => { + if (callbacks?.onAbort) { + callbacks.onAbort(event); } + // Reject the result promise with AgentAbortError when stream is aborted + const reason = options.signal?.reason + ? String(options.signal.reason) + : "Stream was aborted"; + rejectResult(new AgentAbortError(reason)); }, + abortSignal: options.signal, }); const agentStreamResult = streamResult as AgentStreamResult; @@ -322,7 +371,12 @@ export class V3AgentHandler { private consolidateMetricsAndResult( startTime: number, state: AgentState, - result: { text?: string; usage?: LanguageModelUsage }, + inputMessages: ModelMessage[], + result: { + text?: string; + usage?: LanguageModelUsage; + response?: { messages?: ModelMessage[] }; + }, ): AgentResult { if (!state.finalMessage) { const allReasoning = state.collectedReasoning.join(" ").trim(); @@ -342,6 +396,13 @@ export class V3AgentHandler { ); } + // Combine input messages with response messages for full conversation history + const responseMessages = result.response?.messages || []; + const fullMessages: ModelMessage[] = [ + ...inputMessages, + ...responseMessages, + ]; + return { success: state.completed, message: state.finalMessage || "Task execution completed", @@ -356,6 +417,7 @@ export class V3AgentHandler { inference_time_ms: inferenceTimeMs, } : undefined, + messages: 
fullMessages, }; } diff --git a/packages/core/lib/v3/tests/agent-abort-signal.spec.ts b/packages/core/lib/v3/tests/agent-abort-signal.spec.ts new file mode 100644 index 000000000..f75efd848 --- /dev/null +++ b/packages/core/lib/v3/tests/agent-abort-signal.spec.ts @@ -0,0 +1,163 @@ +import { test, expect } from "@playwright/test"; +import { V3 } from "../v3"; +import { v3TestConfig } from "./v3.config"; +import { AgentAbortError } from "../types/public/sdkErrors"; + +test.describe("Stagehand agent abort signal", () => { + let v3: V3; + + test.beforeEach(async () => { + v3 = new V3({ + ...v3TestConfig, + experimental: true, + }); + await v3.init(); + }); + + test.afterEach(async () => { + await v3?.close?.().catch(() => {}); + }); + + test("non-streaming: abort signal stops execution and throws AgentAbortError", async () => { + test.setTimeout(60000); + + const agent = v3.agent({ + model: "anthropic/claude-haiku-4-5-20251001", + }); + + const page = v3.context.pages()[0]; + await page.goto("https://example.com"); + + const controller = new AbortController(); + + // Abort after 500ms - should be enough for the LLM to start but not finish + setTimeout(() => controller.abort(), 500); + + await expect( + agent.execute({ + instruction: + "Describe every visual element on this page in extreme detail. 
Do not use the close tool until you have described at least 100 different elements.", + maxSteps: 50, + signal: controller.signal, + }), + ).rejects.toThrow(AgentAbortError); + }); + + test("streaming: abort signal stops stream and rejects result with AgentAbortError", async () => { + test.setTimeout(60000); + + const agent = v3.agent({ + stream: true, + model: "anthropic/claude-haiku-4-5-20251001", + }); + + const page = v3.context.pages()[0]; + await page.goto("https://example.com"); + + const controller = new AbortController(); + + // Abort after 500ms + setTimeout(() => controller.abort(), 500); + + const streamResult = await agent.execute({ + instruction: + "Describe every visual element on this page in extreme detail. Do not use the close tool until you have described at least 100 different elements.", + maxSteps: 50, + signal: controller.signal, + }); + + // Handle both stream consumption and result promise together + // The result promise will reject with AgentAbortError when aborted + const consumeStream = async () => { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _ of streamResult.textStream) { + // Just consume chunks until stream ends + } + }; + + // Both should complete - stream ends and result rejects + const [, resultError] = await Promise.allSettled([ + consumeStream(), + streamResult.result, + ]); + + // The result should have rejected with AgentAbortError + expect(resultError.status).toBe("rejected"); + expect((resultError as PromiseRejectedResult).reason).toBeInstanceOf( + AgentAbortError, + ); + }); + + test("non-streaming: already aborted signal throws AgentAbortError immediately", async () => { + test.setTimeout(10000); + + const agent = v3.agent({ + model: "anthropic/claude-haiku-4-5-20251001", + }); + + const page = v3.context.pages()[0]; + await page.goto("https://example.com"); + + // Create an already aborted controller + const controller = new AbortController(); + controller.abort(); + + await 
expect( + agent.execute({ + instruction: "This should not run.", + maxSteps: 3, + signal: controller.signal, + }), + ).rejects.toThrow(AgentAbortError); + }); + + test("non-streaming: execution completes normally without abort signal", async () => { + test.setTimeout(60000); + + const agent = v3.agent({ + model: "anthropic/claude-haiku-4-5-20251001", + }); + + const page = v3.context.pages()[0]; + await page.goto("https://example.com"); + + // No signal provided - should complete normally + const result = await agent.execute({ + instruction: "Use the close tool with taskComplete: true immediately.", + maxSteps: 3, + }); + + expect(result.success).toBe(true); + expect(result.completed).toBe(true); + }); + + test("streaming: execution completes normally without abort signal", async () => { + test.setTimeout(60000); + + const agent = v3.agent({ + stream: true, + model: "anthropic/claude-haiku-4-5-20251001", + }); + + const page = v3.context.pages()[0]; + await page.goto("https://example.com"); + + // No signal provided - should complete normally + const streamResult = await agent.execute({ + instruction: "Use the close tool with taskComplete: true immediately.", + maxSteps: 3, + }); + + // Consume the stream first + // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _ of streamResult.textStream) { + // Just consume + } + + // Now get the final result + const result = await streamResult.result; + + expect(result.success).toBe(true); + expect(result.completed).toBe(true); + }); +}); diff --git a/packages/core/lib/v3/tests/agent-experimental-validation.spec.ts b/packages/core/lib/v3/tests/agent-experimental-validation.spec.ts new file mode 100644 index 000000000..9fb861265 --- /dev/null +++ b/packages/core/lib/v3/tests/agent-experimental-validation.spec.ts @@ -0,0 +1,402 @@ +import { test, expect } from "@playwright/test"; +import { z } from "zod"; +import { tool } from "ai"; +import { V3 } from "../v3"; +import { v3TestConfig } from 
"./v3.config"; +import { + ExperimentalNotConfiguredError, + StagehandInvalidArgumentError, +} from "../types/public/sdkErrors"; + +// Define a mock custom tool for testing +const mockCustomTool = tool({ + description: "A mock tool for testing", + inputSchema: z.object({ + input: z.string().describe("The input string"), + }), + execute: async ({ input }) => { + return `Processed: ${input}`; + }, +}); + +test.describe("Stagehand agent experimental feature validation", () => { + test.describe("Invalid argument errors", () => { + let v3: V3; + + test.beforeEach(async () => { + v3 = new V3({ + ...v3TestConfig, + experimental: false, + }); + await v3.init(); + }); + + test.afterEach(async () => { + await v3?.close?.().catch(() => {}); + }); + + test("throws StagehandInvalidArgumentError when CUA and streaming are both enabled", async () => { + try { + v3.agent({ + cua: true, + stream: true, + model: "anthropic/claude-sonnet-4-20250514", + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(StagehandInvalidArgumentError); + expect((error as Error).message).toContain("streaming"); + expect((error as Error).message).toContain("not supported with CUA"); + } + }); + + test("throws StagehandInvalidArgumentError for CUA + streaming even with experimental: true", async () => { + // Close the non-experimental instance + await v3.close(); + + // Create an experimental instance + const v3Experimental = new V3({ + ...v3TestConfig, + experimental: true, + }); + await v3Experimental.init(); + + try { + v3Experimental.agent({ + cua: true, + stream: true, + model: "anthropic/claude-sonnet-4-20250514", + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(StagehandInvalidArgumentError); + expect((error as Error).message).toContain("streaming"); + expect((error as Error).message).toContain("not supported with CUA"); + } finally { + await v3Experimental.close(); + } + }); + }); + + 
test.describe("Experimental feature errors without experimental: true", () => { + let v3: V3; + + test.beforeEach(async () => { + v3 = new V3({ + ...v3TestConfig, + experimental: false, + }); + await v3.init(); + }); + + test.afterEach(async () => { + await v3?.close?.().catch(() => {}); + }); + + test("throws ExperimentalNotConfiguredError for MCP integrations", async () => { + const agent = v3.agent({ + model: "anthropic/claude-sonnet-4-20250514", + integrations: ["https://mcp.example.com"], + }); + + try { + await agent.execute("test"); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ExperimentalNotConfiguredError); + expect((error as Error).message).toContain( + "MCP integrations and custom tools", + ); + } + }); + + test("throws ExperimentalNotConfiguredError for custom tools", async () => { + const agent = v3.agent({ + model: "anthropic/claude-sonnet-4-20250514", + tools: { + mockCustomTool, + }, + }); + + try { + await agent.execute("test"); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ExperimentalNotConfiguredError); + expect((error as Error).message).toContain( + "MCP integrations and custom tools", + ); + } + }); + + test("throws ExperimentalNotConfiguredError for streaming mode", async () => { + try { + const agent = v3.agent({ + stream: true, + model: "anthropic/claude-sonnet-4-20250514", + }); + await agent.execute("test instruction"); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ExperimentalNotConfiguredError); + expect((error as Error).message).toContain("streaming"); + } + }); + + test("throws ExperimentalNotConfiguredError for callbacks", async () => { + const agent = v3.agent({ + model: "anthropic/claude-sonnet-4-20250514", + }); + + try { + await agent.execute({ + instruction: "test", + callbacks: { + onStepFinish: async () => {}, + }, + }); + throw new Error("Expected error to be 
thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ExperimentalNotConfiguredError); + expect((error as Error).message).toContain("callbacks"); + } + }); + + test("throws ExperimentalNotConfiguredError for abort signal", async () => { + const agent = v3.agent({ + model: "anthropic/claude-sonnet-4-20250514", + }); + + const controller = new AbortController(); + try { + await agent.execute({ + instruction: "test", + signal: controller.signal, + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ExperimentalNotConfiguredError); + expect((error as Error).message).toContain("abort signal"); + } + }); + + test("throws ExperimentalNotConfiguredError for message continuation", async () => { + const agent = v3.agent({ + model: "anthropic/claude-sonnet-4-20250514", + }); + + try { + await agent.execute({ + instruction: "test", + messages: [{ role: "user", content: "previous message" }], + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ExperimentalNotConfiguredError); + expect((error as Error).message).toContain("message continuation"); + } + }); + + test("throws ExperimentalNotConfiguredError listing multiple features", async () => { + const agent = v3.agent({ + model: "anthropic/claude-sonnet-4-20250514", + }); + + const controller = new AbortController(); + try { + await agent.execute({ + instruction: "test", + callbacks: { onStepFinish: async () => {} }, + signal: controller.signal, + messages: [{ role: "user", content: "previous" }], + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ExperimentalNotConfiguredError); + const message = (error as Error).message; + expect(message).toContain("callbacks"); + expect(message).toContain("abort signal"); + expect(message).toContain("message continuation"); + } + }); + }); + + test.describe("CUA agent unsupported features", () => { + let v3: V3; + + 
test.beforeEach(async () => { + v3 = new V3({ + ...v3TestConfig, + experimental: false, + }); + await v3.init(); + }); + + test.afterEach(async () => { + await v3?.close?.().catch(() => {}); + }); + + test("throws ExperimentalNotConfiguredError for CUA with integrations", async () => { + // MCP integrations are still an experimental feature check (not unsupported) + try { + v3.agent({ + cua: true, + model: "anthropic/claude-sonnet-4-20250514", + integrations: ["https://mcp.example.com"], + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(ExperimentalNotConfiguredError); + expect((error as Error).message).toContain( + "MCP integrations and custom tools", + ); + } + }); + + test("throws StagehandInvalidArgumentError for CUA with abort signal (not supported)", async () => { + const agent = v3.agent({ + cua: true, + model: "anthropic/claude-sonnet-4-20250514", + }); + + const controller = new AbortController(); + try { + await agent.execute({ + instruction: "test", + signal: controller.signal, + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(StagehandInvalidArgumentError); + expect((error as Error).message).toContain("abort signal"); + expect((error as Error).message).toContain("not supported with CUA"); + } + }); + + test("throws StagehandInvalidArgumentError for CUA with message continuation (not supported)", async () => { + const agent = v3.agent({ + cua: true, + model: "anthropic/claude-sonnet-4-20250514", + }); + + try { + await agent.execute({ + instruction: "test", + messages: [{ role: "user", content: "previous message" }], + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(StagehandInvalidArgumentError); + expect((error as Error).message).toContain("message continuation"); + expect((error as Error).message).toContain("not supported with CUA"); + } + }); + + test("throws 
StagehandInvalidArgumentError for CUA with multiple unsupported features", async () => { + const agent = v3.agent({ + cua: true, + model: "anthropic/claude-sonnet-4-20250514", + }); + + const controller = new AbortController(); + try { + await agent.execute({ + instruction: "test", + signal: controller.signal, + messages: [{ role: "user", content: "previous message" }], + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(StagehandInvalidArgumentError); + const message = (error as Error).message; + expect(message).toContain("abort signal"); + expect(message).toContain("message continuation"); + expect(message).toContain("are not supported with CUA"); + } + }); + + test("throws StagehandInvalidArgumentError for CUA unsupported features even with experimental: true", async () => { + // Close the non-experimental instance + await v3.close(); + + // Create an experimental instance + const v3Experimental = new V3({ + ...v3TestConfig, + experimental: true, + }); + await v3Experimental.init(); + + const agent = v3Experimental.agent({ + cua: true, + model: "anthropic/claude-sonnet-4-20250514", + }); + + const controller = new AbortController(); + try { + await agent.execute({ + instruction: "test", + signal: controller.signal, + }); + throw new Error("Expected error to be thrown"); + } catch (error) { + expect(error).toBeInstanceOf(StagehandInvalidArgumentError); + expect((error as Error).message).toContain("not supported with CUA"); + } finally { + await v3Experimental.close(); + } + }); + }); + + test.describe("Valid configurations with experimental: true", () => { + let v3: V3; + + test.beforeEach(async () => { + v3 = new V3({ + ...v3TestConfig, + experimental: true, + }); + await v3.init(); + }); + + test.afterEach(async () => { + await v3?.close?.().catch(() => {}); + }); + + test("allows CUA without streaming", () => { + expect(() => + v3.agent({ + cua: true, + model: "anthropic/claude-sonnet-4-20250514", + }), + 
).not.toThrow(); + }); + + test("allows streaming mode", () => { + expect(() => + v3.agent({ + stream: true, + model: "anthropic/claude-sonnet-4-20250514", + }), + ).not.toThrow(); + }); + + test("allows basic agent without experimental features", async () => { + const v3NonExperimental = new V3({ + ...v3TestConfig, + experimental: false, + }); + await v3NonExperimental.init(); + + try { + // This should work - just creating a basic agent with no experimental features + expect(() => + v3NonExperimental.agent({ + model: "anthropic/claude-sonnet-4-20250514", + }), + ).not.toThrow(); + } finally { + await v3NonExperimental.close(); + } + }); + }); +}); diff --git a/packages/core/lib/v3/tests/agent-message-continuation.spec.ts b/packages/core/lib/v3/tests/agent-message-continuation.spec.ts new file mode 100644 index 000000000..ab19bde97 --- /dev/null +++ b/packages/core/lib/v3/tests/agent-message-continuation.spec.ts @@ -0,0 +1,152 @@ +import { test, expect } from "@playwright/test"; +import { V3 } from "../v3"; +import { v3TestConfig } from "./v3.config"; +import type { ModelMessage } from "ai"; + +test.describe("Stagehand agent message continuation", () => { + let v3: V3; + + test.beforeEach(async () => { + v3 = new V3({ + ...v3TestConfig, + experimental: true, + }); + await v3.init(); + }); + + test.afterEach(async () => { + await v3?.close?.().catch(() => {}); + }); + + test("execute returns messages in the result", async () => { + test.setTimeout(60000); + + const agent = v3.agent({ + model: "anthropic/claude-haiku-4-5-20251001", + }); + + const page = v3.context.pages()[0]; + await page.goto("https://example.com"); + + const result = await agent.execute({ + instruction: + "What is the title of this page? 
Use close tool with taskComplete: true after answering.", + maxSteps: 5, + }); + + // Result should contain messages + expect(result.messages).toBeDefined(); + expect(Array.isArray(result.messages)).toBe(true); + expect(result.messages!.length).toBeGreaterThan(0); + + // First message should be the user instruction + const firstMessage = result.messages![0]; + expect(firstMessage.role).toBe("user"); + }); + + test("can continue conversation with previous messages", async () => { + test.setTimeout(120000); + + const agent = v3.agent({ + model: "anthropic/claude-haiku-4-5-20251001", + }); + + const page = v3.context.pages()[0]; + await page.goto("https://example.com"); + + // First execution + const result1 = await agent.execute({ + instruction: + "What is the title of this page? Use close tool with taskComplete: true after answering.", + maxSteps: 5, + }); + + expect(result1.messages).toBeDefined(); + expect(result1.messages!.length).toBeGreaterThan(0); + + // Second execution continuing from first + const result2 = await agent.execute({ + instruction: + "Based on what you just told me, is this a simple or complex website? 
Use close tool with taskComplete: true after answering.", + maxSteps: 5, + messages: result1.messages, + }); + + expect(result2.messages).toBeDefined(); + // Second result should have more messages (includes first conversation) + expect(result2.messages!.length).toBeGreaterThan(result1.messages!.length); + }); + + test("messages include tool calls and results", async () => { + test.setTimeout(60000); + + const agent = v3.agent({ + model: "anthropic/claude-haiku-4-5-20251001", + }); + + const page = v3.context.pages()[0]; + await page.goto("https://example.com"); + + const result = await agent.execute({ + instruction: + "Use the ariaTree tool to see the page, then use close tool with taskComplete: true.", + maxSteps: 5, + }); + + expect(result.messages).toBeDefined(); + + // Verify there are assistant messages + const assistantMessages = result.messages!.filter( + (m: ModelMessage) => m.role === "assistant", + ); + expect(assistantMessages.length).toBeGreaterThan(0); + + // Verify at least one assistant message contains tool calls + const hasToolCalls = assistantMessages.some((m: ModelMessage) => { + if (Array.isArray(m.content)) { + return m.content.some( + (part) => typeof part === "object" && part.type === "tool-call", + ); + } + return false; + }); + expect(hasToolCalls).toBe(true); + + // Verify there are tool result messages + const hasToolResults = result.messages!.some( + (m: ModelMessage) => m.role === "tool", + ); + expect(hasToolResults).toBe(true); + }); + + test("streaming mode also returns messages", async () => { + test.setTimeout(60000); + + const agent = v3.agent({ + stream: true, + model: "anthropic/claude-haiku-4-5-20251001", + }); + + const page = v3.context.pages()[0]; + await page.goto("https://example.com"); + + const streamResult = await agent.execute({ + instruction: + "What is this page? 
Use close tool with taskComplete: true after answering.", + maxSteps: 5, + }); + + // Consume the stream + // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _ of streamResult.textStream) { + // Just consume + } + + const result = await streamResult.result; + + // Result should contain messages + expect(result.messages).toBeDefined(); + expect(Array.isArray(result.messages)).toBe(true); + expect(result.messages!.length).toBeGreaterThan(0); + }); +}); diff --git a/packages/core/lib/v3/tests/agent-streaming.spec.ts b/packages/core/lib/v3/tests/agent-streaming.spec.ts index 35020beb3..bc8951e63 100644 --- a/packages/core/lib/v3/tests/agent-streaming.spec.ts +++ b/packages/core/lib/v3/tests/agent-streaming.spec.ts @@ -48,6 +48,13 @@ test.describe("Stagehand agent streaming behavior", () => { // result should be a promise expect(streamResult.result).toBeInstanceOf(Promise); + + // Consume stream to avoid unhandled rejection on close + // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _ of streamResult.textStream) { + // Just consume + } + await streamResult.result; }); test("textStream yields chunks incrementally", async () => { @@ -151,7 +158,7 @@ test.describe("Stagehand agent streaming behavior", () => { stream: true, model: "anthropic/claude-haiku-4-5-20251001", }); - }).toThrow("Streaming is not supported with CUA"); + }).toThrow("streaming is not supported with CUA"); }); test("allows cua: true without stream", () => { diff --git a/packages/core/lib/v3/types/public/agent.ts b/packages/core/lib/v3/types/public/agent.ts index 97d161629..2a3069fdf 100644 --- a/packages/core/lib/v3/types/public/agent.ts +++ b/packages/core/lib/v3/types/public/agent.ts @@ -14,6 +14,9 @@ import { } from "ai"; import { LogLine } from "./logs"; import { ClientOptions } from "./model"; + +// Re-export ModelMessage for consumers who want to use it for conversation continuation +export type { ModelMessage } from "ai"; import { Page 
as PlaywrightPage } from "playwright-core"; import { Page as PuppeteerPage } from "puppeteer-core"; import { Page as PatchrightPage } from "patchright-core"; @@ -63,6 +66,12 @@ export interface AgentResult { cached_input_tokens?: number; inference_time_ms: number; }; + /** + * The conversation messages from this execution. + * Pass these to a subsequent execute() call via the `messages` option to continue the conversation. + * @experimental + */ + messages?: ModelMessage[]; } export type AgentStreamResult = StreamTextResult & { @@ -207,6 +216,29 @@ export interface AgentExecuteOptionsBase { maxSteps?: number; page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page; highlightCursor?: boolean; + /** + * Previous conversation messages to continue from. + * Pass the `messages` from a previous AgentResult to continue that conversation. + * @experimental + */ + messages?: ModelMessage[]; + /** + * An AbortSignal that can be used to cancel the agent execution. + * When aborted, the agent will stop and return a partial result. + * @experimental + * + * @example + * ```typescript + * const controller = new AbortController(); + * setTimeout(() => controller.abort(), 30000); // 30 second timeout + * + * const result = await agent.execute({ + * instruction: "...", + * signal: controller.signal + * }); + * ``` + */ + signal?: AbortSignal; } /** diff --git a/packages/core/lib/v3/types/public/sdkErrors.ts b/packages/core/lib/v3/types/public/sdkErrors.ts index 39b519cf1..56efe3850 100644 --- a/packages/core/lib/v3/types/public/sdkErrors.ts +++ b/packages/core/lib/v3/types/public/sdkErrors.ts @@ -352,3 +352,15 @@ export class StreamingCallbacksInNonStreamingModeError extends StagehandError { this.invalidCallbacks = invalidCallbacks; } } + +export class AgentAbortError extends StagehandError { + public readonly reason: string; + + constructor(reason?: string) { + const message = reason + ? 
`Agent execution was aborted: ${reason}` + : "Agent execution was aborted"; + super(message); + this.reason = reason || "aborted"; + } +} diff --git a/packages/core/lib/v3/v3.ts b/packages/core/lib/v3/v3.ts index e564cebbb..d17ebc3a6 100644 --- a/packages/core/lib/v3/v3.ts +++ b/packages/core/lib/v3/v3.ts @@ -60,7 +60,6 @@ import { PatchrightPage, PlaywrightPage, PuppeteerPage, - ExperimentalNotConfiguredError, CuaModelRequiredError, StagehandInvalidArgumentError, StagehandNotInitializedError, @@ -72,6 +71,7 @@ import { V3Context } from "./understudy/context"; import { Page } from "./understudy/page"; import { resolveModel } from "../modelUtils"; import { StagehandAPIClient } from "./api"; +import { validateExperimentalFeatures } from "./agent/utils/validateExperimentalFeatures"; import { createTimeoutGuard } from "./handlers/handlerUtils/timeoutGuard"; import { ActTimeoutError } from "./types/public/sdkErrors"; @@ -1522,12 +1522,7 @@ export class V3 { instruction: string; cacheContext: AgentCacheContext | null; }> { - if ((options?.integrations || options?.tools) && !this.experimental) { - throw new ExperimentalNotConfiguredError( - "MCP integrations and custom tools", - ); - } - + // Note: experimental validation is done at the call site before this method const tools = options?.integrations ? await resolveTools(options.integrations, options.tools) : (options?.tools ?? {}); @@ -1622,17 +1617,11 @@ export class V3 { // If CUA is enabled, use the computer-use agent path if (options?.cua) { - if (options?.stream) { - throw new StagehandInvalidArgumentError( - "Streaming is not supported with CUA (Computer Use Agent) mode. 
Remove either 'stream: true' or 'cua: true' from your agent config.", - ); - } - - if ((options?.integrations || options?.tools) && !this.experimental) { - throw new ExperimentalNotConfiguredError( - "MCP integrations and custom tools", - ); - } + // Validate agent config at creation time (includes CUA+streaming conflict check) + validateExperimentalFeatures({ + isExperimental: this.experimental, + agentConfig: options, + }); const modelToUse = options?.model || { modelName: this.modelName, @@ -1650,9 +1639,15 @@ export class V3 { return { execute: async (instructionOrOptions: string | AgentExecuteOptions) => withInstanceLogContext(this.instanceId, async () => { - if (options?.integrations && !this.experimental) { - throw new ExperimentalNotConfiguredError("MCP integrations"); - } + validateExperimentalFeatures({ + isExperimental: this.experimental, + agentConfig: options, + executeOptions: + typeof instructionOrOptions === "object" + ? instructionOrOptions + : null, + }); + const tools = options?.integrations ? await resolveTools(options.integrations, options.tools) : (options?.tools ?? {}); @@ -1752,25 +1747,24 @@ export class V3 { | AgentStreamExecuteOptions, ): Promise => withInstanceLogContext(this.instanceId, async () => { - if ( - typeof instructionOrOptions === "object" && - instructionOrOptions.callbacks && - !this.experimental - ) { - throw new ExperimentalNotConfiguredError("Agent callbacks"); - } + validateExperimentalFeatures({ + isExperimental: this.experimental, + agentConfig: options, + executeOptions: + typeof instructionOrOptions === "object" + ? 
instructionOrOptions + : null, + isStreaming, + }); // Streaming mode if (isStreaming) { - if (!this.experimental) { - throw new ExperimentalNotConfiguredError("Agent streaming"); - } - - const { handler, cacheContext } = await this.prepareAgentExecution( - options, - instructionOrOptions, - agentConfigSignature, - ); + const { handler, resolvedOptions, cacheContext } = + await this.prepareAgentExecution( + options, + instructionOrOptions, + agentConfigSignature, + ); if (cacheContext) { const replayed = @@ -1781,7 +1775,7 @@ export class V3 { } const streamResult = await handler.stream( - instructionOrOptions as string | AgentStreamExecuteOptions, + resolvedOptions as AgentStreamExecuteOptions, ); if (cacheContext) { @@ -1829,7 +1823,7 @@ export class V3 { ); } else { result = await handler.execute( - instructionOrOptions as string | AgentExecuteOptions, + resolvedOptions as AgentExecuteOptions, ); } if (recording) { diff --git a/packages/core/tests/public-api/public-error-types.test.ts b/packages/core/tests/public-api/public-error-types.test.ts index 6c325bb1b..442238d0e 100644 --- a/packages/core/tests/public-api/public-error-types.test.ts +++ b/packages/core/tests/public-api/public-error-types.test.ts @@ -2,6 +2,7 @@ import { describe, expectTypeOf, it } from "vitest"; import * as Stagehand from "../../dist"; export const publicErrorTypes = { + AgentAbortError: Stagehand.AgentAbortError, AgentScreenshotProviderError: Stagehand.AgentScreenshotProviderError, BrowserbaseSessionNotFoundError: Stagehand.BrowserbaseSessionNotFoundError, CaptchaTimeoutError: Stagehand.CaptchaTimeoutError, diff --git a/packages/core/tests/public-api/public-types.test.ts b/packages/core/tests/public-api/public-types.test.ts index 28b3f7bc9..0874d58e5 100644 --- a/packages/core/tests/public-api/public-types.test.ts +++ b/packages/core/tests/public-api/public-types.test.ts @@ -52,6 +52,7 @@ type ExpectedExportedTypes = { AgentStreamCallbacks: Stagehand.AgentStreamCallbacks; 
AgentExecuteOptionsBase: Stagehand.AgentExecuteOptionsBase; AgentStreamExecuteOptions: Stagehand.AgentStreamExecuteOptions; + ModelMessage: Stagehand.ModelMessage; // Types from logs.ts LogLevel: Stagehand.LogLevel; LogLine: Stagehand.LogLine; @@ -183,6 +184,8 @@ describe("Stagehand public API types", () => { maxSteps?: number; page?: Stagehand.AnyPage; highlightCursor?: boolean; + messages?: Stagehand.ModelMessage[]; + signal?: AbortSignal; callbacks?: Stagehand.AgentExecuteCallbacks; }; @@ -197,6 +200,8 @@ describe("Stagehand public API types", () => { maxSteps?: number; page?: Stagehand.AnyPage; highlightCursor?: boolean; + messages?: Stagehand.ModelMessage[]; + signal?: AbortSignal; callbacks?: Stagehand.AgentStreamCallbacks; }; @@ -235,6 +240,7 @@ describe("Stagehand public API types", () => { cached_input_tokens?: number; inference_time_ms: number; }; + messages?: Stagehand.ModelMessage[]; }; it("matches expected type shape", () => {