diff --git a/src/browser/components/AppLoader.auth.test.tsx b/src/browser/components/AppLoader.auth.test.tsx index fb2331920c..2a953cc7d0 100644 --- a/src/browser/components/AppLoader.auth.test.tsx +++ b/src/browser/components/AppLoader.auth.test.tsx @@ -15,9 +15,16 @@ void mock.module("@/browser/contexts/API", () => ({ })); void mock.module("@/browser/components/AuthTokenModal", () => ({ + // Note: Module mocks leak between bun test files. + // Export all commonly-used symbols to avoid cross-test import errors. AuthTokenModal: (props: { error?: string | null }) => (
{props.error ?? "no-error"}
), + getStoredAuthToken: () => null, + // eslint-disable-next-line @typescript-eslint/no-empty-function + setStoredAuthToken: () => {}, + // eslint-disable-next-line @typescript-eslint/no-empty-function + clearStoredAuthToken: () => {}, })); import { AppLoader } from "./AppLoader"; diff --git a/src/browser/components/Messages/ToolMessage.tsx b/src/browser/components/Messages/ToolMessage.tsx index 44039a328a..f5c7999dc9 100644 --- a/src/browser/components/Messages/ToolMessage.tsx +++ b/src/browser/components/Messages/ToolMessage.tsx @@ -175,6 +175,73 @@ function isTaskListTool(toolName: string, args: unknown): args is TaskListToolAr return TOOL_DEFINITIONS.task_list.schema.safeParse(args).success; } +function isTaskBashArgs(args: TaskToolArgs): args is TaskToolArgs & { + kind: "bash"; + script: string; + display_name: string; + timeout_secs: number; +} { + return "kind" in args && args.kind === "bash"; +} + +function taskBashResultToBashToolResult( + result: TaskToolSuccessResult | undefined +): BashToolResult | undefined { + if (!result) return undefined; + + if (result.status !== "completed") { + const taskId = result.taskId; + if (typeof taskId === "string" && taskId.startsWith("bash:")) { + const processId = taskId.slice("bash:".length).trim() || taskId; + return { + success: true, + output: "", + exitCode: 0, + wall_duration_ms: 0, + backgroundProcessId: processId, + }; + } + return undefined; + } + + const report = result.reportMarkdown ?? ""; + + const exitCodeMatch = /exitCode:\s*(-?\d+)/.exec(report); + const parsedExitCode = exitCodeMatch ? Number(exitCodeMatch[1]) : undefined; + const exitCode = result.exitCode ?? (Number.isFinite(parsedExitCode) ? parsedExitCode! : 0); + + const wallDurationMatch = /wall_duration_ms:\s*(\d+)/.exec(report); + const parsedWallDuration = wallDurationMatch ? Number(wallDurationMatch[1]) : undefined; + const wall_duration_ms = Number.isFinite(parsedWallDuration) ? parsedWallDuration! : 0; + + const textBlockMatch = /```text\n([\s\S]*?)\n```/.exec(report); + const output = textBlockMatch ? textBlockMatch[1] : ""; + + const errorLineMatch = /^error:\s*(.*)$/m.exec(report); + const error = errorLineMatch?.[1] ?? `Command exited with code ${exitCode}`; + + if (exitCode === 0) { + return { + success: true, + output, + exitCode: 0, + wall_duration_ms, + note: result.note, + truncated: result.truncated, + }; + } + + return { + success: false, + output: output.length > 0 ? output : undefined, + exitCode, + error, + wall_duration_ms, + note: result.note, + truncated: result.truncated, + }; +} + function isTaskTerminateTool(toolName: string, args: unknown): args is TaskTerminateToolArgs { if (toolName !== "task_terminate") return false; return TOOL_DEFINITIONS.task_terminate.schema.safeParse(args).success; @@ -389,6 +456,39 @@ export const ToolMessage: React.FC = ({ } if (isTaskTool(message.toolName, message.args)) { + if (isTaskBashArgs(message.args)) { + const canSendToBackground = foregroundBashToolCallIds?.has(message.toolCallId) ?? false; + const toolCallId = message.toolCallId; + + const bashArgs: BashToolArgs = { + script: message.args.script, + timeout_secs: message.args.timeout_secs, + run_in_background: message.args.run_in_background, + display_name: message.args.display_name, + }; + + const bashResult = taskBashResultToBashToolResult( + message.result as TaskToolSuccessResult | undefined + ); + + return ( +
+ onSendBashToBackground(toolCallId) : undefined + } + /> +
+ ); + } + return (
= ({ id, className }) // TASK TOOL CALL (spawn sub-agent) // ═══════════════════════════════════════════════════════════════════════════════ +function isBashTaskArgs(args: TaskToolArgs): args is TaskToolArgs & { + kind: "bash"; + script: string; + display_name: string; + timeout_secs: number; +} { + return args.kind === "bash"; +} interface TaskToolCallProps { args: TaskToolArgs; result?: TaskToolSuccessResult; @@ -173,20 +181,37 @@ export const TaskToolCall: React.FC = ({ args, result, status const hasReport = result?.status === "completed" && !!result.reportMarkdown; const { expanded, toggleExpanded } = useToolExpansion(hasReport); - const isBackground = args.run_in_background ?? false; - const agentType = args.subagent_type; - const prompt = args.prompt; - const title = args.title; + const isBackground = args.run_in_background; + + let isBashTask: boolean; + let title: string; + let promptOrScript: string; + let kindBadge: React.ReactNode; + + if (isBashTaskArgs(args)) { + isBashTask = true; + title = args.display_name ?? "Bash task"; + promptOrScript = args.script ?? ""; + kindBadge = ; + } else { + isBashTask = false; + title = args.title ?? "Task"; + promptOrScript = args.prompt ?? ""; + kindBadge = ; + } // Derive task state from result const taskId = result?.taskId; const taskStatus = result?.status; const reportMarkdown = result?.status === "completed" ? result.reportMarkdown : undefined; const reportTitle = result?.status === "completed" ? result.title : undefined; + const exitCode = result?.status === "completed" ? result.exitCode : undefined; - // Show preview of prompt (first line or truncated) - const promptPreview = - prompt.length > 60 ? prompt.slice(0, 60).trim() + "…" : prompt.split("\n")[0]; + // Show preview (first line or truncated) + const preview = + promptOrScript.length > 60 + ? promptOrScript.slice(0, 60).trim() + "…" + : promptOrScript.split("\n")[0]; return ( @@ -194,7 +219,7 @@ export const TaskToolCall: React.FC = ({ args, result, status task - + {kindBadge} {isBackground && ( background )} @@ -205,26 +230,33 @@ export const TaskToolCall: React.FC = ({ args, result, status {/* Task info surface */}
-
+
{reportTitle ?? title} {taskId && } {taskStatus && } + {exitCode !== undefined && ( + exit {exitCode} + )}
- {/* Prompt section */} + {/* Prompt / script */}
-
Prompt
-
- {prompt} +
+ {isBashTask ? "Script" : "Prompt"} +
+
+ {promptOrScript}
{/* Report section */} {reportMarkdown && (
-
Report
+
+ {isBashTask ? "Output" : "Report"} +
@@ -243,7 +275,7 @@ export const TaskToolCall: React.FC = ({ args, result, status )} {/* Collapsed preview */} - {!expanded &&
{promptPreview}
} + {!expanded &&
{preview}
} ); }; @@ -270,6 +302,12 @@ export const TaskAwaitToolCall: React.FC = ({ const timeoutSecs = args.timeout_secs; const results = result?.results ?? []; + const showConfigInfo = + taskIds !== undefined || + timeoutSecs !== undefined || + args.filter !== undefined || + args.filter_exclude === true; + // Summary for header const completedCount = results.filter((r) => r.status === "completed").length; const totalCount = results.length; @@ -292,10 +330,12 @@ export const TaskAwaitToolCall: React.FC = ({
{/* Config info */} - {(taskIds ?? timeoutSecs) && ( + {showConfigInfo && (
- {taskIds && Waiting for: {taskIds.length} task(s)} - {timeoutSecs && Timeout: {timeoutSecs}s} + {taskIds !== undefined && Waiting for: {taskIds.length} task(s)} + {timeoutSecs !== undefined && Timeout: {timeoutSecs}s} + {args.filter !== undefined && Filter: {args.filter}} + {args.filter_exclude === true && Exclude: true}
)} @@ -329,20 +369,35 @@ const TaskAwaitResult: React.FC<{ const reportMarkdown = isCompleted ? result.reportMarkdown : undefined; const title = isCompleted ? result.title : undefined; + const output = "output" in result ? result.output : undefined; + const note = "note" in result ? result.note : undefined; + const exitCode = "exitCode" in result ? result.exitCode : undefined; + const elapsedMs = "elapsed_ms" in result ? result.elapsed_ms : undefined; + return (
-
+
{title && {title}} + {exitCode !== undefined && exit {exitCode}} + {elapsedMs !== undefined && {elapsedMs}ms}
+ {!isCompleted && output && output.length > 0 && ( +
+ {output} +
+ )} + {reportMarkdown && (
)} + {note &&
{note}
} + {"error" in result && result.error && (
{result.error}
)} diff --git a/src/browser/contexts/API.test.tsx b/src/browser/contexts/API.test.tsx index 08bcc31989..8642e4bca2 100644 --- a/src/browser/contexts/API.test.tsx +++ b/src/browser/contexts/API.test.tsx @@ -66,8 +66,13 @@ void mock.module("@orpc/client/message-port", () => ({ })); void mock.module("@/browser/components/AuthTokenModal", () => ({ + // Note: Module mocks leak between bun test files. + // Export all commonly-used symbols to avoid cross-test import errors. + AuthTokenModal: () => null, getStoredAuthToken: () => null, // eslint-disable-next-line @typescript-eslint/no-empty-function + setStoredAuthToken: () => {}, + // eslint-disable-next-line @typescript-eslint/no-empty-function clearStoredAuthToken: () => {}, })); diff --git a/src/browser/hooks/useVoiceInput.ts b/src/browser/hooks/useVoiceInput.ts index 693b664037..e6af468654 100644 --- a/src/browser/hooks/useVoiceInput.ts +++ b/src/browser/hooks/useVoiceInput.ts @@ -56,8 +56,12 @@ export interface UseVoiceInputResult { * We hide our voice UI on these devices to avoid redundancy with system dictation. */ function hasTouchDictation(): boolean { - if (typeof window === "undefined") return false; - const hasTouch = "ontouchstart" in window || navigator.maxTouchPoints > 0; + if (typeof window === "undefined" || typeof navigator === "undefined") return false; + + const maxTouchPoints = + typeof navigator.maxTouchPoints === "number" ? navigator.maxTouchPoints : 0; + const hasTouch = "ontouchstart" in window || maxTouchPoints > 0; + // Touch-only check: most touch devices have native dictation. // We don't check screen size because iPads are large but still have dictation. return hasTouch; @@ -66,7 +70,9 @@ function hasTouchDictation(): boolean { const HAS_TOUCH_DICTATION = hasTouchDictation(); const HAS_MEDIA_RECORDER = typeof window !== "undefined" && typeof MediaRecorder !== "undefined"; const HAS_GET_USER_MEDIA = - typeof window !== "undefined" && typeof navigator.mediaDevices?.getUserMedia === "function"; + typeof window !== "undefined" && + typeof navigator !== "undefined" && + typeof navigator.mediaDevices?.getUserMedia === "function"; // ============================================================================= // Global Key State Tracking @@ -79,7 +85,7 @@ const HAS_GET_USER_MEDIA = */ let isSpaceCurrentlyHeld = false; -if (typeof window !== "undefined") { +if (typeof window !== "undefined" && typeof window.addEventListener === "function") { window.addEventListener( "keydown", (e) => { diff --git a/src/browser/utils/RefreshController.test.ts b/src/browser/utils/RefreshController.test.ts index e4aa56babc..b1aae0c719 100644 --- a/src/browser/utils/RefreshController.test.ts +++ b/src/browser/utils/RefreshController.test.ts @@ -1,18 +1,38 @@ -import { describe, it, expect, beforeEach, afterEach, jest } from "@jest/globals"; +import { describe, it, expect, afterEach, jest, setSystemTime } from "bun:test"; import { RefreshController } from "./RefreshController"; +async function sleep(ms: number): Promise { + await new Promise((resolve) => setTimeout(resolve, ms)); +} + describe("RefreshController", () => { - beforeEach(() => { - jest.useFakeTimers(); + afterEach(() => { + // Some tests manipulate Date.now via setSystemTime(); always restore. + setSystemTime(); }); - afterEach(() => { - jest.useRealTimers(); + it("debounces schedule() calls (resets timer)", async () => { + const onRefresh = jest.fn<() => void>(); + const controller = new RefreshController({ onRefresh, debounceMs: 50 }); + + controller.schedule(); + await sleep(20); + controller.schedule(); // Resets debounce timer + + // Not yet: only 30ms since last call (< debounceMs) + await sleep(30); + expect(onRefresh).not.toHaveBeenCalled(); + + // Now past debounceMs since last call + await sleep(30); + expect(onRefresh).toHaveBeenCalledTimes(1); + + controller.dispose(); }); - it("debounces multiple schedule() calls", () => { + it("coalesces calls during debounce window", async () => { const onRefresh = jest.fn<() => void>(); - const controller = new RefreshController({ onRefresh, debounceMs: 100 }); + const controller = new RefreshController({ onRefresh, debounceMs: 50 }); controller.schedule(); controller.schedule(); @@ -20,16 +40,16 @@ describe("RefreshController", () => { expect(onRefresh).not.toHaveBeenCalled(); - jest.advanceTimersByTime(100); + await sleep(60); expect(onRefresh).toHaveBeenCalledTimes(1); controller.dispose(); }); - it("requestImmediate() bypasses debounce", () => { + it("requestImmediate() bypasses debounce timer", async () => { const onRefresh = jest.fn<() => void>(); - const controller = new RefreshController({ onRefresh, debounceMs: 100 }); + const controller = new RefreshController({ onRefresh, debounceMs: 50 }); controller.schedule(); expect(onRefresh).not.toHaveBeenCalled(); @@ -37,8 +57,8 @@ describe("RefreshController", () => { controller.requestImmediate(); expect(onRefresh).toHaveBeenCalledTimes(1); - // Original debounce timer should be cleared - jest.advanceTimersByTime(100); + // Original timer should be cleared + await sleep(60); expect(onRefresh).toHaveBeenCalledTimes(1); controller.dispose(); @@ -64,6 +84,49 @@ describe("RefreshController", () => { controller.dispose(); }); + it("schedule() during in-flight queues refresh for after completion", async () => { + const resolvers: Array<() => void> = []; + const onRefresh = jest.fn( + () => + new Promise((resolve) => { + resolvers.push(resolve); + }) + ); + + const controller = new RefreshController({ onRefresh, debounceMs: 20 }); + + // Start first refresh + controller.requestImmediate(); + expect(onRefresh).toHaveBeenCalledTimes(1); + expect(resolvers).toHaveLength(1); + + // schedule() while in-flight should queue, not start a second refresh + controller.schedule(); + + // Ensure the debounce timer has fired while we're still in-flight. + await sleep(30); + expect(onRefresh).toHaveBeenCalledTimes(1); + + // Complete the first refresh and let .finally() run. + resolvers[0](); + await Promise.resolve(); + await Promise.resolve(); // Extra tick for .finally() + + // Allow post-flight setTimeout(0) to run + await sleep(0); + await sleep(10); + + expect(onRefresh).toHaveBeenCalledTimes(2); + expect(resolvers).toHaveLength(2); + + // Resolve the follow-up refresh promise to avoid leaving it in-flight. + resolvers[1](); + await Promise.resolve(); + await Promise.resolve(); + + controller.dispose(); + }); + it("isRefreshing reflects in-flight state", () => { let resolveRefresh: () => void; const refreshPromise = new Promise((resolve) => { @@ -84,27 +147,27 @@ describe("RefreshController", () => { controller.dispose(); }); - it("dispose() cleans up debounce timer", () => { + it("dispose() cleans up debounce timer", async () => { const onRefresh = jest.fn<() => void>(); - const controller = new RefreshController({ onRefresh, debounceMs: 100 }); + const controller = new RefreshController({ onRefresh, debounceMs: 50 }); controller.schedule(); controller.dispose(); - jest.advanceTimersByTime(100); + await sleep(60); expect(onRefresh).not.toHaveBeenCalled(); }); - it("does not refresh after dispose", () => { + it("does not refresh after dispose", async () => { const onRefresh = jest.fn<() => void>(); - const controller = new RefreshController({ onRefresh, debounceMs: 100 }); + const controller = new RefreshController({ onRefresh, debounceMs: 50 }); controller.dispose(); controller.schedule(); controller.requestImmediate(); - jest.advanceTimersByTime(100); + await sleep(60); expect(onRefresh).not.toHaveBeenCalled(); }); diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index f84854f291..f4d97ca167 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -94,7 +94,7 @@ const SubagentTypeSchema = z.preprocess( z.enum(BUILT_IN_SUBAGENT_TYPES) ); -export const TaskToolArgsSchema = z +const TaskToolAgentArgsSchema = z .object({ subagent_type: SubagentTypeSchema, prompt: z.string().min(1), @@ -103,6 +103,54 @@ export const TaskToolArgsSchema = z }) .strict(); +const TaskToolBashArgsSchema = z + .object({ + kind: z.literal("bash"), + script: z.string().min(1), + timeout_secs: z.number().positive(), + run_in_background: z.boolean().default(false), + display_name: z.string().min(1), + }) + .strict(); + +// NOTE: Several providers require tool schemas to be a *single* JSON Schema object. +// In particular, Anthropic rejects union/anyOf schemas for tool input. +// +// To keep the provider-facing schema as `type: "object"` while still enforcing a strict +// agent-vs-bash split, we validate via superRefine against the appropriate strict schema. +export const TaskToolArgsSchema = z + .object({ + // Discriminator for bash tasks. Omit for agent tasks. + kind: z.literal("bash").optional(), + + // Agent task args + subagent_type: SubagentTypeSchema.optional(), + prompt: z.string().min(1).optional(), + title: z.string().min(1).optional(), + + // Shared + run_in_background: z.boolean().default(false), + + // Bash task args + script: z.string().min(1).optional(), + timeout_secs: z.number().positive().optional(), + display_name: z.string().min(1).optional(), + }) + .strict() + .superRefine((args, ctx) => { + const strictSchema = args.kind === "bash" ? TaskToolBashArgsSchema : TaskToolAgentArgsSchema; + const parsed = strictSchema.safeParse(args); + if (!parsed.success) { + for (const issue of parsed.error.issues) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: issue.message, + path: issue.path, + }); + } + } + }); + export const TaskToolQueuedResultSchema = z .object({ status: z.enum(["queued", "running"]), @@ -117,6 +165,14 @@ export const TaskToolCompletedResultSchema = z reportMarkdown: z.string(), title: z.string().optional(), agentType: z.string().optional(), + exitCode: z.number().optional(), + note: z.string().optional(), + truncated: z + .object({ + reason: z.string(), + totalLines: z.number(), + }) + .optional(), }) .strict(); @@ -137,12 +193,28 @@ export const TaskAwaitToolArgsSchema = z .describe( "List of task IDs to await. When omitted, waits for all active descendant tasks of the current workspace." ), + filter: z + .string() + .optional() + .describe( + "Optional regex to filter bash task output lines. By default, only matching lines are returned. " + + "When filter_exclude is true, matching lines are excluded instead. " + + "Non-matching lines are discarded and cannot be retrieved later." + ), + filter_exclude: z + .boolean() + .optional() + .describe( + "When true, lines matching 'filter' are excluded instead of kept. " + + "Requires 'filter' to be set." + ), timeout_secs: z .number() - .positive() + .min(0) .optional() .describe( "Maximum time to wait in seconds for each task. " + + "For bash tasks, this waits for NEW output (or process exit). " + "If exceeded, the result returns status=queued|running|awaiting_report (task is still active). " + "Optional, defaults to 10 minutes." ), @@ -155,6 +227,10 @@ export const TaskAwaitToolCompletedResultSchema = z taskId: z.string(), reportMarkdown: z.string(), title: z.string().optional(), + output: z.string().optional(), + elapsed_ms: z.number().optional(), + exitCode: z.number().optional(), + note: z.string().optional(), }) .strict(); @@ -162,6 +238,9 @@ export const TaskAwaitToolActiveResultSchema = z .object({ status: z.enum(["queued", "running", "awaiting_report"]), taskId: z.string(), + output: z.string().optional(), + elapsed_ms: z.number().optional(), + note: z.string().optional(), }) .strict(); @@ -513,15 +592,18 @@ export const TOOL_DEFINITIONS = { }, task: { description: - "Spawn a sub-agent task in a child workspace. " + - 'Use this to delegate work to specialized presets like "explore" (read-only investigation) or "exec" (general-purpose coding in a child workspace). ' + - "If run_in_background is false, this tool blocks until the sub-agent calls agent_report, then returns the report. " + - "If run_in_background is true, you can await it later with task_await.", + "Unified task tool for (1) spawning sub-agent tasks and (2) running bash commands. " + + "\n\nAgent tasks: provide subagent_type, prompt, title, run_in_background. " + + '\nBash tasks: set kind="bash" and provide script, timeout_secs, display_name, run_in_background. ' + + "\n\nIf run_in_background is false, returns a completed reportMarkdown. " + + "If run_in_background is true, returns a running taskId; use task_await to read incremental output and task_terminate to stop it.", schema: TaskToolArgsSchema, }, task_await: { description: - "Wait for one or more sub-agent tasks to finish and return their reports. " + + "Wait for one or more tasks to produce output. " + + "Agent tasks return reports when completed. " + + "Bash tasks return incremental output while running and a final reportMarkdown when they exit. " + "Use this tool to WAIT; do not poll task_list in a loop to wait for task completion (that is misuse and wastes tool calls). " + "This is similar to Promise.allSettled(): you always get per-task results. " + "Possible statuses: completed, queued, running, awaiting_report, not_found, invalid_scope, error.", @@ -529,15 +611,16 @@ export const TOOL_DEFINITIONS = { }, task_terminate: { description: - "Terminate one or more sub-agent tasks immediately. " + - "This stops their AI streams and deletes their workspaces (best-effort). " + + "Terminate one or more tasks immediately (sub-agent tasks or background bash tasks). " + + "For sub-agent tasks, this stops their AI streams and deletes their workspaces (best-effort). " + "No report will be delivered; any in-progress work is discarded. " + "If the task has descendant sub-agent tasks, they are terminated too.", schema: TaskTerminateToolArgsSchema, }, task_list: { description: - "List descendant sub-agent tasks for the current workspace, including their status and metadata. " + + "List descendant tasks for the current workspace, including status + metadata. " + + "This includes sub-agent tasks and background bash tasks. " + "Use this after compaction or interruptions to rediscover which tasks are still active. " + "This is a discovery tool, NOT a waiting mechanism: if you need to wait for tasks to finish, call task_await (optionally omit task_ids to await all active descendant tasks).", schema: TaskListToolArgsSchema, @@ -961,10 +1044,6 @@ export function getAvailableTools( // Base tools available for all models const baseTools = [ - "bash", - "bash_output", - "bash_background_list", - "bash_background_terminate", "file_read", "agent_skill_read", "agent_skill_read_file", diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index e31c485c34..9bd3d68b6e 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -20,6 +20,7 @@ import { createAgentSkillReadFileTool } from "@/node/services/tools/agent_skill_ import { createAgentReportTool } from "@/node/services/tools/agent_report"; import { wrapWithInitWait } from "@/node/services/tools/wrapWithInitWait"; import { log } from "@/node/services/log"; +import { getAvailableTools } from "@/common/utils/tools/toolDefinitions"; import { sanitizeMCPToolsForOpenAI } from "@/common/utils/tools/schemaSanitizer"; import type { Runtime } from "@/node/runtime/Runtime"; @@ -139,10 +140,19 @@ export async function getToolsForModel( // to leave repository in broken state due to issues with concurrent file modifications // and line number miscalculations. Use file_edit_replace_string instead. // file_edit_replace_lines: wrap(createFileEditReplaceLinesTool(config)), + + // Unified task abstraction (agent + bash) + task: wrap(createTaskTool(config)), + task_await: wrap(createTaskAwaitTool(config)), + task_terminate: wrap(createTaskTerminateTool(config)), + task_list: wrap(createTaskListTool(config)), + + // Legacy bash tools (deprecated: prefer task(kind="bash")) bash: wrap(createBashTool(config)), bash_output: wrap(createBashOutputTool(config)), bash_background_list: wrap(createBashBackgroundListTool(config)), bash_background_terminate: wrap(createBashBackgroundTerminateTool(config)), + web_fetch: wrap(createWebFetchTool(config)), }; @@ -150,10 +160,6 @@ export async function getToolsForModel( const nonRuntimeTools: Record = { ...(config.mode === "plan" ? { ask_user_question: createAskUserQuestionTool(config) } : {}), propose_plan: createProposePlanTool(config), - task: createTaskTool(config), - task_await: createTaskAwaitTool(config), - task_terminate: createTaskTerminateTool(config), - task_list: createTaskListTool(config), ...(config.enableAgentReport ? { agent_report: createAgentReportTool(config) } : {}), todo_write: createTodoWriteTool(config), todo_read: createTodoReadTool(config), @@ -220,6 +226,19 @@ export async function getToolsForModel( log.error(`No web search tools available for ${provider}:`, error); } + // Filter tools to the canonical allowlist so system prompt + toolset stay in sync. + // Include MCP tools even if they're not in getAvailableTools(). + const allowlistedToolNames = new Set( + getAvailableTools(modelString, config.mode, { enableAgentReport: config.enableAgentReport }) + ); + for (const toolName of Object.keys(mcpTools ?? {})) { + allowlistedToolNames.add(toolName); + } + + allTools = Object.fromEntries( + Object.entries(allTools).filter(([toolName]) => allowlistedToolNames.has(toolName)) + ); + // Apply tool-specific instructions if provided if (toolInstructions) { const augmentedTools: Record = {}; diff --git a/src/node/services/agentPresets.ts b/src/node/services/agentPresets.ts index f5d1bee1a4..bc71b6f77c 100644 --- a/src/node/services/agentPresets.ts +++ b/src/node/services/agentPresets.ts @@ -47,9 +47,6 @@ function buildSystemPrompt(args: { const EXEC_PRESET: AgentPreset = { agentType: "exec", toolPolicy: [ - // Non-recursive: subagents must not spawn more subagents. - { regex_match: "task", action: "disable" }, - { regex_match: "task_.*", action: "disable" }, // Only the main plan-mode session should call propose_plan. { regex_match: "propose_plan", action: "disable" }, ], @@ -60,7 +57,7 @@ const EXEC_PRESET: AgentPreset = { "- Make minimal, correct changes that match existing codebase patterns.", ], rules: [ - "- Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).", + "- You MUST NOT spawn additional sub-agent tasks.", "- Do not call propose_plan.", "- Prefer small, reviewable diffs and run targeted checks when feasible.", ], @@ -71,10 +68,10 @@ const EXPLORE_PRESET: AgentPreset = { agentType: "explore", toolPolicy: enableOnly( "file_read", - "bash", - "bash_output", - "bash_background_list", - "bash_background_terminate", + "task", + "task_await", + "task_list", + "task_terminate", "web_fetch", "web_search", "google_search", @@ -92,8 +89,8 @@ const EXPLORE_PRESET: AgentPreset = { "- You MUST NOT create temporary files anywhere (including /tmp).", "- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.", "- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).", - "- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).", - "- Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).", + '- Use task(kind="bash") only for read-only operations (rg, ls, cat, git diff/show/log, etc.).', + "- You MUST NOT spawn additional sub-agent tasks.", ], }), }; diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index eb4f0c1b0b..e0852a13ce 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -1263,7 +1263,7 @@ export class AIService extends EventEmitter { "", "Nesting:", `- Task delegation is disabled in this workspace (taskDepth=${taskDepth}, maxTaskNestingDepth=${taskSettings.maxTaskNestingDepth}).`, - "- Do not call task/task_await/task_list/task_terminate.", + "- You MUST NOT spawn additional sub-agent tasks.", ].join("\n") : agentPreset.systemPrompt : undefined; @@ -1370,12 +1370,9 @@ export class AIService extends EventEmitter { mcpTools ); - const depthToolPolicy: ToolPolicy = shouldDisableTaskToolsForDepth - ? [ - { regex_match: "task", action: "disable" }, - { regex_match: "task_.*", action: "disable" }, - ] - : []; + // Note: task is the unified abstraction for both agent delegation and bash execution. + // Do not disable it at max depth; rely on TaskService/createTaskTool to reject delegation. + const depthToolPolicy: ToolPolicy = []; // Preset + depth tool policies must be applied last so callers cannot re-enable restricted tools. const effectiveToolPolicy = diff --git a/src/node/services/backgroundProcessManager.ts b/src/node/services/backgroundProcessManager.ts index 65e910d9f6..25d3beb846 100644 --- a/src/node/services/backgroundProcessManager.ts +++ b/src/node/services/backgroundProcessManager.ts @@ -454,6 +454,7 @@ export class BackgroundProcessManager extends EventEmitter= 3 && filterExclude && currentStatus === "running"; + const pollingToolName = noteToolName ?? "bash_output"; + let note: string | undefined; if (shouldSuggestFilterExclude) { note = - "STOP POLLING. You've called bash_output 3+ times on this process. " + + `STOP POLLING. You've called ${pollingToolName} 3+ times on this process. ` + "This wastes tokens and clutters the conversation. " + "Instead, make ONE call with: filter='⏳|progress|waiting|\\\\\\.\\\\\\.\\\\\\.', " + "filter_exclude=true, timeout_secs=120. This blocks until meaningful output arrives."; diff --git a/src/node/services/taskService.test.ts b/src/node/services/taskService.test.ts index c6ec24cc25..95b2e8bc87 100644 --- a/src/node/services/taskService.test.ts +++ b/src/node/services/taskService.test.ts @@ -1173,6 +1173,48 @@ describe("TaskService", () => { expect(report.title).toBe("t"); }); + test("isDescendantAgentTask consults cached ancestry after workspace is removed", async () => { + const config = await createTestConfig(rootDir); + + const projectPath = path.join(rootDir, "repo"); + const parentId = "parent-111"; + const childId = "child-222"; + + await config.saveConfig({ + projects: new Map([ + [ + projectPath, + { + workspaces: [ + { path: path.join(projectPath, "parent"), id: parentId, name: "parent" }, + { + path: path.join(projectPath, "child"), + id: childId, + name: "agent_explore_child", + parentWorkspaceId: parentId, + agentType: "explore", + taskStatus: "running", + }, + ], + }, + ], + ]), + taskSettings: { maxParallelAgentTasks: 1, maxTaskNestingDepth: 3 }, + }); + + const { taskService } = createTaskServiceHarness(config); + + const internal = taskService as unknown as { + resolveWaiters: (taskId: string, report: { reportMarkdown: string; title?: string }) => void; + }; + internal.resolveWaiters(childId, { reportMarkdown: "ok", title: "t" }); + + await config.removeWorkspace(childId); + + expect(taskService.isDescendantAgentTask(parentId, childId)).toBe(true); + expect(taskService.isDescendantAgentTask("other-parent", childId)).toBe(false); + }); + test("waitForAgentReport cache is cleared by TTL cleanup", async () => { const config = await createTestConfig(rootDir); diff --git a/src/node/services/taskService.ts b/src/node/services/taskService.ts index 44e53c6be5..8fb617fa74 100644 --- a/src/node/services/taskService.ts +++ b/src/node/services/taskService.ts @@ -94,6 +94,15 @@ interface PendingTaskStartWaiter { cleanup: () => void; } +interface CompletedAgentReportCacheEntry { + reportMarkdown: string; + title?: string; + expiresAtMs: number; + // Ancestor workspace IDs captured when the report was cached. + // Used to keep descendant-scope checks working even if the task workspace is cleaned up. + ancestorWorkspaceIds: string[]; +} + function isToolCallEndEvent(value: unknown): value is ToolCallEndEvent { return ( typeof value === "object" && @@ -173,10 +182,7 @@ export class TaskService { private readonly foregroundAwaitCountByWorkspaceId = new Map(); // Cache completed reports so callers can retrieve them even after the task workspace is removed. // Bounded by TTL + max entries (see COMPLETED_REPORT_CACHE_*). - private readonly completedReportsByTaskId = new Map< - string, - { reportMarkdown: string; title?: string; expiresAtMs: number } - >(); + private readonly completedReportsByTaskId = new Map(); private readonly remindedAwaitingReport = new Set(); constructor( @@ -1049,7 +1055,20 @@ export class TaskService { const cfg = this.config.loadConfigOrDefault(); const parentById = this.buildAgentTaskIndex(cfg).parentById; - return this.isDescendantAgentTaskUsingParentById(parentById, ancestorWorkspaceId, taskId); + if (this.isDescendantAgentTaskUsingParentById(parentById, ancestorWorkspaceId, taskId)) { + return true; + } + + // The task workspace may have been removed after it reported (cleanup). Preserve scope checks + // by consulting the completed-report cache, which tracks the task's ancestor chain. + const nowMs = Date.now(); + this.cleanupExpiredCompletedReports(nowMs); + const cached = this.completedReportsByTaskId.get(taskId); + if (cached && cached.expiresAtMs > nowMs) { + return cached.ancestorWorkspaceIds.includes(ancestorWorkspaceId); + } + + return false; } private isDescendantAgentTaskUsingParentById( @@ -1072,6 +1091,25 @@ export class TaskService { // --- Internal orchestration --- + private listAncestorWorkspaceIdsUsingParentById( + parentById: Map, + taskId: string + ): string[] { + const ancestors: string[] = []; + + let current = taskId; + for (let i = 0; i < 32; i++) { + const parent = parentById.get(current); + if (!parent) return ancestors; + ancestors.push(parent); + current = parent; + } + + throw new Error( + `listAncestorWorkspaceIdsUsingParentById: possible parentWorkspaceId cycle starting at ${taskId}` + ); + } + private listAgentTaskWorkspaces( config: ReturnType ): AgentTaskWorkspaceEntry[] { @@ -1842,10 +1880,16 @@ export class TaskService { private resolveWaiters(taskId: string, report: { reportMarkdown: string; title?: string }): void { const nowMs = Date.now(); this.cleanupExpiredCompletedReports(nowMs); + + const cfg = this.config.loadConfigOrDefault(); + const parentById = this.buildAgentTaskIndex(cfg).parentById; + const ancestorWorkspaceIds = this.listAncestorWorkspaceIdsUsingParentById(parentById, taskId); + this.completedReportsByTaskId.set(taskId, { reportMarkdown: report.reportMarkdown, title: report.title, expiresAtMs: nowMs + COMPLETED_REPORT_CACHE_TTL_MS, + ancestorWorkspaceIds, }); this.enforceCompletedReportCacheLimit(); diff --git a/src/node/services/tools/task.bash.test.ts b/src/node/services/tools/task.bash.test.ts new file mode 100644 index 0000000000..721a66babb --- /dev/null +++ b/src/node/services/tools/task.bash.test.ts @@ -0,0 +1,178 @@ +import { describe, it, expect, mock } from "bun:test"; +import type { ToolCallOptions } from "ai"; + +import { createTaskTool } from "./task"; +import { createTaskAwaitTool } from "./task_await"; +import { createTaskListTool } from "./task_list"; +import { createTaskTerminateTool } from "./task_terminate"; +import type { BackgroundProcessManager } from "@/node/services/backgroundProcessManager"; +import { TestTempDir, createTestToolConfig } from "./testHelpers"; +import type { TaskService } from "@/node/services/taskService"; + +const mockToolCallOptions: ToolCallOptions = { + toolCallId: "test-call-id", + messages: [], +}; + +describe("task_* bash tasks", () => { + it("task(kind=bash) returns a running taskId for background commands", async () => { + using tempDir = new TestTempDir("test-task-bash"); + + const spawn = mock(() => ({ + success: true as const, + processId: "proc-1", + outputDir: "ignored", + pid: 123, + })); + + const backgroundProcessManager = { spawn } as unknown as BackgroundProcessManager; + + const tool = createTaskTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "ws-1" }), + backgroundProcessManager, + }); + + const result: unknown = await Promise.resolve( + tool.execute!( + { + kind: "bash", + script: "echo hi", + timeout_secs: 10, + run_in_background: true, + display_name: "My Proc", + }, + mockToolCallOptions + ) + ); + + expect(spawn).toHaveBeenCalled(); + expect(result).toEqual({ status: "running", taskId: "bash:proc-1" }); + }); + + it("task_await returns incremental output for bash tasks", async () => { + using tempDir = new TestTempDir("test-task-await-bash"); + + const getProcess = mock(() => ({ id: "proc-1", workspaceId: "ws-1", displayName: "My Proc" })); + const getOutput = mock(() => ({ + success: true as const, + status: "running" as const, + output: "hello", + elapsed_ms: 5, + })); + + const backgroundProcessManager = { + getProcess, + getOutput, + } as unknown as BackgroundProcessManager; + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => []), + isDescendantAgentTask: mock(() => false), + waitForAgentReport: mock(() => Promise.resolve({ reportMarkdown: "ignored" })), + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "ws-1" }), + backgroundProcessManager, + taskService, + }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["bash:proc-1"], timeout_secs: 0 }, mockToolCallOptions) + ); + + expect(getProcess).toHaveBeenCalledWith("proc-1"); + expect(getOutput).toHaveBeenCalled(); + expect(result).toEqual({ + results: [ + { + status: "running", + taskId: "bash:proc-1", + output: "hello", + elapsed_ms: 5, + note: undefined, + }, + ], + }); + }); + + it("task_list includes background bash tasks", async () => { + using tempDir = new TestTempDir("test-task-list-bash"); + + const startTime = Date.parse("2025-01-01T00:00:00.000Z"); + const list = mock(() => [ + { + id: "proc-1", + workspaceId: "ws-1", + status: "running" as const, + displayName: "My Proc", + startTime, + }, + ]); + + const backgroundProcessManager = { list } as unknown as BackgroundProcessManager; + + const taskService = { + listDescendantAgentTasks: mock(() => []), + isDescendantAgentTask: mock(() => false), + } as unknown as TaskService; + + const tool = createTaskListTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "ws-1" }), + backgroundProcessManager, + taskService, + }); + + const result: unknown = await Promise.resolve(tool.execute!({}, mockToolCallOptions)); + + expect(result).toEqual({ + tasks: [ + { + taskId: "bash:proc-1", + status: "running", + parentWorkspaceId: "ws-1", + title: "My Proc", + createdAt: new Date(startTime).toISOString(), + depth: 1, + }, + ], + }); + }); + + it("task_terminate can terminate bash tasks", async () => { + using tempDir = new TestTempDir("test-task-terminate-bash"); + + const getProcess = mock(() => ({ id: "proc-1", workspaceId: "ws-1" })); + const terminate = mock(() => ({ success: true as const })); + + const backgroundProcessManager = { + getProcess, + terminate, + } as unknown as BackgroundProcessManager; + + const taskService = { + terminateDescendantAgentTask: mock(() => + Promise.resolve({ success: false, error: "not used" }) + ), + isDescendantAgentTask: mock(() => false), + } as unknown as TaskService; + + const tool = createTaskTerminateTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "ws-1" }), + backgroundProcessManager, + taskService, + }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["bash:proc-1"] }, mockToolCallOptions) + ); + + expect(getProcess).toHaveBeenCalledWith("proc-1"); + expect(terminate).toHaveBeenCalledWith("proc-1"); + expect(result).toEqual({ + results: [ + { status: "terminated", taskId: "bash:proc-1", terminatedTaskIds: ["bash:proc-1"] }, + ], + }); + }); +}); diff --git a/src/node/services/tools/task.test.ts b/src/node/services/tools/task.test.ts index 578758b0b7..d9f5a2cb6a 100644 --- a/src/node/services/tools/task.test.ts +++ b/src/node/services/tools/task.test.ts @@ -31,7 +31,7 @@ describe("task tool", () => { const result: unknown = await Promise.resolve( tool.execute!( - { subagent_type: "explore", prompt: "do it", run_in_background: true }, + { subagent_type: "explore", prompt: "do it", title: "Child task", run_in_background: true }, mockToolCallOptions ) ); @@ -63,7 +63,12 @@ describe("task tool", () => { const result: unknown = await Promise.resolve( tool.execute!( - { subagent_type: "explore", prompt: "do it", run_in_background: false }, + { + subagent_type: "explore", + prompt: "do it", + title: "Child task", + run_in_background: false, + }, mockToolCallOptions ) ); @@ -95,7 +100,10 @@ describe("task tool", () => { let caught: unknown = null; try { await Promise.resolve( - tool.execute!({ subagent_type: "explore", prompt: "do it" }, mockToolCallOptions) + tool.execute!( + { subagent_type: "explore", prompt: "do it", title: "Child task" }, + mockToolCallOptions + ) ); } catch (error: unknown) { caught = error; @@ -131,7 +139,10 @@ describe("task tool", () => { let caught: unknown = null; try { await Promise.resolve( - tool.execute!({ subagent_type: "exec", prompt: "do it" }, mockToolCallOptions) + tool.execute!( + { subagent_type: "exec", prompt: "do it", title: "Child task" }, + mockToolCallOptions + ) ); } catch (error: unknown) { caught = error; diff --git a/src/node/services/tools/task.ts b/src/node/services/tools/task.ts index 68500fb70e..475691d05f 100644 --- a/src/node/services/tools/task.ts +++ b/src/node/services/tools/task.ts @@ -1,25 +1,131 @@ import { tool } from "ai"; +import type { BashToolResult } from "@/common/types/tools"; import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; import { TaskToolResultSchema, TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; import { coerceThinkingLevel } from "@/common/types/thinking"; +import { createBashTool } from "./bash"; +import { toBashTaskId } from "./taskId"; import { parseToolResult, requireTaskService, requireWorkspaceId } from "./toolUtils"; +function formatBashReport( + args: { script: string; display_name: string }, + result: BashToolResult +): string { + const lines: string[] = []; + + lines.push(`### Bash: ${args.display_name}`); + lines.push(""); + + lines.push("```bash"); + lines.push(args.script.trimEnd()); + lines.push("```"); + lines.push(""); + + lines.push(`exitCode: ${result.exitCode}`); + lines.push(`wall_duration_ms: ${result.wall_duration_ms}`); + + if ("truncated" in result && result.truncated) { + lines.push(""); + lines.push("WARNING: output truncated"); + lines.push(`reason: ${result.truncated.reason}`); + lines.push(`totalLines: ${result.truncated.totalLines}`); + } + + if (!result.success) { + lines.push(""); + lines.push(`error: ${result.error}`); + } + + if (typeof result.output === "string" && result.output.length > 0) { + lines.push(""); + lines.push("```text"); + lines.push(result.output.trimEnd()); + lines.push("```"); + } + + return lines.join("\n"); +} + export const createTaskTool: ToolFactory = (config: ToolConfiguration) => { + let bashTool: ReturnType | null = null; + return tool({ description: TOOL_DEFINITIONS.task.description, inputSchema: TOOL_DEFINITIONS.task.schema, - execute: async (args, { abortSignal }): Promise => { + execute: async (args, { abortSignal, toolCallId, messages }): Promise => { + // Defensive: tool() should have already validated args via inputSchema, + // but keep runtime validation here to preserve type-safety. + const parsedArgs = TOOL_DEFINITIONS.task.schema.safeParse(args); + if (!parsedArgs.success) { + throw new Error(`task tool input validation failed: ${parsedArgs.error.message}`); + } + const validatedArgs = parsedArgs.data; + if (abortSignal?.aborted) { + throw new Error("Interrupted"); + } + + // task(kind="bash") - run bash commands via the task abstraction. + if (validatedArgs.kind === "bash") { + const { script, timeout_secs, run_in_background, display_name } = validatedArgs; + if (!script || timeout_secs === undefined || !display_name) { + throw new Error("task tool input validation failed: expected bash task args"); + } + + bashTool ??= createBashTool(config); + + const bashResult = (await bashTool.execute!( + { + script, + timeout_secs, + run_in_background, + display_name, + }, + { abortSignal, toolCallId, messages } + )) as BashToolResult; + + if ( + bashResult.success && + "backgroundProcessId" in bashResult && + bashResult.backgroundProcessId + ) { + return parseToolResult( + TaskToolResultSchema, + { status: "running" as const, taskId: toBashTaskId(bashResult.backgroundProcessId) }, + "task" + ); + } + + return parseToolResult( + TaskToolResultSchema, + { + status: "completed" as const, + reportMarkdown: formatBashReport({ script, display_name }, bashResult), + title: display_name, + exitCode: bashResult.exitCode, + note: "note" in bashResult ? bashResult.note : undefined, + truncated: "truncated" in bashResult ? bashResult.truncated : undefined, + }, + "task" + ); + } + + const { subagent_type, prompt, title, run_in_background } = validatedArgs; + if (!subagent_type || !prompt || !title) { + throw new Error("task tool input validation failed: expected agent task args"); + } + const workspaceId = requireWorkspaceId(config, "task"); const taskService = requireTaskService(config, "task"); - if (abortSignal?.aborted) { - throw new Error("Interrupted"); + // Disallow recursive sub-agent spawning. + if (config.enableAgentReport) { + throw new Error("Sub-agent workspaces may not spawn additional sub-agent tasks."); } // Plan mode is explicitly non-executing. Allow only read-only exploration tasks. - if (config.mode === "plan" && args.subagent_type === "exec") { + if (config.mode === "plan" && subagent_type === "exec") { throw new Error('In Plan Mode you may only spawn subagent_type: "explore" tasks.'); } @@ -32,9 +138,9 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => { const created = await taskService.create({ parentWorkspaceId: workspaceId, kind: "agent", - agentType: args.subagent_type, - prompt: args.prompt, - title: args.title, + agentType: subagent_type, + prompt, + title, modelString, thinkingLevel, }); @@ -43,7 +149,7 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => { throw new Error(created.error); } - if (args.run_in_background) { + if (run_in_background) { return parseToolResult( TaskToolResultSchema, { status: created.data.status, taskId: created.data.taskId }, @@ -63,7 +169,7 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => { taskId: created.data.taskId, reportMarkdown: report.reportMarkdown, title: report.title, - agentType: args.subagent_type, + agentType: subagent_type, }, "task" ); diff --git a/src/node/services/tools/taskId.ts b/src/node/services/tools/taskId.ts new file mode 100644 index 0000000000..4466e1e3ff --- /dev/null +++ b/src/node/services/tools/taskId.ts @@ -0,0 +1,24 @@ +import assert from "node:assert/strict"; + +const BASH_TASK_ID_PREFIX = "bash:"; + +export function toBashTaskId(processId: string): string { + assert(typeof processId === "string", "toBashTaskId: processId must be a string"); + const trimmed = processId.trim(); + assert(trimmed.length > 0, "toBashTaskId: processId must be non-empty"); + return `${BASH_TASK_ID_PREFIX}${trimmed}`; +} + +export function fromBashTaskId(taskId: string): string | null { + assert(typeof taskId === "string", "fromBashTaskId: taskId must be a string"); + if (!taskId.startsWith(BASH_TASK_ID_PREFIX)) { + return null; + } + + const processId = taskId.slice(BASH_TASK_ID_PREFIX.length).trim(); + return processId.length > 0 ? processId : null; +} + +export function isBashTaskId(taskId: string): boolean { + return fromBashTaskId(taskId) !== null; +} diff --git a/src/node/services/tools/task_await.test.ts b/src/node/services/tools/task_await.test.ts index 12c411782e..acd45044de 100644 --- a/src/node/services/tools/task_await.test.ts +++ b/src/node/services/tools/task_await.test.ts @@ -160,4 +160,67 @@ describe("task_await tool", () => { ], }); }); + + it("treats timeout_secs=0 as non-blocking for agent tasks", async () => { + using tempDir = new TestTempDir("test-task-await-tool-timeout-zero"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const waitForAgentReport = mock(() => { + throw new Error("waitForAgentReport should not be called for timeout_secs=0"); + }); + const getAgentTaskStatus = mock(() => "running" as const); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1"]), + isDescendantAgentTask: mock(() => true), + getAgentTaskStatus, + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, taskService }); + + const result: unknown = await Promise.resolve( + tool.execute!({ timeout_secs: 0 }, mockToolCallOptions) + ); + + expect(result).toEqual({ results: [{ status: "running", taskId: "t1" }] }); + expect(waitForAgentReport).toHaveBeenCalledTimes(0); + expect(getAgentTaskStatus).toHaveBeenCalledWith("t1"); + }); + + it("returns completed result when timeout_secs=0 and a cached report is available", async () => { + using tempDir = new TestTempDir("test-task-await-tool-timeout-zero-cached"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const getAgentTaskStatus = mock(() => null); + const waitForAgentReport = mock(() => + Promise.resolve({ reportMarkdown: "ok", title: "cached-title" }) + ); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1"]), + isDescendantAgentTask: mock(() => true), + getAgentTaskStatus, + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, taskService }); + + const result: unknown = await Promise.resolve( + tool.execute!({ timeout_secs: 0 }, mockToolCallOptions) + ); + + expect(result).toEqual({ + results: [ + { + status: "completed", + taskId: "t1", + reportMarkdown: "ok", + title: "cached-title", + }, + ], + }); + expect(getAgentTaskStatus).toHaveBeenCalledWith("t1"); + expect(waitForAgentReport).toHaveBeenCalledTimes(1); + }); }); diff --git a/src/node/services/tools/task_await.ts b/src/node/services/tools/task_await.ts index 7fa2065140..37b503b7dc 100644 --- a/src/node/services/tools/task_await.ts +++ b/src/node/services/tools/task_await.ts @@ -3,6 +3,7 @@ import { tool } from "ai"; import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; import { TaskAwaitToolResultSchema, TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; +import { fromBashTaskId, toBashTaskId } from "./taskId"; import { dedupeStrings, parseToolResult, @@ -12,11 +13,43 @@ import { function coerceTimeoutMs(timeoutSecs: unknown): number | undefined { if (typeof timeoutSecs !== "number" || !Number.isFinite(timeoutSecs)) return undefined; + if (timeoutSecs < 0) return undefined; const timeoutMs = Math.floor(timeoutSecs * 1000); - if (timeoutMs <= 0) return undefined; return timeoutMs; } +function coerceTimeoutSecs(timeoutSecs: unknown): number | undefined { + if (typeof timeoutSecs !== "number" || !Number.isFinite(timeoutSecs)) return undefined; + if (timeoutSecs < 0) return undefined; + return timeoutSecs; +} + +function formatBashOutputReport(args: { + processId: string; + status: string; + exitCode?: number; + output: string; +}): string { + const lines: string[] = []; + + lines.push(`### Bash task: ${args.processId}`); + lines.push(""); + + lines.push(`status: ${args.status}`); + if (args.exitCode !== undefined) { + lines.push(`exitCode: ${args.exitCode}`); + } + + if (args.output.trim().length > 0) { + lines.push(""); + lines.push("```text"); + lines.push(args.output.trimEnd()); + lines.push("```"); + } + + return lines.join("\n"); +} + export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { return tool({ description: TOOL_DEFINITIONS.task_await.description, @@ -26,14 +59,32 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { const taskService = requireTaskService(config, "task_await"); const timeoutMs = coerceTimeoutMs(args.timeout_secs); + const timeoutSecsForBash = coerceTimeoutSecs(args.timeout_secs) ?? 10 * 60; const requestedIds: string[] | null = args.task_ids && args.task_ids.length > 0 ? args.task_ids : null; - const candidateTaskIds = + let candidateTaskIds: string[] = requestedIds ?? taskService.listActiveDescendantAgentTaskIds(workspaceId); + if (!requestedIds && config.backgroundProcessManager) { + const processes = await config.backgroundProcessManager.list(); + const bashTaskIds = processes + .filter((proc) => { + if (proc.status !== "running") return false; + return ( + proc.workspaceId === workspaceId || + taskService.isDescendantAgentTask(workspaceId, proc.workspaceId) + ); + }) + .map((proc) => toBashTaskId(proc.id)); + + candidateTaskIds = [...candidateTaskIds, ...bashTaskIds]; + } + const uniqueTaskIds = dedupeStrings(candidateTaskIds); + + const agentTaskIds = uniqueTaskIds.filter((taskId) => !taskId.startsWith("bash:")); const bulkFilter = ( taskService as unknown as { filterDescendantAgentTaskIds?: ( @@ -42,18 +93,117 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { ) => string[]; } ).filterDescendantAgentTaskIds; - const descendantTaskIdSet = new Set( + const descendantAgentTaskIdSet = new Set( typeof bulkFilter === "function" - ? bulkFilter.call(taskService, workspaceId, uniqueTaskIds) - : uniqueTaskIds.filter((taskId) => taskService.isDescendantAgentTask(workspaceId, taskId)) + ? bulkFilter.call(taskService, workspaceId, agentTaskIds) + : agentTaskIds.filter((taskId) => taskService.isDescendantAgentTask(workspaceId, taskId)) ); const results = await Promise.all( uniqueTaskIds.map(async (taskId) => { - if (!descendantTaskIdSet.has(taskId)) { + const maybeProcessId = fromBashTaskId(taskId); + if (taskId.startsWith("bash:") && !maybeProcessId) { + return { status: "error" as const, taskId, error: "Invalid bash taskId." }; + } + + if (maybeProcessId) { + if (!config.backgroundProcessManager) { + return { + status: "error" as const, + taskId, + error: "Background process manager not available", + }; + } + + const proc = await config.backgroundProcessManager.getProcess(maybeProcessId); + if (!proc) { + return { status: "not_found" as const, taskId }; + } + + const inScope = + proc.workspaceId === workspaceId || + taskService.isDescendantAgentTask(workspaceId, proc.workspaceId); + if (!inScope) { + return { status: "invalid_scope" as const, taskId }; + } + + const outputResult = await config.backgroundProcessManager.getOutput( + maybeProcessId, + args.filter, + args.filter_exclude, + timeoutSecsForBash, + abortSignal, + workspaceId, + "task_await" + ); + + if (!outputResult.success) { + return { status: "error" as const, taskId, error: outputResult.error }; + } + + if (outputResult.status === "running" || outputResult.status === "interrupted") { + return { + status: "running" as const, + taskId, + output: outputResult.output, + elapsed_ms: outputResult.elapsed_ms, + note: outputResult.note, + }; + } + + return { + status: "completed" as const, + taskId, + title: proc.displayName ?? proc.id, + reportMarkdown: formatBashOutputReport({ + processId: proc.id, + status: outputResult.status, + exitCode: outputResult.exitCode, + output: outputResult.output, + }), + output: outputResult.output, + elapsed_ms: outputResult.elapsed_ms, + exitCode: outputResult.exitCode, + note: outputResult.note, + }; + } + + if (!descendantAgentTaskIdSet.has(taskId)) { return { status: "invalid_scope" as const, taskId }; } + // When timeout_secs=0 (or rounds down to 0ms), task_await should be non-blocking. + // `waitForAgentReport` asserts timeoutMs > 0, so handle 0 explicitly by returning the + // current task status instead of awaiting. + if (timeoutMs === 0) { + const status = taskService.getAgentTaskStatus(taskId); + if (status === "queued" || status === "running" || status === "awaiting_report") { + return { status, taskId }; + } + + // Best-effort: the task might already have a cached report (even if its workspace was + // cleaned up). Avoid blocking when it isn't available. + try { + const report = await taskService.waitForAgentReport(taskId, { + timeoutMs: 1, + abortSignal, + requestingWorkspaceId: workspaceId, + }); + return { + status: "completed" as const, + taskId, + reportMarkdown: report.reportMarkdown, + title: report.title, + }; + } catch (error: unknown) { + const message = error instanceof Error ? error.message : String(error); + if (/not found/i.test(message)) { + return { status: "not_found" as const, taskId }; + } + return { status: "error" as const, taskId, error: message }; + } + } + try { const report = await taskService.waitForAgentReport(taskId, { timeoutMs, diff --git a/src/node/services/tools/task_list.ts b/src/node/services/tools/task_list.ts index 1027793a8a..bd904f2fd6 100644 --- a/src/node/services/tools/task_list.ts +++ b/src/node/services/tools/task_list.ts @@ -3,6 +3,7 @@ import { tool } from "ai"; import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; import { TaskListToolResultSchema, TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; +import { toBashTaskId } from "./taskId"; import { parseToolResult, requireTaskService, requireWorkspaceId } from "./toolUtils"; const DEFAULT_STATUSES = ["queued", "running", "awaiting_report"] as const; @@ -11,13 +12,44 @@ export const createTaskListTool: ToolFactory = (config: ToolConfiguration) => { return tool({ description: TOOL_DEFINITIONS.task_list.description, inputSchema: TOOL_DEFINITIONS.task_list.schema, - execute: (args): unknown => { + execute: async (args): Promise => { const workspaceId = requireWorkspaceId(config, "task_list"); const taskService = requireTaskService(config, "task_list"); const statuses = args.statuses && args.statuses.length > 0 ? args.statuses : [...DEFAULT_STATUSES]; - const tasks = taskService.listDescendantAgentTasks(workspaceId, { statuses }); + + const agentTasks = taskService.listDescendantAgentTasks(workspaceId, { statuses }); + const tasks = [...agentTasks]; + + if (config.backgroundProcessManager) { + const depthByWorkspaceId = new Map(); + depthByWorkspaceId.set(workspaceId, 0); + for (const t of agentTasks) { + depthByWorkspaceId.set(t.taskId, t.depth); + } + + const processes = await config.backgroundProcessManager.list(); + for (const proc of processes) { + const inScope = + proc.workspaceId === workspaceId || + taskService.isDescendantAgentTask(workspaceId, proc.workspaceId); + if (!inScope) continue; + + const status = proc.status === "running" ? "running" : "reported"; + if (!statuses.includes(status)) continue; + + const parentDepth = depthByWorkspaceId.get(proc.workspaceId) ?? 0; + tasks.push({ + taskId: toBashTaskId(proc.id), + status, + parentWorkspaceId: proc.workspaceId, + title: proc.displayName ?? proc.id, + createdAt: new Date(proc.startTime).toISOString(), + depth: parentDepth + 1, + }); + } + } return parseToolResult(TaskListToolResultSchema, { tasks }, "task_list"); }, diff --git a/src/node/services/tools/task_terminate.ts b/src/node/services/tools/task_terminate.ts index 20d9460c3a..9f67d67c95 100644 --- a/src/node/services/tools/task_terminate.ts +++ b/src/node/services/tools/task_terminate.ts @@ -6,6 +6,7 @@ import { TOOL_DEFINITIONS, } from "@/common/utils/tools/toolDefinitions"; +import { fromBashTaskId } from "./taskId"; import { dedupeStrings, parseToolResult, @@ -25,6 +26,44 @@ export const createTaskTerminateTool: ToolFactory = (config: ToolConfiguration) const results = await Promise.all( uniqueTaskIds.map(async (taskId) => { + const maybeProcessId = fromBashTaskId(taskId); + if (taskId.startsWith("bash:") && !maybeProcessId) { + return { status: "error" as const, taskId, error: "Invalid bash taskId." }; + } + + if (maybeProcessId) { + if (!config.backgroundProcessManager) { + return { + status: "error" as const, + taskId, + error: "Background process manager not available", + }; + } + + const proc = await config.backgroundProcessManager.getProcess(maybeProcessId); + if (!proc) { + return { status: "not_found" as const, taskId }; + } + + const inScope = + proc.workspaceId === workspaceId || + taskService.isDescendantAgentTask(workspaceId, proc.workspaceId); + if (!inScope) { + return { status: "invalid_scope" as const, taskId }; + } + + const terminateResult = await config.backgroundProcessManager.terminate(maybeProcessId); + if (!terminateResult.success) { + return { status: "error" as const, taskId, error: terminateResult.error }; + } + + return { + status: "terminated" as const, + taskId, + terminatedTaskIds: [taskId], + }; + } + const terminateResult = await taskService.terminateDescendantAgentTask( workspaceId, taskId diff --git a/tests/ipc/backgroundBash.test.ts b/tests/ipc/backgroundBash.test.ts index bf30191ebe..34ca7f20c1 100644 --- a/tests/ipc/backgroundBash.test.ts +++ b/tests/ipc/backgroundBash.test.ts @@ -22,67 +22,111 @@ import { generateBranchName, createWorkspaceWithInit, sendMessageAndWait, - extractTextFromEvents, HAIKU_MODEL, } from "./helpers"; import type { WorkspaceChatMessage } from "../../src/common/orpc/types"; import type { ToolPolicy } from "../../src/common/utils/tools/toolPolicy"; -// Tool policy: Allow bash and bash_background_* tools (bash prefix matches all) -const BACKGROUND_TOOLS: ToolPolicy = [ - { regex_match: "bash", action: "enable" }, - { regex_match: "file_.*", action: "disable" }, +// Tool policy: Enable only task* tools (task, task_list, task_await, task_terminate). +const TASK_TOOLS: ToolPolicy = [ + { regex_match: ".*", action: "disable" }, + { regex_match: "task.*", action: "enable" }, ]; // Extended timeout for tests making multiple AI calls const BACKGROUND_TEST_TIMEOUT_MS = 75000; /** - * Extract process ID from bash tool output containing "Background process started with ID: xxx" - * The process ID is now the display_name, which can be any string like "Sleep Process" or "bash_123" + * Extract a bash taskId (e.g. "bash:") from task(kind="bash") results. */ -function extractProcessId(events: WorkspaceChatMessage[]): string | null { +function extractBashTaskId(events: WorkspaceChatMessage[]): string | null { for (const event of events) { - if ( - "type" in event && - event.type === "tool-call-end" && - "toolName" in event && - event.toolName === "bash" - ) { - const result = (event as { result?: { output?: string } }).result?.output; - if (typeof result === "string") { - // Match any non-empty process ID after "Background process started with ID: " - const match = result.match(/Background process started with ID: (.+)$/); - if (match) return match[1].trim(); + if (!("type" in event) || event.type !== "tool-call-end") continue; + if (!("toolName" in event) || event.toolName !== "task") continue; + + const taskId = (event as { result?: { taskId?: string } }).result?.taskId; + if (typeof taskId !== "string") continue; + + const trimmed = taskId.trim(); + if (trimmed.startsWith("bash:")) return trimmed; + } + return null; +} + +/** + * Extract taskIds from a task_list tool result. + */ +function extractTaskListTaskIds(events: WorkspaceChatMessage[]): string[] { + for (const event of events) { + if (!("type" in event) || event.type !== "tool-call-end") continue; + if (!("toolName" in event) || event.toolName !== "task_list") continue; + + const tasks = (event as { result?: { tasks?: Array<{ taskId?: string }> } }).result?.tasks; + if (!Array.isArray(tasks)) return []; + + return tasks + .map((t) => t.taskId) + .filter((taskId): taskId is string => typeof taskId === "string"); + } + return []; +} + +/** + * Collect output strings from task_await tool results. + */ +function collectTaskAwaitOutputs(events: WorkspaceChatMessage[]): string { + const outputs: string[] = []; + + for (const event of events) { + if (!("type" in event) || event.type !== "tool-call-end") continue; + if (!("toolName" in event) || event.toolName !== "task_await") continue; + + const results = ( + event as { result?: { results?: Array<{ output?: string; reportMarkdown?: string }> } } + ).result?.results; + + if (!Array.isArray(results)) continue; + + for (const result of results) { + if (typeof result.output === "string" && result.output.length > 0) { + outputs.push(result.output); + continue; + } + if (typeof result.reportMarkdown === "string" && result.reportMarkdown.length > 0) { + outputs.push(result.reportMarkdown); } } } - return null; + + return outputs.join("\n"); } /** - * Check if any tool output contains a specific string + * Extract terminated task ids from a task_terminate tool result. */ -function toolOutputContains( - events: WorkspaceChatMessage[], - toolName: string, - substring: string -): boolean { +function extractTerminatedTaskIds(events: WorkspaceChatMessage[]): string[] { for (const event of events) { - if ( - "type" in event && - event.type === "tool-call-end" && - "toolName" in event && - event.toolName === toolName - ) { - const result = (event as { result?: { output?: string; message?: string } }).result; - const text = result?.output ?? result?.message; - if (typeof text === "string" && text.includes(substring)) { - return true; + if (!("type" in event) || event.type !== "tool-call-end") continue; + if (!("toolName" in event) || event.toolName !== "task_terminate") continue; + + const results = ( + event as { + result?: { + results?: Array<{ status?: string; terminatedTaskIds?: string[] }>; + }; } + ).result?.results; + if (!Array.isArray(results)) return []; + + const terminated: string[] = []; + for (const result of results) { + if (result.status !== "terminated") continue; + if (!Array.isArray(result.terminatedTaskIds)) continue; + terminated.push(...result.terminatedTaskIds); } + return terminated; } - return false; + return []; } // Skip all tests if TEST_INTEGRATION is not set @@ -119,47 +163,44 @@ describeIntegration("Background Bash Execution", () => { ); try { - // Start a background process using explicit tool call instruction + // Start a background bash task via task(kind="bash") const startEvents = await sendMessageAndWait( env, workspaceId, - "Use the bash tool with run_in_background=true to run: true && sleep 30", + 'Use the task tool with args: { kind: "bash", script: "true && sleep 30", timeout_secs: 60, run_in_background: true, display_name: "bg-basic" }. Do not spawn a sub-agent.', HAIKU_MODEL, - BACKGROUND_TOOLS, + TASK_TOOLS, 30000 ); - // Extract process ID from tool output (now uses display_name) - const processId = extractProcessId(startEvents); - expect(processId).not.toBeNull(); - expect(processId!.length).toBeGreaterThan(0); + const taskId = extractBashTaskId(startEvents); + expect(taskId).not.toBeNull(); + expect(taskId!.startsWith("bash:")).toBe(true); - // List background processes to verify it's tracked + // List tasks to verify it's tracked const listEvents = await sendMessageAndWait( env, workspaceId, - "Use the bash_background_list tool to show running background processes", + "Use task_list to show running tasks.", HAIKU_MODEL, - BACKGROUND_TOOLS, + TASK_TOOLS, 20000 ); - // Verify the process appears in the list - const responseText = extractTextFromEvents(listEvents); - expect( - responseText.includes(processId!) || - toolOutputContains(listEvents, "bash_background_list", processId!) - ).toBe(true); + const listedTaskIds = extractTaskListTaskIds(listEvents); + expect(listedTaskIds).toContain(taskId!); // Clean up: terminate the background process - await sendMessageAndWait( + const terminateEvents = await sendMessageAndWait( env, workspaceId, - `Use bash_background_terminate to terminate process ${processId}`, + `Use task_terminate with task_ids: ["${taskId}"] to terminate the task.`, HAIKU_MODEL, - BACKGROUND_TOOLS, + TASK_TOOLS, 20000 ); + const terminatedTaskIds = extractTerminatedTaskIds(terminateEvents); + expect(terminatedTaskIds).toContain(taskId!); } finally { await cleanup(); } @@ -196,53 +237,44 @@ describeIntegration("Background Bash Execution", () => { ); try { - // Start a long-running background process + // Start a long-running background bash task const startEvents = await sendMessageAndWait( env, workspaceId, - "Use bash with run_in_background=true to run: true && sleep 300", + 'Use the task tool with args: { kind: "bash", script: "true && sleep 300", timeout_secs: 600, run_in_background: true, display_name: "bg-terminate" }. Do not spawn a sub-agent.', HAIKU_MODEL, - BACKGROUND_TOOLS, + TASK_TOOLS, 30000 ); - const processId = extractProcessId(startEvents); - expect(processId).not.toBeNull(); + const taskId = extractBashTaskId(startEvents); + expect(taskId).not.toBeNull(); - // Terminate the process + // Terminate the task const terminateEvents = await sendMessageAndWait( env, workspaceId, - `Use bash_background_terminate to terminate process ${processId}`, + `Use task_terminate with task_ids: ["${taskId}"] to terminate the task.`, HAIKU_MODEL, - BACKGROUND_TOOLS, + TASK_TOOLS, 20000 ); - // Verify termination succeeded (tool output should indicate success) - const terminateSuccess = - toolOutputContains(terminateEvents, "bash_background_terminate", "terminated") || - toolOutputContains(terminateEvents, "bash_background_terminate", "success") || - toolOutputContains(terminateEvents, "bash_background_terminate", processId!); - expect(terminateSuccess).toBe(true); + const terminatedTaskIds = extractTerminatedTaskIds(terminateEvents); + expect(terminatedTaskIds).toContain(taskId!); - // List to verify status changed to killed + // List to verify the task remains discoverable (including reported) const listEvents = await sendMessageAndWait( env, workspaceId, - "Use bash_background_list to show all background processes including terminated ones", + 'Use task_list with statuses: ["queued", "running", "awaiting_report", "reported"].', HAIKU_MODEL, - BACKGROUND_TOOLS, + TASK_TOOLS, 20000 ); - // Process should show as killed/terminated - const listResponse = extractTextFromEvents(listEvents); - expect( - listResponse.toLowerCase().includes("killed") || - listResponse.toLowerCase().includes("terminated") || - toolOutputContains(listEvents, "bash_background_list", "killed") - ).toBe(true); + const listedTaskIds = extractTaskListTaskIds(listEvents); + expect(listedTaskIds).toContain(taskId!); } finally { await cleanup(); } @@ -284,42 +316,27 @@ describeIntegration("Background Bash Execution", () => { const startEvents = await sendMessageAndWait( env, workspaceId, - `Use bash with run_in_background=true to run: echo "${marker}" && sleep 1`, + `Use the task tool with args: { kind: "bash", script: "echo \"${marker}\" && sleep 1", timeout_secs: 30, run_in_background: true, display_name: "bg-output" }. Do not spawn a sub-agent.`, HAIKU_MODEL, - BACKGROUND_TOOLS, + TASK_TOOLS, 30000 ); - const processId = extractProcessId(startEvents); - expect(processId).not.toBeNull(); - - // Wait for process to complete and output to be written - await new Promise((resolve) => setTimeout(resolve, 2000)); + const taskId = extractBashTaskId(startEvents); + expect(taskId).not.toBeNull(); - // List processes - should show the marker in output or process details - const listEvents = await sendMessageAndWait( + // Wait for the process to complete and retrieve its output + const awaitEvents = await sendMessageAndWait( env, workspaceId, - `Use bash_background_list to show details of background processes`, + `Use task_await with task_ids: ["${taskId}"] and timeout_secs: 10 to retrieve output.`, HAIKU_MODEL, - BACKGROUND_TOOLS, + TASK_TOOLS, 20000 ); - // The process should have exited (status: exited) after sleep completes - const listResponse = extractTextFromEvents(listEvents); - const hasExited = - listResponse.toLowerCase().includes("exited") || - listResponse.toLowerCase().includes("completed") || - toolOutputContains(listEvents, "bash_background_list", "exited"); - - // Process may still be running or just finished - either is acceptable - // The main assertion is that the process was tracked - expect( - hasExited || - listResponse.includes(processId!) || - toolOutputContains(listEvents, "bash_background_list", processId!) - ).toBe(true); + const output = collectTaskAwaitOutputs(awaitEvents); + expect(output).toContain(marker); } finally { await cleanup(); } diff --git a/tests/ipc/backgroundBashDirect.test.ts b/tests/ipc/backgroundBashDirect.test.ts index b7d95e283d..4e108c59cd 100644 --- a/tests/ipc/backgroundBashDirect.test.ts +++ b/tests/ipc/backgroundBashDirect.test.ts @@ -20,25 +20,20 @@ import * as path from "path"; import { createTestEnvironment, cleanupTestEnvironment, type TestEnvironment } from "./setup"; import { createTempGitRepo, cleanupTempGitRepo, generateBranchName } from "./helpers"; import { detectDefaultTrunkBranch } from "../../src/node/git"; -import { getToolsForModel } from "../../src/common/utils/tools/tools"; import { LocalRuntime } from "../../src/node/runtime/LocalRuntime"; import { BackgroundProcessManager } from "../../src/node/services/backgroundProcessManager"; -import type { InitStateManager } from "../../src/node/services/initStateManager"; +import { createBashTool } from "../../src/node/services/tools/bash"; +import { createBashOutputTool } from "../../src/node/services/tools/bash_output"; // Access private fields from ServiceContainer for direct testing interface ServiceContainerPrivates { backgroundProcessManager: BackgroundProcessManager; - initStateManager: InitStateManager; } function getBackgroundProcessManager(env: TestEnvironment): BackgroundProcessManager { return (env.services as unknown as ServiceContainerPrivates).backgroundProcessManager; } -function getInitStateManager(env: TestEnvironment): InitStateManager { - return (env.services as unknown as ServiceContainerPrivates).initStateManager; -} - interface ToolExecuteResult { success: boolean; backgroundProcessId?: string; @@ -82,31 +77,30 @@ describe("Background Bash Direct Integration", () => { }); it("should retrieve output after tools are recreated (multi-message flow)", async () => { - // Simulates production flow where tools are recreated between messages + // Simulates production flow where tool instances are recreated between messages const manager = getBackgroundProcessManager(env); - const initStateManager = getInitStateManager(env); const runtime = new LocalRuntime(workspacePath); const marker = `MULTI_MSG_${Date.now()}`; + const toolConfig = { + cwd: workspacePath, + runtime, + secrets: {}, + muxEnv: {}, + runtimeTempDir: "/tmp", + backgroundProcessManager: manager, + workspaceId, + }; + // Message 1: Spawn background process - const tools1 = await getToolsForModel( - "anthropic:claude-sonnet-4-20250514", + const bash1 = createBashTool(toolConfig); + const spawnResult = (await bash1.execute!( { - cwd: workspacePath, - runtime, - secrets: {}, - muxEnv: {}, - runtimeTempDir: "/tmp", - backgroundProcessManager: manager, - workspaceId, + script: `echo "${marker}"`, + run_in_background: true, + display_name: `spawn_${Date.now()}`, + timeout_secs: 30, }, - workspaceId, - initStateManager, - {} - ); - - const spawnResult = (await tools1.bash.execute!( - { script: `echo "${marker}"`, run_in_background: true }, { toolCallId: "spawn", messages: [] } )) as ToolExecuteResult; @@ -116,24 +110,9 @@ describe("Background Bash Direct Integration", () => { await new Promise((resolve) => setTimeout(resolve, 200)); // Message 2: Read with NEW tool instances (same manager) - const tools2 = await getToolsForModel( - "anthropic:claude-sonnet-4-20250514", - { - cwd: workspacePath, - runtime, - secrets: {}, - muxEnv: {}, - runtimeTempDir: "/tmp", - backgroundProcessManager: manager, - workspaceId, - }, - workspaceId, - initStateManager, - {} - ); - - const outputResult = (await tools2.bash_output.execute!( - { process_id: processId }, + const bashOutput2 = createBashOutputTool(toolConfig); + const outputResult = (await bashOutput2.execute!( + { process_id: processId, timeout_secs: 0 }, { toolCallId: "read", messages: [] } )) as ToolExecuteResult; @@ -416,34 +395,29 @@ describe("Foreground to Background Migration", () => { // 4. Process continues running and output is accessible via bash_output const manager = getBackgroundProcessManager(env); - const initStateManager = getInitStateManager(env); const runtime = new LocalRuntime(workspacePath); + const toolConfig = { + cwd: workspacePath, + runtime, + secrets: {}, + muxEnv: {}, + runtimeTempDir: "/tmp", + backgroundProcessManager: manager, + workspaceId, + }; + const testId = `fg_to_bg_${Date.now()}`; const marker1 = `BEFORE_BG_${testId}`; const marker2 = `AFTER_BG_${testId}`; // Create tools for "message 1" - const tools1 = await getToolsForModel( - "anthropic:claude-sonnet-4-20250514", - { - cwd: workspacePath, - runtime, - secrets: {}, - muxEnv: {}, - runtimeTempDir: "/tmp", - backgroundProcessManager: manager, - workspaceId, - }, - workspaceId, - initStateManager, - {} - ); + const bash1 = createBashTool(toolConfig); // Start foreground bash that runs for ~3 seconds // Script: output marker1, sleep, output marker2 const toolCallId = `tool_${testId}`; - const bashPromise = tools1.bash.execute!( + const bashPromise = bash1.execute!( { script: `echo "${marker1}"; sleep 2; echo "${marker2}"`, run_in_background: false, @@ -487,27 +461,13 @@ describe("Foreground to Background Migration", () => { // === Simulate new message (stream ends, new stream begins) === // Create NEW tool instances (same manager reference, fresh tools) - const tools2 = await getToolsForModel( - "anthropic:claude-sonnet-4-20250514", - { - cwd: workspacePath, - runtime, - secrets: {}, - muxEnv: {}, - runtimeTempDir: "/tmp", - backgroundProcessManager: manager, - workspaceId, - }, - workspaceId, - initStateManager, - {} - ); + const bashOutput2 = createBashOutputTool(toolConfig); // Wait for process to complete (marker2 should appear) await new Promise((resolve) => setTimeout(resolve, 2500)); // Get output via bash_output tool (new tool instance) - const outputResult = (await tools2.bash_output.execute!( + const outputResult = (await bashOutput2.execute!( { process_id: testId, timeout_secs: 0 }, { toolCallId: "output_read", messages: [] } )) as ToolExecuteResult; @@ -525,34 +485,29 @@ describe("Foreground to Background Migration", () => { // after migration and accessible in subsequent messages const manager = getBackgroundProcessManager(env); - const initStateManager = getInitStateManager(env); const runtime = new LocalRuntime(workspacePath); + const toolConfig = { + cwd: workspacePath, + runtime, + secrets: {}, + muxEnv: {}, + runtimeTempDir: "/tmp", + backgroundProcessManager: manager, + workspaceId, + }; + const testId = `preserve_output_${Date.now()}`; const marker1 = `EARLY_${testId}`; const marker2 = `LATE_${testId}`; - const tools1 = await getToolsForModel( - "anthropic:claude-sonnet-4-20250514", - { - cwd: workspacePath, - runtime, - secrets: {}, - muxEnv: {}, - runtimeTempDir: "/tmp", - backgroundProcessManager: manager, - workspaceId, - }, - workspaceId, - initStateManager, - {} - ); + const bash1 = createBashTool(toolConfig); const toolCallId = `tool_${testId}`; // Script outputs marker1, sleeps, then outputs marker2 const script = `echo "${marker1}"; sleep 2; echo "${marker2}"`; - const bashPromise = tools1.bash.execute!( + const bashPromise = bash1.execute!( { script, run_in_background: false, @@ -592,32 +547,27 @@ describe("Foreground to Background Migration", () => { it("should handle migration when process exits during send", async () => { // Edge case: process exits right as we try to background it const manager = getBackgroundProcessManager(env); - const initStateManager = getInitStateManager(env); const runtime = new LocalRuntime(workspacePath); + const toolConfig = { + cwd: workspacePath, + runtime, + secrets: {}, + muxEnv: {}, + runtimeTempDir: "/tmp", + backgroundProcessManager: manager, + workspaceId, + }; + const testId = `fast_exit_${Date.now()}`; const marker = `QUICK_${testId}`; - const tools = await getToolsForModel( - "anthropic:claude-sonnet-4-20250514", - { - cwd: workspacePath, - runtime, - secrets: {}, - muxEnv: {}, - runtimeTempDir: "/tmp", - backgroundProcessManager: manager, - workspaceId, - }, - workspaceId, - initStateManager, - {} - ); + const bash = createBashTool(toolConfig); const toolCallId = `tool_${testId}`; // Very fast script - const bashPromise = tools.bash.execute!( + const bashPromise = bash.execute!( { script: `echo "${marker}"`, run_in_background: false, @@ -646,9 +596,18 @@ describe("Foreground to Background Migration", () => { // new message), the abort signal would kill the process with exit code -997. const manager = getBackgroundProcessManager(env); - const initStateManager = getInitStateManager(env); const runtime = new LocalRuntime(workspacePath); + const toolConfig = { + cwd: workspacePath, + runtime, + secrets: {}, + muxEnv: {}, + runtimeTempDir: "/tmp", + backgroundProcessManager: manager, + workspaceId, + }; + const testId = `abort_after_bg_${Date.now()}`; const marker1 = `BEFORE_${testId}`; const marker2 = `AFTER_${testId}`; @@ -656,26 +615,12 @@ describe("Foreground to Background Migration", () => { // Create an AbortController to simulate stream abort const abortController = new AbortController(); - const tools = await getToolsForModel( - "anthropic:claude-sonnet-4-20250514", - { - cwd: workspacePath, - runtime, - secrets: {}, - muxEnv: {}, - runtimeTempDir: "/tmp", - backgroundProcessManager: manager, - workspaceId, - }, - workspaceId, - initStateManager, - {} - ); + const bash = createBashTool(toolConfig); const toolCallId = `tool_${testId}`; // Start a foreground bash with the abort signal - const bashPromise = tools.bash.execute!( + const bashPromise = bash.execute!( { script: `echo "${marker1}"; sleep 2; echo "${marker2}"`, run_in_background: false, diff --git a/tests/ipc/forkWorkspace.test.ts b/tests/ipc/forkWorkspace.test.ts index b976054329..baa02959ae 100644 --- a/tests/ipc/forkWorkspace.test.ts +++ b/tests/ipc/forkWorkspace.test.ts @@ -94,6 +94,8 @@ describeIntegration("Workspace fork", () => { // User expects: forked workspace is functional - can send messages to it const collector = createStreamCollector(env.orpc, forkedWorkspaceId); collector.start(); + await collector.waitForSubscription(); + const sendResult = await sendMessageWithModel( env, forkedWorkspaceId, @@ -149,6 +151,8 @@ describeIntegration("Workspace fork", () => { // Send a message that requires the historical context const collector = createStreamCollector(env.orpc, forkedWorkspaceId); collector.start(); + await collector.waitForSubscription(); + const sendResult = await sendMessageWithModel( env, forkedWorkspaceId, @@ -202,6 +206,10 @@ describeIntegration("Workspace fork", () => { const forkedCollector = createStreamCollector(env.orpc, forkedWorkspaceId); sourceCollector.start(); forkedCollector.start(); + await Promise.all([ + sourceCollector.waitForSubscription(), + forkedCollector.waitForSubscription(), + ]); // Send different messages to both concurrently const [sourceResult, forkedResult] = await Promise.all([ @@ -251,6 +259,7 @@ describeIntegration("Workspace fork", () => { // Start collector before starting stream const sourceCollector = createStreamCollector(env.orpc, sourceWorkspaceId); sourceCollector.start(); + await sourceCollector.waitForSubscription(); // Start a stream in the source workspace (don't await) void sendMessageWithModel( @@ -284,6 +293,8 @@ describeIntegration("Workspace fork", () => { // Send a message to the forked workspace const forkedCollector = createStreamCollector(env.orpc, forkedWorkspaceId); forkedCollector.start(); + await forkedCollector.waitForSubscription(); + const forkedSendResult = await sendMessageWithModel( env, forkedWorkspaceId, diff --git a/tests/ipc/ollama.test.ts b/tests/ipc/ollama.test.ts index db5f3ade68..e709d02568 100644 --- a/tests/ipc/ollama.test.ts +++ b/tests/ipc/ollama.test.ts @@ -138,12 +138,15 @@ describeOllama("Ollama integration", () => { const collector = createStreamCollector(env.orpc, workspaceId); collector.start(); try { - // Ask for current time which should trigger bash tool + // Ask for current time which should trigger task(kind="bash") const result = await sendMessageWithModel( env, workspaceId, - "What is the current date and time? Use the bash tool to find out.", - modelString("ollama", OLLAMA_MODEL) + 'Use task(kind="bash") to run: date. Set display_name="current-time" and timeout_secs=30. Do not spawn a sub-agent.', + modelString("ollama", OLLAMA_MODEL), + { + toolPolicy: [{ regex_match: "task", action: "require" }], + } ); expect(result.success).toBe(true); @@ -153,20 +156,32 @@ describeOllama("Ollama integration", () => { assertStreamSuccess(collector); - // Verify bash tool was called via events + // Verify task(kind="bash") was called via events const events = collector.getEvents(); const toolCallStarts = events.filter((e: any) => e.type === "tool-call-start"); expect(toolCallStarts.length).toBeGreaterThan(0); - const bashCall = toolCallStarts.find((e: any) => e.toolName === "bash"); - expect(bashCall).toBeDefined(); + const taskCall = toolCallStarts.find( + (e: any) => e.toolName === "task" && e.args && e.args.kind === "bash" + ); + expect(taskCall).toBeDefined(); - // Verify we got a text response with date/time info + // Verify we got a response and/or tool report with date/time info const deltas = collector.getDeltas(); const responseText = extractTextFromEvents(deltas).toLowerCase(); - // Should mention time or date in response - expect(responseText).toMatch(/time|date|am|pm|2024|2025/i); + const toolCallEnds = events.filter( + (e: any) => + e.type === "tool-call-end" && e.toolName === "task" && e.args && e.args.kind === "bash" + ); + const taskReport = toolCallEnds + .map((e: any) => e.result?.reportMarkdown) + .filter((t: any) => typeof t === "string") + .join("\n") + .toLowerCase(); + + // Should mention time or date in response or in the tool report + expect(`${responseText}\n${taskReport}`).toMatch(/time|date|am|pm|\d{2}:\d{2}|20\d{2}/i); } finally { collector.stop(); await cleanup(); diff --git a/tests/ipc/resumeStream.test.ts b/tests/ipc/resumeStream.test.ts index a362af63ad..52cc94f07c 100644 --- a/tests/ipc/resumeStream.test.ts +++ b/tests/ipc/resumeStream.test.ts @@ -34,8 +34,11 @@ describeIntegration("resumeStream", () => { void sendMessageWithModel( env, workspaceId, - `Run this bash command: for i in 1 2 3; do sleep 0.5; done && echo '${expectedWord}'`, - modelString("anthropic", "claude-sonnet-4-5") + `Use task(kind="bash") to run: for i in {1..10}; do sleep 0.5; done && echo '${expectedWord}'. Set display_name="resume-test" and timeout_secs=120. Do not spawn a sub-agent.`, + modelString("anthropic", "claude-sonnet-4-5"), + { + toolPolicy: [{ regex_match: "task", action: "require" }], + } ); // Wait for stream to start diff --git a/tests/ipc/runtimeExecuteBash.test.ts b/tests/ipc/runtimeExecuteBash.test.ts index 5a9779dc5c..bc1e16456f 100644 --- a/tests/ipc/runtimeExecuteBash.test.ts +++ b/tests/ipc/runtimeExecuteBash.test.ts @@ -35,11 +35,8 @@ import type { RuntimeConfig } from "../../src/common/types/runtime"; import type { WorkspaceChatMessage } from "../../src/common/orpc/types"; import type { ToolPolicy } from "../../src/common/utils/tools/toolPolicy"; -// Tool policy: Only allow bash tool -const BASH_ONLY: ToolPolicy = [ - { regex_match: "bash", action: "enable" }, - { regex_match: "file_.*", action: "disable" }, -]; +// Tool policy: Only allow the unified task tool (used as task(kind="bash")). +const TASK_BASH_ONLY: ToolPolicy = [{ regex_match: "task", action: "require" }]; /** * Collect tool outputs from stream events @@ -54,8 +51,9 @@ function collectToolOutputs(events: WorkspaceChatMessage[], toolName: string): s event.toolName === toolName ) .map((event) => { - const result = (event as { result?: { output?: string } }).result?.output; - return typeof result === "string" ? result : ""; + const result = (event as { result?: { output?: string; reportMarkdown?: string } }).result; + const text = toolName === "task" ? result?.reportMarkdown : result?.output; + return typeof text === "string" ? text : ""; }) .join("\n"); } @@ -67,13 +65,23 @@ function collectToolOutputs(events: WorkspaceChatMessage[], toolName: string): s function getToolDuration(events: WorkspaceChatMessage[], toolName: string): number { const startEvent = events.find( (e) => "type" in e && e.type === "tool-call-start" && "toolName" in e && e.toolName === toolName - ) as { timestamp?: number } | undefined; + ) as { toolCallId?: string; timestamp?: number } | undefined; + + if (!startEvent?.toolCallId || !startEvent.timestamp) { + return -1; + } const endEvent = events.find( - (e) => "type" in e && e.type === "tool-call-end" && "toolName" in e && e.toolName === toolName + (e) => + "type" in e && + e.type === "tool-call-end" && + "toolName" in e && + e.toolName === toolName && + "toolCallId" in e && + e.toolCallId === startEvent.toolCallId ) as { timestamp?: number } | undefined; - if (startEvent?.timestamp && endEvent?.timestamp) { + if (endEvent?.timestamp) { return endEvent.timestamp - startEvent.timestamp; } return -1; @@ -161,23 +169,33 @@ describeIntegration("Runtime Bash Execution", () => { const events = await sendMessageAndWait( env, workspaceId, - 'Run the bash command "echo Hello World"', + 'Use the task tool with args: { kind: "bash", script: "echo Hello World", timeout_secs: 30, run_in_background: false, display_name: "echo-hello" }. Do not spawn a sub-agent.', HAIKU_MODEL, - BASH_ONLY + TASK_BASH_ONLY ); // Extract response text const responseText = extractTextFromEvents(events); - // Verify the command output appears in the response - expect(responseText.toLowerCase()).toContain("hello world"); + // Verify the command output appears in the task tool result. + const taskOutput = collectToolOutputs(events, "task"); + expect(taskOutput.toLowerCase()).toContain("hello world"); + + // responseText might be empty if the model doesn't comment on the output. + if (responseText) { + expect(responseText.toLowerCase()).toContain("hello world"); + } - // Verify bash tool was called + // Verify task(kind="bash") was called const toolCallStarts = events.filter( (e) => "type" in e && e.type === "tool-call-start" ); - const bashCall = toolCallStarts.find((e) => "toolName" in e && e.toolName === "bash"); - expect(bashCall).toBeDefined(); + const taskCall = toolCallStarts.find((e) => { + if (!("toolName" in e) || e.toolName !== "task") return false; + const args = (e as { args?: { kind?: string } }).args; + return args?.kind === "bash"; + }); + expect(taskCall).toBeDefined(); } finally { await cleanup(); } @@ -220,23 +238,33 @@ describeIntegration("Runtime Bash Execution", () => { const events = await sendMessageAndWait( env, workspaceId, - 'Run bash command: export TEST_VAR="test123" && echo "Value: $TEST_VAR"', + 'Use the task tool with args: { kind: "bash", script: "export TEST_VAR=test123 && echo Value:$TEST_VAR", timeout_secs: 30, run_in_background: false, display_name: "env-var" }. Do not spawn a sub-agent.', HAIKU_MODEL, - BASH_ONLY + TASK_BASH_ONLY ); // Extract response text const responseText = extractTextFromEvents(events); - // Verify the env var value appears - expect(responseText).toContain("test123"); + // Verify the env var value appears in the task tool output. + const taskOutput = collectToolOutputs(events, "task"); + expect(taskOutput).toContain("test123"); - // Verify bash tool was called + // responseText might be empty if the model doesn't comment on the output. + if (responseText) { + expect(responseText).toContain("test123"); + } + + // Verify task(kind="bash") was called const toolCallStarts = events.filter( (e) => "type" in e && e.type === "tool-call-start" ); - const bashCall = toolCallStarts.find((e) => "toolName" in e && e.toolName === "bash"); - expect(bashCall).toBeDefined(); + const taskCall = toolCallStarts.find((e) => { + if (!("toolName" in e) || e.toolName !== "task") return false; + const args = (e as { args?: { kind?: string } }).args; + return args?.kind === "bash"; + }); + expect(taskCall).toBeDefined(); } finally { await cleanup(); } @@ -275,37 +303,28 @@ describeIntegration("Runtime Bash Execution", () => { ); try { - // Create a test file with JSON content - await sendMessageAndWait( - env, - workspaceId, - 'Run bash: echo \'{"test": "data"}\' > /tmp/test.json', - HAIKU_MODEL, - BASH_ONLY - ); - - // Test command that pipes file through stdin-reading command (grep) + // Test command that pipes a file through a stdin-reading command (grep) // This would hang forever if stdin.close() was used instead of stdin.abort() // Regression test for: https://github.com/coder/mux/issues/503 const events = await sendMessageAndWait( env, workspaceId, - "Run bash: cat /tmp/test.json | grep test", + 'Use the task tool with args: { kind: "bash", script: "echo testdata > /tmp/test.txt && cat /tmp/test.txt | grep test", timeout_secs: 30, run_in_background: false, display_name: "stdin-grep" }. Do not spawn a sub-agent.', HAIKU_MODEL, - BASH_ONLY, + TASK_BASH_ONLY, 30000 // Relaxed timeout for CI stability (was 10s) ); // Calculate actual tool execution duration - const toolDuration = getToolDuration(events, "bash"); + const toolDuration = getToolDuration(events, "task"); // Extract response text const responseText = extractTextFromEvents(events); // Verify command completed successfully (not timeout) // We primarily check bashOutput to ensure the tool executed and didn't hang - const bashOutput = collectToolOutputs(events, "bash"); - expect(bashOutput).toContain('"test": "data"'); + const bashOutput = collectToolOutputs(events, "task"); + expect(bashOutput).toContain("testdata"); // responseText might be empty if the model decides not to comment on the output // so we make this check optional or less strict if the tool output is correct @@ -318,14 +337,16 @@ describeIntegration("Runtime Bash Execution", () => { const maxDuration = 10000; expect(toolDuration).toBeLessThan(maxDuration); - // Verify bash tool was called + // Verify task(kind="bash") was called const toolCallStarts = events.filter( (e) => "type" in e && e.type === "tool-call-start" ); - const bashCalls = toolCallStarts.filter( - (e) => "toolName" in e && e.toolName === "bash" - ); - expect(bashCalls.length).toBeGreaterThan(0); + const taskCalls = toolCallStarts.filter((e) => { + if (!("toolName" in e) || e.toolName !== "task") return false; + const args = (e as { args?: { kind?: string } }).args; + return args?.kind === "bash"; + }); + expect(taskCalls.length).toBeGreaterThan(0); } finally { await cleanup(); } @@ -364,37 +385,28 @@ describeIntegration("Runtime Bash Execution", () => { ); try { - // Create some test files to search through - await sendMessageAndWait( - env, - workspaceId, - 'Run bash: for i in {1..1000}; do echo "terminal bench line $i" >> testfile.txt; done', - HAIKU_MODEL, - BASH_ONLY - ); - // Test grep | head pattern - this historically hangs over SSH // This is a regression test for the bash hang issue const events = await sendMessageAndWait( env, workspaceId, - 'Run bash: grep -n "terminal bench" testfile.txt | head -n 200', + 'Use the task tool with args: { kind: "bash", script: "for i in {1..1000}; do echo \"terminal bench line $i\" >> testfile.txt; done && grep -n \"terminal bench\" testfile.txt | head -n 200", timeout_secs: 60, run_in_background: false, display_name: "grep-head" }. Do not spawn a sub-agent.', HAIKU_MODEL, - BASH_ONLY, + TASK_BASH_ONLY, 30000 // Relaxed timeout for CI stability (was 15s) ); // Calculate actual tool execution duration - const toolDuration = getToolDuration(events, "bash"); + const toolDuration = getToolDuration(events, "task"); // Verify command completed successfully (not timeout) - // Check that the bash tool completed (tool-call-end events exist) + // Check that task(kind="bash") completed (tool-call-end events exist) const toolCallEnds = events.filter( (e) => "type" in e && e.type === "tool-call-end" && "toolName" in e && - e.toolName === "bash" + e.toolName === "task" ); expect(toolCallEnds.length).toBeGreaterThan(0); @@ -404,14 +416,16 @@ describeIntegration("Runtime Bash Execution", () => { const maxDuration = 15000; expect(toolDuration).toBeLessThan(maxDuration); - // Verify bash tool was called + // Verify task(kind="bash") was called const toolCallStarts = events.filter( (e) => "type" in e && e.type === "tool-call-start" ); - const bashCalls = toolCallStarts.filter( - (e) => "toolName" in e && e.toolName === "bash" - ); - expect(bashCalls.length).toBeGreaterThan(0); + const taskCalls = toolCallStarts.filter((e) => { + if (!("toolName" in e) || e.toolName !== "task") return false; + const args = (e as { args?: { kind?: string } }).args; + return args?.kind === "bash"; + }); + expect(taskCalls.length).toBeGreaterThan(0); } finally { await cleanup(); } diff --git a/tests/ipc/sendMessage.context.test.ts b/tests/ipc/sendMessage.context.test.ts index 628ef6865e..99e527386a 100644 --- a/tests/ipc/sendMessage.context.test.ts +++ b/tests/ipc/sendMessage.context.test.ts @@ -152,7 +152,7 @@ describeIntegration("sendMessage context handling tests", () => { describe("tool calls", () => { test.concurrent( - "should execute bash tool when requested", + 'should execute task(kind="bash") tool when requested', async () => { await withSharedWorkspace("anthropic", async ({ env, workspaceId, collector }) => { const repoPath = getSharedRepoPath(); @@ -162,13 +162,15 @@ describeIntegration("sendMessage context handling tests", () => { await fs.writeFile(testFilePath, "Hello from test file!"); try { - // Ask to read the file using bash - // Default toolPolicy (undefined) allows all tools + // Ask to read the file using task(kind="bash") const result = await sendMessageWithModel( env, workspaceId, - `Read the contents of the file at ${testFilePath} using the bash tool with cat.`, - modelString("anthropic", KNOWN_MODELS.HAIKU.providerModelId) + `Use task(kind="bash") to run: cat ${testFilePath}. Set display_name="read-file" and timeout_secs=30. Do not spawn a sub-agent.`, + modelString("anthropic", KNOWN_MODELS.HAIKU.providerModelId), + { + toolPolicy: [{ regex_match: "task", action: "require" }], + } ); expect(result.success).toBe(true); @@ -182,8 +184,12 @@ describeIntegration("sendMessage context handling tests", () => { (e) => "type" in e && (e as { type: string }).type === "tool-call-start" ); - // Should have at least one tool call - expect(toolCallStarts.length).toBeGreaterThan(0); + // Should have at least one task(kind="bash") tool call + const bashTaskCall = toolCallStarts.find((e) => { + if (!("toolName" in e) || e.toolName !== "task") return false; + return (e as { args?: { kind?: string } }).args?.kind === "bash"; + }); + expect(bashTaskCall).toBeDefined(); } finally { // Cleanup test file try { @@ -206,7 +212,7 @@ describeIntegration("sendMessage context handling tests", () => { const result = await sendMessageWithModel( env, workspaceId, - "Run the command 'echo test' using bash.", + "Run the command 'echo test' using task(kind=\"bash\").", modelString("anthropic", KNOWN_MODELS.HAIKU.providerModelId), { toolPolicy: [{ regex_match: ".*", action: "disable" }], diff --git a/tests/ipc/truncate.test.ts b/tests/ipc/truncate.test.ts index 2ffcf1a6a8..0d0f8cb294 100644 --- a/tests/ipc/truncate.test.ts +++ b/tests/ipc/truncate.test.ts @@ -4,6 +4,7 @@ import { createStreamCollector, assertStreamSuccess, resolveOrpcClient, + modelString, } from "./helpers"; import { HistoryService } from "../../src/node/services/historyService"; import { createMuxMessage } from "../../src/common/types/message"; @@ -220,7 +221,11 @@ describeIntegration("truncateHistory", () => { void sendMessageWithModel( env, workspaceId, - "Run this bash command: for i in {1..60}; do sleep 0.5; done && echo done" + 'Use task(kind="bash") to run: for i in {1..60}; do sleep 0.5; done && echo done. Set display_name="truncate-stream" and timeout_secs=120. Do not spawn a sub-agent.', + modelString("anthropic", "claude-sonnet-4-5"), + { + toolPolicy: [{ regex_match: "task", action: "require" }], + } ); // Wait for stream to start