+
{title && {title}}
+ {exitCode !== undefined && exit {exitCode}}
+ {elapsedMs !== undefined && {elapsedMs}ms}
+ {!isCompleted && output && output.length > 0 && (
+
+ {output}
+
+ )}
+
{reportMarkdown && (
)}
+ {note &&
{note}
}
+
{"error" in result && result.error && (
{result.error}
)}
diff --git a/src/browser/contexts/API.test.tsx b/src/browser/contexts/API.test.tsx
index 08bcc31989..8642e4bca2 100644
--- a/src/browser/contexts/API.test.tsx
+++ b/src/browser/contexts/API.test.tsx
@@ -66,8 +66,13 @@ void mock.module("@orpc/client/message-port", () => ({
}));
void mock.module("@/browser/components/AuthTokenModal", () => ({
+ // Note: Module mocks leak between bun test files.
+ // Export all commonly-used symbols to avoid cross-test import errors.
+ AuthTokenModal: () => null,
getStoredAuthToken: () => null,
// eslint-disable-next-line @typescript-eslint/no-empty-function
+ setStoredAuthToken: () => {},
+ // eslint-disable-next-line @typescript-eslint/no-empty-function
clearStoredAuthToken: () => {},
}));
diff --git a/src/browser/hooks/useVoiceInput.ts b/src/browser/hooks/useVoiceInput.ts
index 693b664037..e6af468654 100644
--- a/src/browser/hooks/useVoiceInput.ts
+++ b/src/browser/hooks/useVoiceInput.ts
@@ -56,8 +56,12 @@ export interface UseVoiceInputResult {
* We hide our voice UI on these devices to avoid redundancy with system dictation.
*/
function hasTouchDictation(): boolean {
- if (typeof window === "undefined") return false;
- const hasTouch = "ontouchstart" in window || navigator.maxTouchPoints > 0;
+ if (typeof window === "undefined" || typeof navigator === "undefined") return false;
+
+ const maxTouchPoints =
+ typeof navigator.maxTouchPoints === "number" ? navigator.maxTouchPoints : 0;
+ const hasTouch = "ontouchstart" in window || maxTouchPoints > 0;
+
// Touch-only check: most touch devices have native dictation.
// We don't check screen size because iPads are large but still have dictation.
return hasTouch;
@@ -66,7 +70,9 @@ function hasTouchDictation(): boolean {
const HAS_TOUCH_DICTATION = hasTouchDictation();
const HAS_MEDIA_RECORDER = typeof window !== "undefined" && typeof MediaRecorder !== "undefined";
const HAS_GET_USER_MEDIA =
- typeof window !== "undefined" && typeof navigator.mediaDevices?.getUserMedia === "function";
+ typeof window !== "undefined" &&
+ typeof navigator !== "undefined" &&
+ typeof navigator.mediaDevices?.getUserMedia === "function";
// =============================================================================
// Global Key State Tracking
@@ -79,7 +85,7 @@ const HAS_GET_USER_MEDIA =
*/
let isSpaceCurrentlyHeld = false;
-if (typeof window !== "undefined") {
+if (typeof window !== "undefined" && typeof window.addEventListener === "function") {
window.addEventListener(
"keydown",
(e) => {
diff --git a/src/browser/utils/RefreshController.test.ts b/src/browser/utils/RefreshController.test.ts
index e4aa56babc..b1aae0c719 100644
--- a/src/browser/utils/RefreshController.test.ts
+++ b/src/browser/utils/RefreshController.test.ts
@@ -1,18 +1,38 @@
-import { describe, it, expect, beforeEach, afterEach, jest } from "@jest/globals";
+import { describe, it, expect, afterEach, jest, setSystemTime } from "bun:test";
import { RefreshController } from "./RefreshController";
+async function sleep(ms: number): Promise<void> { // Resolves after roughly `ms` milliseconds of setTimeout delay.
+ await new Promise((resolve) => setTimeout(resolve, ms));
+}
+
describe("RefreshController", () => {
- beforeEach(() => {
- jest.useFakeTimers();
+ afterEach(() => {
+ // Some tests manipulate Date.now via setSystemTime(); always restore.
+ setSystemTime();
});
- afterEach(() => {
- jest.useRealTimers();
+ it("debounces schedule() calls (resets timer)", async () => {
+ const onRefresh = jest.fn<() => void>();
+ const controller = new RefreshController({ onRefresh, debounceMs: 50 });
+
+ controller.schedule();
+ await sleep(20);
+ controller.schedule(); // Resets debounce timer
+
+ // Not yet: only 30ms since last call (< debounceMs)
+ await sleep(30);
+ expect(onRefresh).not.toHaveBeenCalled();
+
+ // Now past debounceMs since last call
+ await sleep(30);
+ expect(onRefresh).toHaveBeenCalledTimes(1);
+
+ controller.dispose();
});
- it("debounces multiple schedule() calls", () => {
+ it("coalesces calls during debounce window", async () => {
const onRefresh = jest.fn<() => void>();
- const controller = new RefreshController({ onRefresh, debounceMs: 100 });
+ const controller = new RefreshController({ onRefresh, debounceMs: 50 });
controller.schedule();
controller.schedule();
@@ -20,16 +40,16 @@ describe("RefreshController", () => {
expect(onRefresh).not.toHaveBeenCalled();
- jest.advanceTimersByTime(100);
+ await sleep(60);
expect(onRefresh).toHaveBeenCalledTimes(1);
controller.dispose();
});
- it("requestImmediate() bypasses debounce", () => {
+ it("requestImmediate() bypasses debounce timer", async () => {
const onRefresh = jest.fn<() => void>();
- const controller = new RefreshController({ onRefresh, debounceMs: 100 });
+ const controller = new RefreshController({ onRefresh, debounceMs: 50 });
controller.schedule();
expect(onRefresh).not.toHaveBeenCalled();
@@ -37,8 +57,8 @@ describe("RefreshController", () => {
controller.requestImmediate();
expect(onRefresh).toHaveBeenCalledTimes(1);
- // Original debounce timer should be cleared
- jest.advanceTimersByTime(100);
+ // Original timer should be cleared
+ await sleep(60);
expect(onRefresh).toHaveBeenCalledTimes(1);
controller.dispose();
@@ -64,6 +84,49 @@ describe("RefreshController", () => {
controller.dispose();
});
+ it("schedule() during in-flight queues refresh for after completion", async () => {
+ const resolvers: Array<() => void> = []; // one resolver per onRefresh() call, in call order
+ const onRefresh = jest.fn(
+ () =>
+ new Promise<void>((resolve) => {
+ resolvers.push(resolve);
+ })
+ );
+
+ const controller = new RefreshController({ onRefresh, debounceMs: 20 });
+
+ // Start first refresh
+ controller.requestImmediate();
+ expect(onRefresh).toHaveBeenCalledTimes(1);
+ expect(resolvers).toHaveLength(1);
+
+ // schedule() while in-flight should queue, not start a second refresh
+ controller.schedule();
+
+ // Ensure the debounce timer has fired while we're still in-flight.
+ await sleep(30); // 30ms > debounceMs (20ms)
+ expect(onRefresh).toHaveBeenCalledTimes(1);
+
+ // Complete the first refresh and let .finally() run.
+ resolvers[0]();
+ await Promise.resolve();
+ await Promise.resolve(); // Extra tick for .finally()
+
+ // Allow post-flight setTimeout(0) to run
+ await sleep(0);
+ await sleep(10); // extra slack before asserting the follow-up refresh
+
+ expect(onRefresh).toHaveBeenCalledTimes(2);
+ expect(resolvers).toHaveLength(2);
+
+ // Resolve the follow-up refresh promise to avoid leaving it in-flight.
+ resolvers[1]();
+ await Promise.resolve();
+ await Promise.resolve();
+
+ controller.dispose();
+ });
+
it("isRefreshing reflects in-flight state", () => {
let resolveRefresh: () => void;
const refreshPromise = new Promise((resolve) => {
@@ -84,27 +147,27 @@ describe("RefreshController", () => {
controller.dispose();
});
- it("dispose() cleans up debounce timer", () => {
+ it("dispose() cleans up debounce timer", async () => {
const onRefresh = jest.fn<() => void>();
- const controller = new RefreshController({ onRefresh, debounceMs: 100 });
+ const controller = new RefreshController({ onRefresh, debounceMs: 50 });
controller.schedule();
controller.dispose();
- jest.advanceTimersByTime(100);
+ await sleep(60);
expect(onRefresh).not.toHaveBeenCalled();
});
- it("does not refresh after dispose", () => {
+ it("does not refresh after dispose", async () => {
const onRefresh = jest.fn<() => void>();
- const controller = new RefreshController({ onRefresh, debounceMs: 100 });
+ const controller = new RefreshController({ onRefresh, debounceMs: 50 });
controller.dispose();
controller.schedule();
controller.requestImmediate();
- jest.advanceTimersByTime(100);
+ await sleep(60);
expect(onRefresh).not.toHaveBeenCalled();
});
diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts
index f84854f291..f4d97ca167 100644
--- a/src/common/utils/tools/toolDefinitions.ts
+++ b/src/common/utils/tools/toolDefinitions.ts
@@ -94,7 +94,7 @@ const SubagentTypeSchema = z.preprocess(
z.enum(BUILT_IN_SUBAGENT_TYPES)
);
-export const TaskToolArgsSchema = z
+const TaskToolAgentArgsSchema = z
.object({
subagent_type: SubagentTypeSchema,
prompt: z.string().min(1),
@@ -103,6 +103,54 @@ export const TaskToolArgsSchema = z
})
.strict();
+const TaskToolBashArgsSchema = z // Strict shape of a bash task; TaskToolArgsSchema re-validates against it when kind === "bash".
+ .object({
+ kind: z.literal("bash"), // required discriminator
+ script: z.string().min(1), // non-empty
+ timeout_secs: z.number().positive(), // must be > 0
+ run_in_background: z.boolean().default(false), // false when omitted
+ display_name: z.string().min(1), // non-empty
+ })
+ .strict();
+
+// NOTE: Several providers require tool schemas to be a *single* JSON Schema object.
+// In particular, Anthropic rejects union/anyOf schemas for tool input.
+//
+// To keep the provider-facing schema as `type: "object"` while still enforcing a strict
+// agent-vs-bash split, we validate via superRefine against the appropriate strict schema.
+export const TaskToolArgsSchema = z
+ .object({
+ // Discriminator for bash tasks. Omit for agent tasks.
+ kind: z.literal("bash").optional(),
+
+ // Agent task args
+ subagent_type: SubagentTypeSchema.optional(),
+ prompt: z.string().min(1).optional(),
+ title: z.string().min(1).optional(),
+
+ // Shared
+ run_in_background: z.boolean().default(false),
+
+ // Bash task args
+ script: z.string().min(1).optional(),
+ timeout_secs: z.number().positive().optional(),
+ display_name: z.string().min(1).optional(),
+ })
+ .strict()
+ .superRefine((args, ctx) => { // second pass: every field above is optional, so per-kind requirements are enforced here
+ const strictSchema = args.kind === "bash" ? TaskToolBashArgsSchema : TaskToolAgentArgsSchema;
+ const parsed = strictSchema.safeParse(args);
+ if (!parsed.success) {
+ for (const issue of parsed.error.issues) { // re-report each strict-schema issue as a custom issue on this schema
+ ctx.addIssue({
+ code: z.ZodIssueCode.custom,
+ message: issue.message,
+ path: issue.path,
+ });
+ }
+ }
+ });
+
export const TaskToolQueuedResultSchema = z
.object({
status: z.enum(["queued", "running"]),
@@ -117,6 +165,14 @@ export const TaskToolCompletedResultSchema = z
reportMarkdown: z.string(),
title: z.string().optional(),
agentType: z.string().optional(),
+ exitCode: z.number().optional(),
+ note: z.string().optional(),
+ truncated: z
+ .object({
+ reason: z.string(),
+ totalLines: z.number(),
+ })
+ .optional(),
})
.strict();
@@ -137,12 +193,28 @@ export const TaskAwaitToolArgsSchema = z
.describe(
"List of task IDs to await. When omitted, waits for all active descendant tasks of the current workspace."
),
+ filter: z
+ .string()
+ .optional()
+ .describe(
+ "Optional regex to filter bash task output lines. By default, only matching lines are returned. " +
+ "When filter_exclude is true, matching lines are excluded instead. " +
+ "Non-matching lines are discarded and cannot be retrieved later."
+ ),
+ filter_exclude: z
+ .boolean()
+ .optional()
+ .describe(
+ "When true, lines matching 'filter' are excluded instead of kept. " +
+ "Requires 'filter' to be set."
+ ),
timeout_secs: z
.number()
- .positive()
+ .min(0)
.optional()
.describe(
"Maximum time to wait in seconds for each task. " +
+ "For bash tasks, this waits for NEW output (or process exit). " +
"If exceeded, the result returns status=queued|running|awaiting_report (task is still active). " +
"Optional, defaults to 10 minutes."
),
@@ -155,6 +227,10 @@ export const TaskAwaitToolCompletedResultSchema = z
taskId: z.string(),
reportMarkdown: z.string(),
title: z.string().optional(),
+ output: z.string().optional(),
+ elapsed_ms: z.number().optional(),
+ exitCode: z.number().optional(),
+ note: z.string().optional(),
})
.strict();
@@ -162,6 +238,9 @@ export const TaskAwaitToolActiveResultSchema = z
.object({
status: z.enum(["queued", "running", "awaiting_report"]),
taskId: z.string(),
+ output: z.string().optional(),
+ elapsed_ms: z.number().optional(),
+ note: z.string().optional(),
})
.strict();
@@ -513,15 +592,18 @@ export const TOOL_DEFINITIONS = {
},
task: {
description:
- "Spawn a sub-agent task in a child workspace. " +
- 'Use this to delegate work to specialized presets like "explore" (read-only investigation) or "exec" (general-purpose coding in a child workspace). ' +
- "If run_in_background is false, this tool blocks until the sub-agent calls agent_report, then returns the report. " +
- "If run_in_background is true, you can await it later with task_await.",
+ "Unified task tool for (1) spawning sub-agent tasks and (2) running bash commands. " +
+ "\n\nAgent tasks: provide subagent_type, prompt, title, run_in_background. " +
+ '\nBash tasks: set kind="bash" and provide script, timeout_secs, display_name, run_in_background. ' +
+ "\n\nIf run_in_background is false, returns a completed reportMarkdown. " +
+ "If run_in_background is true, returns a running taskId; use task_await to read incremental output and task_terminate to stop it.",
schema: TaskToolArgsSchema,
},
task_await: {
description:
- "Wait for one or more sub-agent tasks to finish and return their reports. " +
+ "Wait for one or more tasks to produce output. " +
+ "Agent tasks return reports when completed. " +
+ "Bash tasks return incremental output while running and a final reportMarkdown when they exit. " +
"Use this tool to WAIT; do not poll task_list in a loop to wait for task completion (that is misuse and wastes tool calls). " +
"This is similar to Promise.allSettled(): you always get per-task results. " +
"Possible statuses: completed, queued, running, awaiting_report, not_found, invalid_scope, error.",
@@ -529,15 +611,16 @@ export const TOOL_DEFINITIONS = {
},
task_terminate: {
description:
- "Terminate one or more sub-agent tasks immediately. " +
- "This stops their AI streams and deletes their workspaces (best-effort). " +
+ "Terminate one or more tasks immediately (sub-agent tasks or background bash tasks). " +
+ "For sub-agent tasks, this stops their AI streams and deletes their workspaces (best-effort). " +
"No report will be delivered; any in-progress work is discarded. " +
"If the task has descendant sub-agent tasks, they are terminated too.",
schema: TaskTerminateToolArgsSchema,
},
task_list: {
description:
- "List descendant sub-agent tasks for the current workspace, including their status and metadata. " +
+ "List descendant tasks for the current workspace, including status + metadata. " +
+ "This includes sub-agent tasks and background bash tasks. " +
"Use this after compaction or interruptions to rediscover which tasks are still active. " +
"This is a discovery tool, NOT a waiting mechanism: if you need to wait for tasks to finish, call task_await (optionally omit task_ids to await all active descendant tasks).",
schema: TaskListToolArgsSchema,
@@ -961,10 +1044,6 @@ export function getAvailableTools(
// Base tools available for all models
const baseTools = [
- "bash",
- "bash_output",
- "bash_background_list",
- "bash_background_terminate",
"file_read",
"agent_skill_read",
"agent_skill_read_file",
diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts
index e31c485c34..9bd3d68b6e 100644
--- a/src/common/utils/tools/tools.ts
+++ b/src/common/utils/tools/tools.ts
@@ -20,6 +20,7 @@ import { createAgentSkillReadFileTool } from "@/node/services/tools/agent_skill_
import { createAgentReportTool } from "@/node/services/tools/agent_report";
import { wrapWithInitWait } from "@/node/services/tools/wrapWithInitWait";
import { log } from "@/node/services/log";
+import { getAvailableTools } from "@/common/utils/tools/toolDefinitions";
import { sanitizeMCPToolsForOpenAI } from "@/common/utils/tools/schemaSanitizer";
import type { Runtime } from "@/node/runtime/Runtime";
@@ -139,10 +140,19 @@ export async function getToolsForModel(
// to leave repository in broken state due to issues with concurrent file modifications
// and line number miscalculations. Use file_edit_replace_string instead.
// file_edit_replace_lines: wrap(createFileEditReplaceLinesTool(config)),
+
+ // Unified task abstraction (agent + bash)
+ task: wrap(createTaskTool(config)),
+ task_await: wrap(createTaskAwaitTool(config)),
+ task_terminate: wrap(createTaskTerminateTool(config)),
+ task_list: wrap(createTaskListTool(config)),
+
+ // Legacy bash tools (deprecated: prefer task(kind="bash"))
bash: wrap(createBashTool(config)),
bash_output: wrap(createBashOutputTool(config)),
bash_background_list: wrap(createBashBackgroundListTool(config)),
bash_background_terminate: wrap(createBashBackgroundTerminateTool(config)),
+
web_fetch: wrap(createWebFetchTool(config)),
};
@@ -150,10 +160,6 @@ export async function getToolsForModel(
const nonRuntimeTools: Record = {
...(config.mode === "plan" ? { ask_user_question: createAskUserQuestionTool(config) } : {}),
propose_plan: createProposePlanTool(config),
- task: createTaskTool(config),
- task_await: createTaskAwaitTool(config),
- task_terminate: createTaskTerminateTool(config),
- task_list: createTaskListTool(config),
...(config.enableAgentReport ? { agent_report: createAgentReportTool(config) } : {}),
todo_write: createTodoWriteTool(config),
todo_read: createTodoReadTool(config),
@@ -220,6 +226,19 @@ export async function getToolsForModel(
log.error(`No web search tools available for ${provider}:`, error);
}
+ // Filter tools to the canonical allowlist so system prompt + toolset stay in sync.
+ // Include MCP tools even if they're not in getAvailableTools().
+ const allowlistedToolNames = new Set(
+ getAvailableTools(modelString, config.mode, { enableAgentReport: config.enableAgentReport })
+ );
+ for (const toolName of Object.keys(mcpTools ?? {})) {
+ allowlistedToolNames.add(toolName);
+ }
+
+ allTools = Object.fromEntries(
+ Object.entries(allTools).filter(([toolName]) => allowlistedToolNames.has(toolName))
+ );
+
// Apply tool-specific instructions if provided
if (toolInstructions) {
const augmentedTools: Record = {};
diff --git a/src/node/services/agentPresets.ts b/src/node/services/agentPresets.ts
index f5d1bee1a4..bc71b6f77c 100644
--- a/src/node/services/agentPresets.ts
+++ b/src/node/services/agentPresets.ts
@@ -47,9 +47,6 @@ function buildSystemPrompt(args: {
const EXEC_PRESET: AgentPreset = {
agentType: "exec",
toolPolicy: [
- // Non-recursive: subagents must not spawn more subagents.
- { regex_match: "task", action: "disable" },
- { regex_match: "task_.*", action: "disable" },
// Only the main plan-mode session should call propose_plan.
{ regex_match: "propose_plan", action: "disable" },
],
@@ -60,7 +57,7 @@ const EXEC_PRESET: AgentPreset = {
"- Make minimal, correct changes that match existing codebase patterns.",
],
rules: [
- "- Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).",
+ "- You MUST NOT spawn additional sub-agent tasks.",
"- Do not call propose_plan.",
"- Prefer small, reviewable diffs and run targeted checks when feasible.",
],
@@ -71,10 +68,10 @@ const EXPLORE_PRESET: AgentPreset = {
agentType: "explore",
toolPolicy: enableOnly(
"file_read",
- "bash",
- "bash_output",
- "bash_background_list",
- "bash_background_terminate",
+ "task",
+ "task_await",
+ "task_list",
+ "task_terminate",
"web_fetch",
"web_search",
"google_search",
@@ -92,8 +89,8 @@ const EXPLORE_PRESET: AgentPreset = {
"- You MUST NOT create temporary files anywhere (including /tmp).",
"- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.",
"- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).",
- "- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).",
- "- Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).",
+ '- Use task(kind="bash") only for read-only operations (rg, ls, cat, git diff/show/log, etc.).',
+ "- You MUST NOT spawn additional sub-agent tasks.",
],
}),
};
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index eb4f0c1b0b..e0852a13ce 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -1263,7 +1263,7 @@ export class AIService extends EventEmitter {
"",
"Nesting:",
`- Task delegation is disabled in this workspace (taskDepth=${taskDepth}, maxTaskNestingDepth=${taskSettings.maxTaskNestingDepth}).`,
- "- Do not call task/task_await/task_list/task_terminate.",
+ "- You MUST NOT spawn additional sub-agent tasks.",
].join("\n")
: agentPreset.systemPrompt
: undefined;
@@ -1370,12 +1370,9 @@ export class AIService extends EventEmitter {
mcpTools
);
- const depthToolPolicy: ToolPolicy = shouldDisableTaskToolsForDepth
- ? [
- { regex_match: "task", action: "disable" },
- { regex_match: "task_.*", action: "disable" },
- ]
- : [];
+ // Note: task is the unified abstraction for both agent delegation and bash execution.
+ // Do not disable it at max depth; rely on TaskService/createTaskTool to reject delegation.
+ const depthToolPolicy: ToolPolicy = [];
// Preset + depth tool policies must be applied last so callers cannot re-enable restricted tools.
const effectiveToolPolicy =
diff --git a/src/node/services/backgroundProcessManager.ts b/src/node/services/backgroundProcessManager.ts
index 65e910d9f6..25d3beb846 100644
--- a/src/node/services/backgroundProcessManager.ts
+++ b/src/node/services/backgroundProcessManager.ts
@@ -454,6 +454,7 @@ export class BackgroundProcessManager extends EventEmitter= 3 && filterExclude && currentStatus === "running";
+ const pollingToolName = noteToolName ?? "bash_output";
+
let note: string | undefined;
if (shouldSuggestFilterExclude) {
note =
- "STOP POLLING. You've called bash_output 3+ times on this process. " +
+ `STOP POLLING. You've called ${pollingToolName} 3+ times on this process. ` +
"This wastes tokens and clutters the conversation. " +
"Instead, make ONE call with: filter='⏳|progress|waiting|\\\\\\.\\\\\\.\\\\\\.', " +
"filter_exclude=true, timeout_secs=120. This blocks until meaningful output arrives.";
diff --git a/src/node/services/taskService.test.ts b/src/node/services/taskService.test.ts
index c6ec24cc25..95b2e8bc87 100644
--- a/src/node/services/taskService.test.ts
+++ b/src/node/services/taskService.test.ts
@@ -1173,6 +1173,48 @@ describe("TaskService", () => {
expect(report.title).toBe("t");
});
+ test("isDescendantAgentTask consults cached ancestry after workspace is removed", async () => {
+ const config = await createTestConfig(rootDir);
+
+ const projectPath = path.join(rootDir, "repo");
+ const parentId = "parent-111";
+ const childId = "child-222";
+
+ await config.saveConfig({ // one project with a parent workspace and a running child task workspace
+ projects: new Map([
+ [
+ projectPath,
+ {
+ workspaces: [
+ { path: path.join(projectPath, "parent"), id: parentId, name: "parent" },
+ {
+ path: path.join(projectPath, "child"),
+ id: childId,
+ name: "agent_explore_child",
+ parentWorkspaceId: parentId,
+ agentType: "explore",
+ taskStatus: "running",
+ },
+ ],
+ },
+ ],
+ ]),
+ taskSettings: { maxParallelAgentTasks: 1, maxTaskNestingDepth: 3 },
+ });
+
+ const { taskService } = createTaskServiceHarness(config);
+
+ const internal = taskService as unknown as { // cast to reach the private resolveWaiters method
+ resolveWaiters: (taskId: string, report: { reportMarkdown: string; title?: string }) => void;
+ };
+ internal.resolveWaiters(childId, { reportMarkdown: "ok", title: "t" }); // caches the report while the child is still in config
+
+ await config.removeWorkspace(childId); // presumably drops the child from config, so only the cache can answer below
+
+ expect(taskService.isDescendantAgentTask(parentId, childId)).toBe(true);
+ expect(taskService.isDescendantAgentTask("other-parent", childId)).toBe(false);
+ });
+
test("waitForAgentReport cache is cleared by TTL cleanup", async () => {
const config = await createTestConfig(rootDir);
diff --git a/src/node/services/taskService.ts b/src/node/services/taskService.ts
index 44e53c6be5..8fb617fa74 100644
--- a/src/node/services/taskService.ts
+++ b/src/node/services/taskService.ts
@@ -94,6 +94,15 @@ interface PendingTaskStartWaiter {
cleanup: () => void;
}
+interface CompletedAgentReportCacheEntry {
+ reportMarkdown: string; // Report body copied from the completed task's report.
+ title?: string; // Optional report title, copied alongside the body.
+ expiresAtMs: number; // Epoch-ms deadline; the entry is only honored while this is greater than Date.now().
+ // Ancestor workspace IDs captured when the report was cached (nearest parent first).
+ // Used to keep descendant-scope checks working even if the task workspace is cleaned up.
+ ancestorWorkspaceIds: string[];
+}
+
function isToolCallEndEvent(value: unknown): value is ToolCallEndEvent {
return (
typeof value === "object" &&
@@ -173,10 +182,7 @@ export class TaskService {
private readonly foregroundAwaitCountByWorkspaceId = new Map();
// Cache completed reports so callers can retrieve them even after the task workspace is removed.
// Bounded by TTL + max entries (see COMPLETED_REPORT_CACHE_*).
- private readonly completedReportsByTaskId = new Map<
- string,
- { reportMarkdown: string; title?: string; expiresAtMs: number }
- >();
+ private readonly completedReportsByTaskId = new Map();
private readonly remindedAwaitingReport = new Set();
constructor(
@@ -1049,7 +1055,20 @@ export class TaskService {
const cfg = this.config.loadConfigOrDefault();
const parentById = this.buildAgentTaskIndex(cfg).parentById;
- return this.isDescendantAgentTaskUsingParentById(parentById, ancestorWorkspaceId, taskId);
+ if (this.isDescendantAgentTaskUsingParentById(parentById, ancestorWorkspaceId, taskId)) {
+ return true;
+ }
+
+ // The task workspace may have been removed after it reported (cleanup). Preserve scope checks
+ // by consulting the completed-report cache, which tracks the task's ancestor chain.
+ const nowMs = Date.now();
+ this.cleanupExpiredCompletedReports(nowMs);
+ const cached = this.completedReportsByTaskId.get(taskId);
+ if (cached && cached.expiresAtMs > nowMs) {
+ return cached.ancestorWorkspaceIds.includes(ancestorWorkspaceId);
+ }
+
+ return false;
}
private isDescendantAgentTaskUsingParentById(
@@ -1072,6 +1091,25 @@ export class TaskService {
// --- Internal orchestration ---
+ private listAncestorWorkspaceIdsUsingParentById(
+ parentById: Map<string, string>,
+ taskId: string
+ ): string[] { // Returns taskId's ancestor workspace IDs by walking parentById upward; throws if the hop cap is hit.
+ const ancestors: string[] = []; // nearest parent first, root-most ancestor last
+
+ let current = taskId;
+ for (let i = 0; i < 32; i++) { // hard cap on hops so a parent cycle cannot loop forever
+ const parent = parentById.get(current);
+ if (!parent) return ancestors; // no recorded parent: the chain is complete
+ ancestors.push(parent);
+ current = parent;
+ }
+
+ throw new Error(
+ `listAncestorWorkspaceIdsUsingParentById: possible parentWorkspaceId cycle starting at ${taskId}`
+ );
+ }
+
private listAgentTaskWorkspaces(
config: ReturnType
): AgentTaskWorkspaceEntry[] {
@@ -1842,10 +1880,16 @@ export class TaskService {
private resolveWaiters(taskId: string, report: { reportMarkdown: string; title?: string }): void {
const nowMs = Date.now();
this.cleanupExpiredCompletedReports(nowMs);
+
+ const cfg = this.config.loadConfigOrDefault();
+ const parentById = this.buildAgentTaskIndex(cfg).parentById;
+ const ancestorWorkspaceIds = this.listAncestorWorkspaceIdsUsingParentById(parentById, taskId);
+
this.completedReportsByTaskId.set(taskId, {
reportMarkdown: report.reportMarkdown,
title: report.title,
expiresAtMs: nowMs + COMPLETED_REPORT_CACHE_TTL_MS,
+ ancestorWorkspaceIds,
});
this.enforceCompletedReportCacheLimit();
diff --git a/src/node/services/tools/task.bash.test.ts b/src/node/services/tools/task.bash.test.ts
new file mode 100644
index 0000000000..721a66babb
--- /dev/null
+++ b/src/node/services/tools/task.bash.test.ts
@@ -0,0 +1,178 @@
+import { describe, it, expect, mock } from "bun:test";
+import type { ToolCallOptions } from "ai";
+
+import { createTaskTool } from "./task";
+import { createTaskAwaitTool } from "./task_await";
+import { createTaskListTool } from "./task_list";
+import { createTaskTerminateTool } from "./task_terminate";
+import type { BackgroundProcessManager } from "@/node/services/backgroundProcessManager";
+import { TestTempDir, createTestToolConfig } from "./testHelpers";
+import type { TaskService } from "@/node/services/taskService";
+
+const mockToolCallOptions: ToolCallOptions = { // passed as the second argument to every tool.execute() call in this file
+ toolCallId: "test-call-id",
+ messages: [],
+};
+
+describe("task_* bash tasks", () => {
+ it("task(kind=bash) returns a running taskId for background commands", async () => {
+ using tempDir = new TestTempDir("test-task-bash");
+
+ const spawn = mock(() => ({ // stubbed spawn result; its processId reappears in the asserted taskId
+ success: true as const,
+ processId: "proc-1",
+ outputDir: "ignored",
+ pid: 123,
+ }));
+
+ const backgroundProcessManager = { spawn } as unknown as BackgroundProcessManager; // partial stub: only spawn() exists
+
+ const tool = createTaskTool({
+ ...createTestToolConfig(tempDir.path, { workspaceId: "ws-1" }),
+ backgroundProcessManager,
+ });
+
+ const result: unknown = await Promise.resolve(
+ tool.execute!(
+ {
+ kind: "bash",
+ script: "echo hi",
+ timeout_secs: 10,
+ run_in_background: true,
+ display_name: "My Proc",
+ },
+ mockToolCallOptions
+ )
+ );
+
+ expect(spawn).toHaveBeenCalled();
+ expect(result).toEqual({ status: "running", taskId: "bash:proc-1" }); // taskId is "bash:" + the stubbed processId
+ });
+
+ it("task_await returns incremental output for bash tasks", async () => {
+ using tempDir = new TestTempDir("test-task-await-bash");
+
+ const getProcess = mock(() => ({ id: "proc-1", workspaceId: "ws-1", displayName: "My Proc" }));
+ const getOutput = mock(() => ({ // stubbed: process still running with one chunk of output
+ success: true as const,
+ status: "running" as const,
+ output: "hello",
+ elapsed_ms: 5,
+ }));
+
+ const backgroundProcessManager = { // partial stub: only the two methods this test asserts on
+ getProcess,
+ getOutput,
+ } as unknown as BackgroundProcessManager;
+
+ const taskService = { // agent-task stubs; the assertions below only cover the bash task
+ listActiveDescendantAgentTaskIds: mock(() => []),
+ isDescendantAgentTask: mock(() => false),
+ waitForAgentReport: mock(() => Promise.resolve({ reportMarkdown: "ignored" })),
+ } as unknown as TaskService;
+
+ const tool = createTaskAwaitTool({
+ ...createTestToolConfig(tempDir.path, { workspaceId: "ws-1" }),
+ backgroundProcessManager,
+ taskService,
+ });
+
+ const result: unknown = await Promise.resolve(
+ tool.execute!({ task_ids: ["bash:proc-1"], timeout_secs: 0 }, mockToolCallOptions)
+ );
+
+ expect(getProcess).toHaveBeenCalledWith("proc-1"); // lookup uses the id without the "bash:" prefix
+ expect(getOutput).toHaveBeenCalled();
+ expect(result).toEqual({
+ results: [
+ {
+ status: "running",
+ taskId: "bash:proc-1",
+ output: "hello",
+ elapsed_ms: 5,
+ note: undefined,
+ },
+ ],
+ });
+ });
+
+ it("task_list includes background bash tasks", async () => {
+ using tempDir = new TestTempDir("test-task-list-bash");
+
+ const startTime = Date.parse("2025-01-01T00:00:00.000Z"); // fixed epoch ms so createdAt below is deterministic
+ const list = mock(() => [
+ {
+ id: "proc-1",
+ workspaceId: "ws-1",
+ status: "running" as const,
+ displayName: "My Proc",
+ startTime,
+ },
+ ]);
+
+ const backgroundProcessManager = { list } as unknown as BackgroundProcessManager; // partial stub: only list() exists
+
+ const taskService = { // no agent tasks, so the expected list holds only the bash task
+ listDescendantAgentTasks: mock(() => []),
+ isDescendantAgentTask: mock(() => false),
+ } as unknown as TaskService;
+
+ const tool = createTaskListTool({
+ ...createTestToolConfig(tempDir.path, { workspaceId: "ws-1" }),
+ backgroundProcessManager,
+ taskService,
+ });
+
+ const result: unknown = await Promise.resolve(tool.execute!({}, mockToolCallOptions));
+
+ expect(result).toEqual({
+ tasks: [
+ {
+ taskId: "bash:proc-1",
+ status: "running",
+ parentWorkspaceId: "ws-1",
+ title: "My Proc",
+ createdAt: new Date(startTime).toISOString(),
+ depth: 1,
+ },
+ ],
+ });
+ });
+
+ it("task_terminate can terminate bash tasks", async () => {
+ using tempDir = new TestTempDir("test-task-terminate-bash");
+
+ const getProcess = mock(() => ({ id: "proc-1", workspaceId: "ws-1" })); // same workspaceId as the tool config below
+ const terminate = mock(() => ({ success: true as const }));
+
+ const backgroundProcessManager = { // partial stub: only the two methods this test asserts on
+ getProcess,
+ terminate,
+ } as unknown as BackgroundProcessManager;
+
+ const taskService = {
+ terminateDescendantAgentTask: mock(() =>
+ Promise.resolve({ success: false, error: "not used" })
+ ),
+ isDescendantAgentTask: mock(() => false),
+ } as unknown as TaskService;
+
+ const tool = createTaskTerminateTool({
+ ...createTestToolConfig(tempDir.path, { workspaceId: "ws-1" }),
+ backgroundProcessManager,
+ taskService,
+ });
+
+ const result: unknown = await Promise.resolve(
+ tool.execute!({ task_ids: ["bash:proc-1"] }, mockToolCallOptions)
+ );
+
+ expect(getProcess).toHaveBeenCalledWith("proc-1"); // both calls use the id without the "bash:" prefix
+ expect(terminate).toHaveBeenCalledWith("proc-1");
+ expect(result).toEqual({
+ results: [
+ { status: "terminated", taskId: "bash:proc-1", terminatedTaskIds: ["bash:proc-1"] },
+ ],
+ });
+ });
+});
diff --git a/src/node/services/tools/task.test.ts b/src/node/services/tools/task.test.ts
index 578758b0b7..d9f5a2cb6a 100644
--- a/src/node/services/tools/task.test.ts
+++ b/src/node/services/tools/task.test.ts
@@ -31,7 +31,7 @@ describe("task tool", () => {
const result: unknown = await Promise.resolve(
tool.execute!(
- { subagent_type: "explore", prompt: "do it", run_in_background: true },
+ { subagent_type: "explore", prompt: "do it", title: "Child task", run_in_background: true },
mockToolCallOptions
)
);
@@ -63,7 +63,12 @@ describe("task tool", () => {
const result: unknown = await Promise.resolve(
tool.execute!(
- { subagent_type: "explore", prompt: "do it", run_in_background: false },
+ {
+ subagent_type: "explore",
+ prompt: "do it",
+ title: "Child task",
+ run_in_background: false,
+ },
mockToolCallOptions
)
);
@@ -95,7 +100,10 @@ describe("task tool", () => {
let caught: unknown = null;
try {
await Promise.resolve(
- tool.execute!({ subagent_type: "explore", prompt: "do it" }, mockToolCallOptions)
+ tool.execute!(
+ { subagent_type: "explore", prompt: "do it", title: "Child task" },
+ mockToolCallOptions
+ )
);
} catch (error: unknown) {
caught = error;
@@ -131,7 +139,10 @@ describe("task tool", () => {
let caught: unknown = null;
try {
await Promise.resolve(
- tool.execute!({ subagent_type: "exec", prompt: "do it" }, mockToolCallOptions)
+ tool.execute!(
+ { subagent_type: "exec", prompt: "do it", title: "Child task" },
+ mockToolCallOptions
+ )
);
} catch (error: unknown) {
caught = error;
diff --git a/src/node/services/tools/task.ts b/src/node/services/tools/task.ts
index 68500fb70e..475691d05f 100644
--- a/src/node/services/tools/task.ts
+++ b/src/node/services/tools/task.ts
@@ -1,25 +1,131 @@
import { tool } from "ai";
+import type { BashToolResult } from "@/common/types/tools";
import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools";
import { TaskToolResultSchema, TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions";
import { coerceThinkingLevel } from "@/common/types/thinking";
+import { createBashTool } from "./bash";
+import { toBashTaskId } from "./taskId";
import { parseToolResult, requireTaskService, requireWorkspaceId } from "./toolUtils";
+function formatBashReport( // Builds the Markdown report text for a bash run: heading, script, exit metadata, optional warnings/error, then output
+ args: { script: string; display_name: string },
+ result: BashToolResult
+): string {
+ const lines: string[] = [];
+
+ lines.push(`### Bash: ${args.display_name}`);
+ lines.push("");
+
+ lines.push("```bash");
+ lines.push(args.script.trimEnd()); // trailing whitespace is dropped so the closing fence directly follows the script
+ lines.push("```");
+ lines.push("");
+
+ lines.push(`exitCode: ${result.exitCode}`);
+ lines.push(`wall_duration_ms: ${result.wall_duration_ms}`);
+
+ if ("truncated" in result && result.truncated) { // guarded with `in`: the field is treated as optional on the result
+ lines.push("");
+ lines.push("WARNING: output truncated");
+ lines.push(`reason: ${result.truncated.reason}`);
+ lines.push(`totalLines: ${result.truncated.totalLines}`);
+ }
+
+ if (!result.success) { // failed runs additionally report their error message
+ lines.push("");
+ lines.push(`error: ${result.error}`);
+ }
+
+ if (typeof result.output === "string" && result.output.length > 0) { // the output section is omitted entirely when there is no output
+ lines.push("");
+ lines.push("```text");
+ lines.push(result.output.trimEnd());
+ lines.push("```");
+ }
+
+ return lines.join("\n");
+}
+
export const createTaskTool: ToolFactory = (config: ToolConfiguration) => {
+ let bashTool: ReturnType | null = null;
+
return tool({
description: TOOL_DEFINITIONS.task.description,
inputSchema: TOOL_DEFINITIONS.task.schema,
- execute: async (args, { abortSignal }): Promise => {
+ execute: async (args, { abortSignal, toolCallId, messages }): Promise => {
+ // Defensive: tool() should have already validated args via inputSchema,
+ // but keep runtime validation here to preserve type-safety.
+ const parsedArgs = TOOL_DEFINITIONS.task.schema.safeParse(args);
+ if (!parsedArgs.success) {
+ throw new Error(`task tool input validation failed: ${parsedArgs.error.message}`);
+ }
+ const validatedArgs = parsedArgs.data;
+ if (abortSignal?.aborted) {
+ throw new Error("Interrupted");
+ }
+
+ // task(kind="bash") - run bash commands via the task abstraction.
+ if (validatedArgs.kind === "bash") {
+ const { script, timeout_secs, run_in_background, display_name } = validatedArgs;
+ if (!script || timeout_secs === undefined || !display_name) {
+ throw new Error("task tool input validation failed: expected bash task args");
+ }
+
+ bashTool ??= createBashTool(config);
+
+ const bashResult = (await bashTool.execute!(
+ {
+ script,
+ timeout_secs,
+ run_in_background,
+ display_name,
+ },
+ { abortSignal, toolCallId, messages }
+ )) as BashToolResult;
+
+ if (
+ bashResult.success &&
+ "backgroundProcessId" in bashResult &&
+ bashResult.backgroundProcessId
+ ) {
+ return parseToolResult(
+ TaskToolResultSchema,
+ { status: "running" as const, taskId: toBashTaskId(bashResult.backgroundProcessId) },
+ "task"
+ );
+ }
+
+ return parseToolResult(
+ TaskToolResultSchema,
+ {
+ status: "completed" as const,
+ reportMarkdown: formatBashReport({ script, display_name }, bashResult),
+ title: display_name,
+ exitCode: bashResult.exitCode,
+ note: "note" in bashResult ? bashResult.note : undefined,
+ truncated: "truncated" in bashResult ? bashResult.truncated : undefined,
+ },
+ "task"
+ );
+ }
+
+ const { subagent_type, prompt, title, run_in_background } = validatedArgs;
+ if (!subagent_type || !prompt || !title) {
+ throw new Error("task tool input validation failed: expected agent task args");
+ }
+
const workspaceId = requireWorkspaceId(config, "task");
const taskService = requireTaskService(config, "task");
- if (abortSignal?.aborted) {
- throw new Error("Interrupted");
+ // Disallow recursive sub-agent spawning.
+ if (config.enableAgentReport) {
+ throw new Error("Sub-agent workspaces may not spawn additional sub-agent tasks.");
}
// Plan mode is explicitly non-executing. Allow only read-only exploration tasks.
- if (config.mode === "plan" && args.subagent_type === "exec") {
+ if (config.mode === "plan" && subagent_type === "exec") {
throw new Error('In Plan Mode you may only spawn subagent_type: "explore" tasks.');
}
@@ -32,9 +138,9 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => {
const created = await taskService.create({
parentWorkspaceId: workspaceId,
kind: "agent",
- agentType: args.subagent_type,
- prompt: args.prompt,
- title: args.title,
+ agentType: subagent_type,
+ prompt,
+ title,
modelString,
thinkingLevel,
});
@@ -43,7 +149,7 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => {
throw new Error(created.error);
}
- if (args.run_in_background) {
+ if (run_in_background) {
return parseToolResult(
TaskToolResultSchema,
{ status: created.data.status, taskId: created.data.taskId },
@@ -63,7 +169,7 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => {
taskId: created.data.taskId,
reportMarkdown: report.reportMarkdown,
title: report.title,
- agentType: args.subagent_type,
+ agentType: subagent_type,
},
"task"
);
diff --git a/src/node/services/tools/taskId.ts b/src/node/services/tools/taskId.ts
new file mode 100644
index 0000000000..4466e1e3ff
--- /dev/null
+++ b/src/node/services/tools/taskId.ts
@@ -0,0 +1,24 @@
+import assert from "node:assert/strict";
+
+const BASH_TASK_ID_PREFIX = "bash:"; // prefix prepended to a process id to form a bash task id
+
+export function toBashTaskId(processId: string): string { // Maps a process id to its task id: "bash:" followed by the trimmed processId
+ assert(typeof processId === "string", "toBashTaskId: processId must be a string"); // runtime guard in addition to the static type
+ const trimmed = processId.trim();
+ assert(trimmed.length > 0, "toBashTaskId: processId must be non-empty"); // blank or whitespace-only ids fail the assertion
+ return `${BASH_TASK_ID_PREFIX}${trimmed}`;
+}
+
+export function fromBashTaskId(taskId: string): string | null { // Inverse of toBashTaskId: returns the process id, or null when taskId is not a usable bash task id
+ assert(typeof taskId === "string", "fromBashTaskId: taskId must be a string");
+ if (!taskId.startsWith(BASH_TASK_ID_PREFIX)) { // checked on the untrimmed string, so leading whitespace means "not a bash id"
+ return null;
+ }
+
+ const processId = taskId.slice(BASH_TASK_ID_PREFIX.length).trim();
+ return processId.length > 0 ? processId : null; // a bare "bash:" with a blank remainder is rejected
+}
+
+export function isBashTaskId(taskId: string): boolean { // True only when taskId has the "bash:" prefix AND a non-blank process id after it
+ return fromBashTaskId(taskId) !== null;
+}
diff --git a/src/node/services/tools/task_await.test.ts b/src/node/services/tools/task_await.test.ts
index 12c411782e..acd45044de 100644
--- a/src/node/services/tools/task_await.test.ts
+++ b/src/node/services/tools/task_await.test.ts
@@ -160,4 +160,67 @@ describe("task_await tool", () => {
],
});
});
+
+ it("treats timeout_secs=0 as non-blocking for agent tasks", async () => { // with timeout_secs=0 the tool must report the current status instead of waiting
+ using tempDir = new TestTempDir("test-task-await-tool-timeout-zero");
+ const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" });
+
+ const waitForAgentReport = mock(() => { // throws if invoked, so any attempt to wait fails the test
+ throw new Error("waitForAgentReport should not be called for timeout_secs=0");
+ });
+ const getAgentTaskStatus = mock(() => "running" as const); // the task is reported as still active
+
+ const taskService = {
+ listActiveDescendantAgentTaskIds: mock(() => ["t1"]), // no task_ids are passed below, so candidates come from this list
+ isDescendantAgentTask: mock(() => true),
+ getAgentTaskStatus,
+ waitForAgentReport,
+ } as unknown as TaskService;
+
+ const tool = createTaskAwaitTool({ ...baseConfig, taskService });
+
+ const result: unknown = await Promise.resolve(
+ tool.execute!({ timeout_secs: 0 }, mockToolCallOptions)
+ );
+
+ expect(result).toEqual({ results: [{ status: "running", taskId: "t1" }] });
+ expect(waitForAgentReport).toHaveBeenCalledTimes(0);
+ expect(getAgentTaskStatus).toHaveBeenCalledWith("t1");
+ });
+
+ it("returns completed result when timeout_secs=0 and a cached report is available", async () => { // a non-active task with a ready report must come back as "completed"
+ using tempDir = new TestTempDir("test-task-await-tool-timeout-zero-cached");
+ const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" });
+
+ const getAgentTaskStatus = mock(() => null); // no active status is reported for the task
+ const waitForAgentReport = mock(() =>
+ Promise.resolve({ reportMarkdown: "ok", title: "cached-title" }) // resolves immediately, standing in for an already-available report
+ );
+
+ const taskService = {
+ listActiveDescendantAgentTaskIds: mock(() => ["t1"]),
+ isDescendantAgentTask: mock(() => true),
+ getAgentTaskStatus,
+ waitForAgentReport,
+ } as unknown as TaskService;
+
+ const tool = createTaskAwaitTool({ ...baseConfig, taskService });
+
+ const result: unknown = await Promise.resolve(
+ tool.execute!({ timeout_secs: 0 }, mockToolCallOptions)
+ );
+
+ expect(result).toEqual({
+ results: [
+ {
+ status: "completed",
+ taskId: "t1",
+ reportMarkdown: "ok",
+ title: "cached-title",
+ },
+ ],
+ });
+ expect(getAgentTaskStatus).toHaveBeenCalledWith("t1"); // status is consulted first
+ expect(waitForAgentReport).toHaveBeenCalledTimes(1); // then exactly one report lookup is made
+ });
});
diff --git a/src/node/services/tools/task_await.ts b/src/node/services/tools/task_await.ts
index 7fa2065140..37b503b7dc 100644
--- a/src/node/services/tools/task_await.ts
+++ b/src/node/services/tools/task_await.ts
@@ -3,6 +3,7 @@ import { tool } from "ai";
import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools";
import { TaskAwaitToolResultSchema, TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions";
+import { fromBashTaskId, toBashTaskId } from "./taskId";
import {
dedupeStrings,
parseToolResult,
@@ -12,11 +13,43 @@ import {
function coerceTimeoutMs(timeoutSecs: unknown): number | undefined {
if (typeof timeoutSecs !== "number" || !Number.isFinite(timeoutSecs)) return undefined;
+ if (timeoutSecs < 0) return undefined; // only negatives are rejected; 0 now flows through as 0ms instead of becoming undefined
const timeoutMs = Math.floor(timeoutSecs * 1000);
- if (timeoutMs <= 0) return undefined;
return timeoutMs;
}
+function coerceTimeoutSecs(timeoutSecs: unknown): number | undefined { // Returns timeoutSecs unchanged when it is a finite, non-negative number; otherwise undefined
+ if (typeof timeoutSecs !== "number" || !Number.isFinite(timeoutSecs)) return undefined; // rejects non-numbers, NaN and +/-Infinity
+ if (timeoutSecs < 0) return undefined;
+ return timeoutSecs; // no rounding or unit conversion: the value stays in seconds
+}
+
+function formatBashOutputReport(args: { // Builds the Markdown report text for a bash task: heading, status, optional exit code, optional output
+ processId: string;
+ status: string;
+ exitCode?: number;
+ output: string;
+}): string {
+ const lines: string[] = [];
+
+ lines.push(`### Bash task: ${args.processId}`);
+ lines.push("");
+
+ lines.push(`status: ${args.status}`);
+ if (args.exitCode !== undefined) { // explicit undefined check so an exit code of 0 is still printed
+ lines.push(`exitCode: ${args.exitCode}`);
+ }
+
+ if (args.output.trim().length > 0) { // whitespace-only output is treated as empty and the fenced block is omitted
+ lines.push("");
+ lines.push("```text");
+ lines.push(args.output.trimEnd()); // only trailing whitespace is removed; leading whitespace is kept
+ lines.push("```");
+ }
+
+ return lines.join("\n");
+}
+
export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => {
return tool({
description: TOOL_DEFINITIONS.task_await.description,
@@ -26,14 +59,32 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => {
const taskService = requireTaskService(config, "task_await");
const timeoutMs = coerceTimeoutMs(args.timeout_secs);
+ const timeoutSecsForBash = coerceTimeoutSecs(args.timeout_secs) ?? 10 * 60;
const requestedIds: string[] | null =
args.task_ids && args.task_ids.length > 0 ? args.task_ids : null;
- const candidateTaskIds =
+ let candidateTaskIds: string[] =
requestedIds ?? taskService.listActiveDescendantAgentTaskIds(workspaceId);
+ if (!requestedIds && config.backgroundProcessManager) {
+ const processes = await config.backgroundProcessManager.list();
+ const bashTaskIds = processes
+ .filter((proc) => {
+ if (proc.status !== "running") return false;
+ return (
+ proc.workspaceId === workspaceId ||
+ taskService.isDescendantAgentTask(workspaceId, proc.workspaceId)
+ );
+ })
+ .map((proc) => toBashTaskId(proc.id));
+
+ candidateTaskIds = [...candidateTaskIds, ...bashTaskIds];
+ }
+
const uniqueTaskIds = dedupeStrings(candidateTaskIds);
+
+ const agentTaskIds = uniqueTaskIds.filter((taskId) => !taskId.startsWith("bash:"));
const bulkFilter = (
taskService as unknown as {
filterDescendantAgentTaskIds?: (
@@ -42,18 +93,117 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => {
) => string[];
}
).filterDescendantAgentTaskIds;
- const descendantTaskIdSet = new Set(
+ const descendantAgentTaskIdSet = new Set(
typeof bulkFilter === "function"
- ? bulkFilter.call(taskService, workspaceId, uniqueTaskIds)
- : uniqueTaskIds.filter((taskId) => taskService.isDescendantAgentTask(workspaceId, taskId))
+ ? bulkFilter.call(taskService, workspaceId, agentTaskIds)
+ : agentTaskIds.filter((taskId) => taskService.isDescendantAgentTask(workspaceId, taskId))
);
const results = await Promise.all(
uniqueTaskIds.map(async (taskId) => {
- if (!descendantTaskIdSet.has(taskId)) {
+ const maybeProcessId = fromBashTaskId(taskId);
+ if (taskId.startsWith("bash:") && !maybeProcessId) {
+ return { status: "error" as const, taskId, error: "Invalid bash taskId." };
+ }
+
+ if (maybeProcessId) {
+ if (!config.backgroundProcessManager) {
+ return {
+ status: "error" as const,
+ taskId,
+ error: "Background process manager not available",
+ };
+ }
+
+ const proc = await config.backgroundProcessManager.getProcess(maybeProcessId);
+ if (!proc) {
+ return { status: "not_found" as const, taskId };
+ }
+
+ const inScope =
+ proc.workspaceId === workspaceId ||
+ taskService.isDescendantAgentTask(workspaceId, proc.workspaceId);
+ if (!inScope) {
+ return { status: "invalid_scope" as const, taskId };
+ }
+
+ const outputResult = await config.backgroundProcessManager.getOutput(
+ maybeProcessId,
+ args.filter,
+ args.filter_exclude,
+ timeoutSecsForBash,
+ abortSignal,
+ workspaceId,
+ "task_await"
+ );
+
+ if (!outputResult.success) {
+ return { status: "error" as const, taskId, error: outputResult.error };
+ }
+
+ if (outputResult.status === "running" || outputResult.status === "interrupted") {
+ return {
+ status: "running" as const,
+ taskId,
+ output: outputResult.output,
+ elapsed_ms: outputResult.elapsed_ms,
+ note: outputResult.note,
+ };
+ }
+
+ return {
+ status: "completed" as const,
+ taskId,
+ title: proc.displayName ?? proc.id,
+ reportMarkdown: formatBashOutputReport({
+ processId: proc.id,
+ status: outputResult.status,
+ exitCode: outputResult.exitCode,
+ output: outputResult.output,
+ }),
+ output: outputResult.output,
+ elapsed_ms: outputResult.elapsed_ms,
+ exitCode: outputResult.exitCode,
+ note: outputResult.note,
+ };
+ }
+
+ if (!descendantAgentTaskIdSet.has(taskId)) {
return { status: "invalid_scope" as const, taskId };
}
+ // When timeout_secs=0 (or rounds down to 0ms), task_await should be non-blocking.
+ // `waitForAgentReport` asserts timeoutMs > 0, so handle 0 explicitly by returning the
+ // current task status instead of awaiting.
+ if (timeoutMs === 0) {
+ const status = taskService.getAgentTaskStatus(taskId);
+ if (status === "queued" || status === "running" || status === "awaiting_report") {
+ return { status, taskId };
+ }
+
+ // Best-effort: the task might already have a cached report (even if its workspace was
+ // cleaned up). Avoid blocking when it isn't available.
+ try {
+ const report = await taskService.waitForAgentReport(taskId, {
+ timeoutMs: 1,
+ abortSignal,
+ requestingWorkspaceId: workspaceId,
+ });
+ return {
+ status: "completed" as const,
+ taskId,
+ reportMarkdown: report.reportMarkdown,
+ title: report.title,
+ };
+ } catch (error: unknown) {
+ const message = error instanceof Error ? error.message : String(error);
+ if (/not found/i.test(message)) {
+ return { status: "not_found" as const, taskId };
+ }
+ return { status: "error" as const, taskId, error: message };
+ }
+ }
+
try {
const report = await taskService.waitForAgentReport(taskId, {
timeoutMs,
diff --git a/src/node/services/tools/task_list.ts b/src/node/services/tools/task_list.ts
index 1027793a8a..bd904f2fd6 100644
--- a/src/node/services/tools/task_list.ts
+++ b/src/node/services/tools/task_list.ts
@@ -3,6 +3,7 @@ import { tool } from "ai";
import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools";
import { TaskListToolResultSchema, TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions";
+import { toBashTaskId } from "./taskId";
import { parseToolResult, requireTaskService, requireWorkspaceId } from "./toolUtils";
const DEFAULT_STATUSES = ["queued", "running", "awaiting_report"] as const;
@@ -11,13 +12,44 @@ export const createTaskListTool: ToolFactory = (config: ToolConfiguration) => {
return tool({
description: TOOL_DEFINITIONS.task_list.description,
inputSchema: TOOL_DEFINITIONS.task_list.schema,
- execute: (args): unknown => {
+ execute: async (args): Promise => {
const workspaceId = requireWorkspaceId(config, "task_list");
const taskService = requireTaskService(config, "task_list");
const statuses =
args.statuses && args.statuses.length > 0 ? args.statuses : [...DEFAULT_STATUSES];
- const tasks = taskService.listDescendantAgentTasks(workspaceId, { statuses });
+
+ const agentTasks = taskService.listDescendantAgentTasks(workspaceId, { statuses });
+ const tasks = [...agentTasks];
+
+ if (config.backgroundProcessManager) {
+ const depthByWorkspaceId = new Map();
+ depthByWorkspaceId.set(workspaceId, 0);
+ for (const t of agentTasks) {
+ depthByWorkspaceId.set(t.taskId, t.depth);
+ }
+
+ const processes = await config.backgroundProcessManager.list();
+ for (const proc of processes) {
+ const inScope =
+ proc.workspaceId === workspaceId ||
+ taskService.isDescendantAgentTask(workspaceId, proc.workspaceId);
+ if (!inScope) continue;
+
+ const status = proc.status === "running" ? "running" : "reported";
+ if (!statuses.includes(status)) continue;
+
+ const parentDepth = depthByWorkspaceId.get(proc.workspaceId) ?? 0;
+ tasks.push({
+ taskId: toBashTaskId(proc.id),
+ status,
+ parentWorkspaceId: proc.workspaceId,
+ title: proc.displayName ?? proc.id,
+ createdAt: new Date(proc.startTime).toISOString(),
+ depth: parentDepth + 1,
+ });
+ }
+ }
return parseToolResult(TaskListToolResultSchema, { tasks }, "task_list");
},
diff --git a/src/node/services/tools/task_terminate.ts b/src/node/services/tools/task_terminate.ts
index 20d9460c3a..9f67d67c95 100644
--- a/src/node/services/tools/task_terminate.ts
+++ b/src/node/services/tools/task_terminate.ts
@@ -6,6 +6,7 @@ import {
TOOL_DEFINITIONS,
} from "@/common/utils/tools/toolDefinitions";
+import { fromBashTaskId } from "./taskId";
import {
dedupeStrings,
parseToolResult,
@@ -25,6 +26,44 @@ export const createTaskTerminateTool: ToolFactory = (config: ToolConfiguration)
const results = await Promise.all(
uniqueTaskIds.map(async (taskId) => {
+ const maybeProcessId = fromBashTaskId(taskId);
+ if (taskId.startsWith("bash:") && !maybeProcessId) {
+ return { status: "error" as const, taskId, error: "Invalid bash taskId." };
+ }
+
+ if (maybeProcessId) {
+ if (!config.backgroundProcessManager) {
+ return {
+ status: "error" as const,
+ taskId,
+ error: "Background process manager not available",
+ };
+ }
+
+ const proc = await config.backgroundProcessManager.getProcess(maybeProcessId);
+ if (!proc) {
+ return { status: "not_found" as const, taskId };
+ }
+
+ const inScope =
+ proc.workspaceId === workspaceId ||
+ taskService.isDescendantAgentTask(workspaceId, proc.workspaceId);
+ if (!inScope) {
+ return { status: "invalid_scope" as const, taskId };
+ }
+
+ const terminateResult = await config.backgroundProcessManager.terminate(maybeProcessId);
+ if (!terminateResult.success) {
+ return { status: "error" as const, taskId, error: terminateResult.error };
+ }
+
+ return {
+ status: "terminated" as const,
+ taskId,
+ terminatedTaskIds: [taskId],
+ };
+ }
+
const terminateResult = await taskService.terminateDescendantAgentTask(
workspaceId,
taskId
diff --git a/tests/ipc/backgroundBash.test.ts b/tests/ipc/backgroundBash.test.ts
index bf30191ebe..34ca7f20c1 100644
--- a/tests/ipc/backgroundBash.test.ts
+++ b/tests/ipc/backgroundBash.test.ts
@@ -22,67 +22,111 @@ import {
generateBranchName,
createWorkspaceWithInit,
sendMessageAndWait,
- extractTextFromEvents,
HAIKU_MODEL,
} from "./helpers";
import type { WorkspaceChatMessage } from "../../src/common/orpc/types";
import type { ToolPolicy } from "../../src/common/utils/tools/toolPolicy";
-// Tool policy: Allow bash and bash_background_* tools (bash prefix matches all)
-const BACKGROUND_TOOLS: ToolPolicy = [
- { regex_match: "bash", action: "enable" },
- { regex_match: "file_.*", action: "disable" },
+// Tool policy: Enable only task* tools (task, task_list, task_await, task_terminate).
+const TASK_TOOLS: ToolPolicy = [
+ { regex_match: ".*", action: "disable" },
+ { regex_match: "task.*", action: "enable" },
];
// Extended timeout for tests making multiple AI calls
const BACKGROUND_TEST_TIMEOUT_MS = 75000;
/**
- * Extract process ID from bash tool output containing "Background process started with ID: xxx"
- * The process ID is now the display_name, which can be any string like "Sleep Process" or "bash_123"
+ * Extract a bash taskId (e.g. "bash:<processId>") from task(kind="bash") results.
*/
-function extractProcessId(events: WorkspaceChatMessage[]): string | null {
+function extractBashTaskId(events: WorkspaceChatMessage[]): string | null { // returns the first "bash:"-prefixed taskId found on a `task` tool-call-end event, else null
for (const event of events) {
- if (
- "type" in event &&
- event.type === "tool-call-end" &&
- "toolName" in event &&
- event.toolName === "bash"
- ) {
- const result = (event as { result?: { output?: string } }).result?.output;
- if (typeof result === "string") {
- // Match any non-empty process ID after "Background process started with ID: "
- const match = result.match(/Background process started with ID: (.+)$/);
- if (match) return match[1].trim();
+ if (!("type" in event) || event.type !== "tool-call-end") continue; // only completed tool calls are inspected
+ if (!("toolName" in event) || event.toolName !== "task") continue;
+
+ const taskId = (event as { result?: { taskId?: string } }).result?.taskId;
+ if (typeof taskId !== "string") continue; // results without a string taskId are skipped
+
+ const trimmed = taskId.trim();
+ if (trimmed.startsWith("bash:")) return trimmed; // non-bash task ids are skipped and the scan continues
+ }
+ return null;
+}
+
+/**
+ * Extract taskIds from a task_list tool result.
+ */
+function extractTaskListTaskIds(events: WorkspaceChatMessage[]): string[] { // returns the string taskIds from the FIRST task_list tool-call-end event, or [] if there is none
+ for (const event of events) {
+ if (!("type" in event) || event.type !== "tool-call-end") continue;
+ if (!("toolName" in event) || event.toolName !== "task_list") continue;
+
+ const tasks = (event as { result?: { tasks?: Array<{ taskId?: string }> } }).result?.tasks;
+ if (!Array.isArray(tasks)) return []; // a first task_list result without a tasks array ends the search; later events are not examined
+
+ return tasks
+ .map((t) => t.taskId)
+ .filter((taskId): taskId is string => typeof taskId === "string"); // entries lacking a string taskId are dropped
+ }
+ return [];
+}
+
+/**
+ * Collect output strings from task_await tool results.
+ */
+function collectTaskAwaitOutputs(events: WorkspaceChatMessage[]): string { // gathers text from ALL task_await tool-call-end events and joins it with newlines
+ const outputs: string[] = [];
+
+ for (const event of events) {
+ if (!("type" in event) || event.type !== "tool-call-end") continue;
+ if (!("toolName" in event) || event.toolName !== "task_await") continue;
+
+ const results = (
+ event as { result?: { results?: Array<{ output?: string; reportMarkdown?: string }> } }
+ ).result?.results;
+
+ if (!Array.isArray(results)) continue; // unlike the other extractors here, a malformed event is skipped rather than ending the scan
+
+ for (const result of results) {
+ if (typeof result.output === "string" && result.output.length > 0) { // non-empty `output` takes precedence over reportMarkdown
+ outputs.push(result.output);
+ continue;
+ }
+ if (typeof result.reportMarkdown === "string" && result.reportMarkdown.length > 0) { // fallback when `output` is missing or empty
+ outputs.push(result.reportMarkdown);
}
}
}
- return null;
+
+ return outputs.join("\n"); // empty string when nothing was collected
}
/**
- * Check if any tool output contains a specific string
+ * Extract terminated task ids from a task_terminate tool result.
*/
-function toolOutputContains(
- events: WorkspaceChatMessage[],
- toolName: string,
- substring: string
-): boolean {
+function extractTerminatedTaskIds(events: WorkspaceChatMessage[]): string[] { // returns terminatedTaskIds from the FIRST task_terminate tool-call-end event, or [] if there is none
for (const event of events) {
- if (
- "type" in event &&
- event.type === "tool-call-end" &&
- "toolName" in event &&
- event.toolName === toolName
- ) {
- const result = (event as { result?: { output?: string; message?: string } }).result;
- const text = result?.output ?? result?.message;
- if (typeof text === "string" && text.includes(substring)) {
- return true;
+ if (!("type" in event) || event.type !== "tool-call-end") continue;
+ if (!("toolName" in event) || event.toolName !== "task_terminate") continue;
+
+ const results = (
+ event as {
+ result?: {
+ results?: Array<{ status?: string; terminatedTaskIds?: string[] }>;
+ };
}
+ ).result?.results;
+ if (!Array.isArray(results)) return []; // a first task_terminate result without a results array ends the search
+
+ const terminated: string[] = [];
+ for (const result of results) {
+ if (result.status !== "terminated") continue; // entries with any other status contribute nothing
+ if (!Array.isArray(result.terminatedTaskIds)) continue;
+ terminated.push(...result.terminatedTaskIds); // ids from every terminated entry are flattened into one list
}
+ return terminated;
}
- return false;
+ return [];
}
// Skip all tests if TEST_INTEGRATION is not set
@@ -119,47 +163,44 @@ describeIntegration("Background Bash Execution", () => {
);
try {
- // Start a background process using explicit tool call instruction
+ // Start a background bash task via task(kind="bash")
const startEvents = await sendMessageAndWait(
env,
workspaceId,
- "Use the bash tool with run_in_background=true to run: true && sleep 30",
+ 'Use the task tool with args: { kind: "bash", script: "true && sleep 30", timeout_secs: 60, run_in_background: true, display_name: "bg-basic" }. Do not spawn a sub-agent.',
HAIKU_MODEL,
- BACKGROUND_TOOLS,
+ TASK_TOOLS,
30000
);
- // Extract process ID from tool output (now uses display_name)
- const processId = extractProcessId(startEvents);
- expect(processId).not.toBeNull();
- expect(processId!.length).toBeGreaterThan(0);
+ const taskId = extractBashTaskId(startEvents);
+ expect(taskId).not.toBeNull();
+ expect(taskId!.startsWith("bash:")).toBe(true);
- // List background processes to verify it's tracked
+ // List tasks to verify it's tracked
const listEvents = await sendMessageAndWait(
env,
workspaceId,
- "Use the bash_background_list tool to show running background processes",
+ "Use task_list to show running tasks.",
HAIKU_MODEL,
- BACKGROUND_TOOLS,
+ TASK_TOOLS,
20000
);
- // Verify the process appears in the list
- const responseText = extractTextFromEvents(listEvents);
- expect(
- responseText.includes(processId!) ||
- toolOutputContains(listEvents, "bash_background_list", processId!)
- ).toBe(true);
+ const listedTaskIds = extractTaskListTaskIds(listEvents);
+ expect(listedTaskIds).toContain(taskId!);
// Clean up: terminate the background process
- await sendMessageAndWait(
+ const terminateEvents = await sendMessageAndWait(
env,
workspaceId,
- `Use bash_background_terminate to terminate process ${processId}`,
+ `Use task_terminate with task_ids: ["${taskId}"] to terminate the task.`,
HAIKU_MODEL,
- BACKGROUND_TOOLS,
+ TASK_TOOLS,
20000
);
+ const terminatedTaskIds = extractTerminatedTaskIds(terminateEvents);
+ expect(terminatedTaskIds).toContain(taskId!);
} finally {
await cleanup();
}
@@ -196,53 +237,44 @@ describeIntegration("Background Bash Execution", () => {
);
try {
- // Start a long-running background process
+ // Start a long-running background bash task
const startEvents = await sendMessageAndWait(
env,
workspaceId,
- "Use bash with run_in_background=true to run: true && sleep 300",
+ 'Use the task tool with args: { kind: "bash", script: "true && sleep 300", timeout_secs: 600, run_in_background: true, display_name: "bg-terminate" }. Do not spawn a sub-agent.',
HAIKU_MODEL,
- BACKGROUND_TOOLS,
+ TASK_TOOLS,
30000
);
- const processId = extractProcessId(startEvents);
- expect(processId).not.toBeNull();
+ const taskId = extractBashTaskId(startEvents);
+ expect(taskId).not.toBeNull();
- // Terminate the process
+ // Terminate the task
const terminateEvents = await sendMessageAndWait(
env,
workspaceId,
- `Use bash_background_terminate to terminate process ${processId}`,
+ `Use task_terminate with task_ids: ["${taskId}"] to terminate the task.`,
HAIKU_MODEL,
- BACKGROUND_TOOLS,
+ TASK_TOOLS,
20000
);
- // Verify termination succeeded (tool output should indicate success)
- const terminateSuccess =
- toolOutputContains(terminateEvents, "bash_background_terminate", "terminated") ||
- toolOutputContains(terminateEvents, "bash_background_terminate", "success") ||
- toolOutputContains(terminateEvents, "bash_background_terminate", processId!);
- expect(terminateSuccess).toBe(true);
+ const terminatedTaskIds = extractTerminatedTaskIds(terminateEvents);
+ expect(terminatedTaskIds).toContain(taskId!);
- // List to verify status changed to killed
+ // List to verify the task remains discoverable (including reported)
const listEvents = await sendMessageAndWait(
env,
workspaceId,
- "Use bash_background_list to show all background processes including terminated ones",
+ 'Use task_list with statuses: ["queued", "running", "awaiting_report", "reported"].',
HAIKU_MODEL,
- BACKGROUND_TOOLS,
+ TASK_TOOLS,
20000
);
- // Process should show as killed/terminated
- const listResponse = extractTextFromEvents(listEvents);
- expect(
- listResponse.toLowerCase().includes("killed") ||
- listResponse.toLowerCase().includes("terminated") ||
- toolOutputContains(listEvents, "bash_background_list", "killed")
- ).toBe(true);
+ const listedTaskIds = extractTaskListTaskIds(listEvents);
+ expect(listedTaskIds).toContain(taskId!);
} finally {
await cleanup();
}
@@ -284,42 +316,27 @@ describeIntegration("Background Bash Execution", () => {
const startEvents = await sendMessageAndWait(
env,
workspaceId,
- `Use bash with run_in_background=true to run: echo "${marker}" && sleep 1`,
+ `Use the task tool with args: { kind: "bash", script: "echo \"${marker}\" && sleep 1", timeout_secs: 30, run_in_background: true, display_name: "bg-output" }. Do not spawn a sub-agent.`,
HAIKU_MODEL,
- BACKGROUND_TOOLS,
+ TASK_TOOLS,
30000
);
- const processId = extractProcessId(startEvents);
- expect(processId).not.toBeNull();
-
- // Wait for process to complete and output to be written
- await new Promise((resolve) => setTimeout(resolve, 2000));
+ const taskId = extractBashTaskId(startEvents);
+ expect(taskId).not.toBeNull();
- // List processes - should show the marker in output or process details
- const listEvents = await sendMessageAndWait(
+ // Wait for the process to complete and retrieve its output
+ const awaitEvents = await sendMessageAndWait(
env,
workspaceId,
- `Use bash_background_list to show details of background processes`,
+ `Use task_await with task_ids: ["${taskId}"] and timeout_secs: 10 to retrieve output.`,
HAIKU_MODEL,
- BACKGROUND_TOOLS,
+ TASK_TOOLS,
20000
);
- // The process should have exited (status: exited) after sleep completes
- const listResponse = extractTextFromEvents(listEvents);
- const hasExited =
- listResponse.toLowerCase().includes("exited") ||
- listResponse.toLowerCase().includes("completed") ||
- toolOutputContains(listEvents, "bash_background_list", "exited");
-
- // Process may still be running or just finished - either is acceptable
- // The main assertion is that the process was tracked
- expect(
- hasExited ||
- listResponse.includes(processId!) ||
- toolOutputContains(listEvents, "bash_background_list", processId!)
- ).toBe(true);
+ const output = collectTaskAwaitOutputs(awaitEvents);
+ expect(output).toContain(marker);
} finally {
await cleanup();
}
diff --git a/tests/ipc/backgroundBashDirect.test.ts b/tests/ipc/backgroundBashDirect.test.ts
index b7d95e283d..4e108c59cd 100644
--- a/tests/ipc/backgroundBashDirect.test.ts
+++ b/tests/ipc/backgroundBashDirect.test.ts
@@ -20,25 +20,20 @@ import * as path from "path";
import { createTestEnvironment, cleanupTestEnvironment, type TestEnvironment } from "./setup";
import { createTempGitRepo, cleanupTempGitRepo, generateBranchName } from "./helpers";
import { detectDefaultTrunkBranch } from "../../src/node/git";
-import { getToolsForModel } from "../../src/common/utils/tools/tools";
import { LocalRuntime } from "../../src/node/runtime/LocalRuntime";
import { BackgroundProcessManager } from "../../src/node/services/backgroundProcessManager";
-import type { InitStateManager } from "../../src/node/services/initStateManager";
+import { createBashTool } from "../../src/node/services/tools/bash";
+import { createBashOutputTool } from "../../src/node/services/tools/bash_output";
// Access private fields from ServiceContainer for direct testing
interface ServiceContainerPrivates {
backgroundProcessManager: BackgroundProcessManager;
- initStateManager: InitStateManager;
}
function getBackgroundProcessManager(env: TestEnvironment): BackgroundProcessManager {
return (env.services as unknown as ServiceContainerPrivates).backgroundProcessManager;
}
-function getInitStateManager(env: TestEnvironment): InitStateManager {
- return (env.services as unknown as ServiceContainerPrivates).initStateManager;
-}
-
interface ToolExecuteResult {
success: boolean;
backgroundProcessId?: string;
@@ -82,31 +77,30 @@ describe("Background Bash Direct Integration", () => {
});
it("should retrieve output after tools are recreated (multi-message flow)", async () => {
- // Simulates production flow where tools are recreated between messages
+ // Simulates production flow where tool instances are recreated between messages
const manager = getBackgroundProcessManager(env);
- const initStateManager = getInitStateManager(env);
const runtime = new LocalRuntime(workspacePath);
const marker = `MULTI_MSG_${Date.now()}`;
+ const toolConfig = {
+ cwd: workspacePath,
+ runtime,
+ secrets: {},
+ muxEnv: {},
+ runtimeTempDir: "/tmp",
+ backgroundProcessManager: manager,
+ workspaceId,
+ };
+
// Message 1: Spawn background process
- const tools1 = await getToolsForModel(
- "anthropic:claude-sonnet-4-20250514",
+ const bash1 = createBashTool(toolConfig);
+ const spawnResult = (await bash1.execute!(
{
- cwd: workspacePath,
- runtime,
- secrets: {},
- muxEnv: {},
- runtimeTempDir: "/tmp",
- backgroundProcessManager: manager,
- workspaceId,
+ script: `echo "${marker}"`,
+ run_in_background: true,
+ display_name: `spawn_${Date.now()}`,
+ timeout_secs: 30,
},
- workspaceId,
- initStateManager,
- {}
- );
-
- const spawnResult = (await tools1.bash.execute!(
- { script: `echo "${marker}"`, run_in_background: true },
{ toolCallId: "spawn", messages: [] }
)) as ToolExecuteResult;
@@ -116,24 +110,9 @@ describe("Background Bash Direct Integration", () => {
await new Promise((resolve) => setTimeout(resolve, 200));
// Message 2: Read with NEW tool instances (same manager)
- const tools2 = await getToolsForModel(
- "anthropic:claude-sonnet-4-20250514",
- {
- cwd: workspacePath,
- runtime,
- secrets: {},
- muxEnv: {},
- runtimeTempDir: "/tmp",
- backgroundProcessManager: manager,
- workspaceId,
- },
- workspaceId,
- initStateManager,
- {}
- );
-
- const outputResult = (await tools2.bash_output.execute!(
- { process_id: processId },
+ const bashOutput2 = createBashOutputTool(toolConfig);
+ const outputResult = (await bashOutput2.execute!(
+ { process_id: processId, timeout_secs: 0 },
{ toolCallId: "read", messages: [] }
)) as ToolExecuteResult;
@@ -416,34 +395,29 @@ describe("Foreground to Background Migration", () => {
// 4. Process continues running and output is accessible via bash_output
const manager = getBackgroundProcessManager(env);
- const initStateManager = getInitStateManager(env);
const runtime = new LocalRuntime(workspacePath);
+ const toolConfig = {
+ cwd: workspacePath,
+ runtime,
+ secrets: {},
+ muxEnv: {},
+ runtimeTempDir: "/tmp",
+ backgroundProcessManager: manager,
+ workspaceId,
+ };
+
const testId = `fg_to_bg_${Date.now()}`;
const marker1 = `BEFORE_BG_${testId}`;
const marker2 = `AFTER_BG_${testId}`;
// Create tools for "message 1"
- const tools1 = await getToolsForModel(
- "anthropic:claude-sonnet-4-20250514",
- {
- cwd: workspacePath,
- runtime,
- secrets: {},
- muxEnv: {},
- runtimeTempDir: "/tmp",
- backgroundProcessManager: manager,
- workspaceId,
- },
- workspaceId,
- initStateManager,
- {}
- );
+ const bash1 = createBashTool(toolConfig);
// Start foreground bash that runs for ~3 seconds
// Script: output marker1, sleep, output marker2
const toolCallId = `tool_${testId}`;
- const bashPromise = tools1.bash.execute!(
+ const bashPromise = bash1.execute!(
{
script: `echo "${marker1}"; sleep 2; echo "${marker2}"`,
run_in_background: false,
@@ -487,27 +461,13 @@ describe("Foreground to Background Migration", () => {
// === Simulate new message (stream ends, new stream begins) ===
// Create NEW tool instances (same manager reference, fresh tools)
- const tools2 = await getToolsForModel(
- "anthropic:claude-sonnet-4-20250514",
- {
- cwd: workspacePath,
- runtime,
- secrets: {},
- muxEnv: {},
- runtimeTempDir: "/tmp",
- backgroundProcessManager: manager,
- workspaceId,
- },
- workspaceId,
- initStateManager,
- {}
- );
+ const bashOutput2 = createBashOutputTool(toolConfig);
// Wait for process to complete (marker2 should appear)
await new Promise((resolve) => setTimeout(resolve, 2500));
// Get output via bash_output tool (new tool instance)
- const outputResult = (await tools2.bash_output.execute!(
+ const outputResult = (await bashOutput2.execute!(
{ process_id: testId, timeout_secs: 0 },
{ toolCallId: "output_read", messages: [] }
)) as ToolExecuteResult;
@@ -525,34 +485,29 @@ describe("Foreground to Background Migration", () => {
// after migration and accessible in subsequent messages
const manager = getBackgroundProcessManager(env);
- const initStateManager = getInitStateManager(env);
const runtime = new LocalRuntime(workspacePath);
+ const toolConfig = {
+ cwd: workspacePath,
+ runtime,
+ secrets: {},
+ muxEnv: {},
+ runtimeTempDir: "/tmp",
+ backgroundProcessManager: manager,
+ workspaceId,
+ };
+
const testId = `preserve_output_${Date.now()}`;
const marker1 = `EARLY_${testId}`;
const marker2 = `LATE_${testId}`;
- const tools1 = await getToolsForModel(
- "anthropic:claude-sonnet-4-20250514",
- {
- cwd: workspacePath,
- runtime,
- secrets: {},
- muxEnv: {},
- runtimeTempDir: "/tmp",
- backgroundProcessManager: manager,
- workspaceId,
- },
- workspaceId,
- initStateManager,
- {}
- );
+ const bash1 = createBashTool(toolConfig);
const toolCallId = `tool_${testId}`;
// Script outputs marker1, sleeps, then outputs marker2
const script = `echo "${marker1}"; sleep 2; echo "${marker2}"`;
- const bashPromise = tools1.bash.execute!(
+ const bashPromise = bash1.execute!(
{
script,
run_in_background: false,
@@ -592,32 +547,27 @@ describe("Foreground to Background Migration", () => {
it("should handle migration when process exits during send", async () => {
// Edge case: process exits right as we try to background it
const manager = getBackgroundProcessManager(env);
- const initStateManager = getInitStateManager(env);
const runtime = new LocalRuntime(workspacePath);
+ const toolConfig = {
+ cwd: workspacePath,
+ runtime,
+ secrets: {},
+ muxEnv: {},
+ runtimeTempDir: "/tmp",
+ backgroundProcessManager: manager,
+ workspaceId,
+ };
+
const testId = `fast_exit_${Date.now()}`;
const marker = `QUICK_${testId}`;
- const tools = await getToolsForModel(
- "anthropic:claude-sonnet-4-20250514",
- {
- cwd: workspacePath,
- runtime,
- secrets: {},
- muxEnv: {},
- runtimeTempDir: "/tmp",
- backgroundProcessManager: manager,
- workspaceId,
- },
- workspaceId,
- initStateManager,
- {}
- );
+ const bash = createBashTool(toolConfig);
const toolCallId = `tool_${testId}`;
// Very fast script
- const bashPromise = tools.bash.execute!(
+ const bashPromise = bash.execute!(
{
script: `echo "${marker}"`,
run_in_background: false,
@@ -646,9 +596,18 @@ describe("Foreground to Background Migration", () => {
// new message), the abort signal would kill the process with exit code -997.
const manager = getBackgroundProcessManager(env);
- const initStateManager = getInitStateManager(env);
const runtime = new LocalRuntime(workspacePath);
+ const toolConfig = {
+ cwd: workspacePath,
+ runtime,
+ secrets: {},
+ muxEnv: {},
+ runtimeTempDir: "/tmp",
+ backgroundProcessManager: manager,
+ workspaceId,
+ };
+
const testId = `abort_after_bg_${Date.now()}`;
const marker1 = `BEFORE_${testId}`;
const marker2 = `AFTER_${testId}`;
@@ -656,26 +615,12 @@ describe("Foreground to Background Migration", () => {
// Create an AbortController to simulate stream abort
const abortController = new AbortController();
- const tools = await getToolsForModel(
- "anthropic:claude-sonnet-4-20250514",
- {
- cwd: workspacePath,
- runtime,
- secrets: {},
- muxEnv: {},
- runtimeTempDir: "/tmp",
- backgroundProcessManager: manager,
- workspaceId,
- },
- workspaceId,
- initStateManager,
- {}
- );
+ const bash = createBashTool(toolConfig);
const toolCallId = `tool_${testId}`;
// Start a foreground bash with the abort signal
- const bashPromise = tools.bash.execute!(
+ const bashPromise = bash.execute!(
{
script: `echo "${marker1}"; sleep 2; echo "${marker2}"`,
run_in_background: false,
diff --git a/tests/ipc/forkWorkspace.test.ts b/tests/ipc/forkWorkspace.test.ts
index b976054329..baa02959ae 100644
--- a/tests/ipc/forkWorkspace.test.ts
+++ b/tests/ipc/forkWorkspace.test.ts
@@ -94,6 +94,8 @@ describeIntegration("Workspace fork", () => {
// User expects: forked workspace is functional - can send messages to it
const collector = createStreamCollector(env.orpc, forkedWorkspaceId);
collector.start();
+ await collector.waitForSubscription();
+
const sendResult = await sendMessageWithModel(
env,
forkedWorkspaceId,
@@ -149,6 +151,8 @@ describeIntegration("Workspace fork", () => {
// Send a message that requires the historical context
const collector = createStreamCollector(env.orpc, forkedWorkspaceId);
collector.start();
+ await collector.waitForSubscription();
+
const sendResult = await sendMessageWithModel(
env,
forkedWorkspaceId,
@@ -202,6 +206,10 @@ describeIntegration("Workspace fork", () => {
const forkedCollector = createStreamCollector(env.orpc, forkedWorkspaceId);
sourceCollector.start();
forkedCollector.start();
+ await Promise.all([
+ sourceCollector.waitForSubscription(),
+ forkedCollector.waitForSubscription(),
+ ]);
// Send different messages to both concurrently
const [sourceResult, forkedResult] = await Promise.all([
@@ -251,6 +259,7 @@ describeIntegration("Workspace fork", () => {
// Start collector before starting stream
const sourceCollector = createStreamCollector(env.orpc, sourceWorkspaceId);
sourceCollector.start();
+ await sourceCollector.waitForSubscription();
// Start a stream in the source workspace (don't await)
void sendMessageWithModel(
@@ -284,6 +293,8 @@ describeIntegration("Workspace fork", () => {
// Send a message to the forked workspace
const forkedCollector = createStreamCollector(env.orpc, forkedWorkspaceId);
forkedCollector.start();
+ await forkedCollector.waitForSubscription();
+
const forkedSendResult = await sendMessageWithModel(
env,
forkedWorkspaceId,
diff --git a/tests/ipc/ollama.test.ts b/tests/ipc/ollama.test.ts
index db5f3ade68..e709d02568 100644
--- a/tests/ipc/ollama.test.ts
+++ b/tests/ipc/ollama.test.ts
@@ -138,12 +138,15 @@ describeOllama("Ollama integration", () => {
const collector = createStreamCollector(env.orpc, workspaceId);
collector.start();
try {
- // Ask for current time which should trigger bash tool
+ // Ask for current time which should trigger task(kind="bash")
const result = await sendMessageWithModel(
env,
workspaceId,
- "What is the current date and time? Use the bash tool to find out.",
- modelString("ollama", OLLAMA_MODEL)
+ 'Use task(kind="bash") to run: date. Set display_name="current-time" and timeout_secs=30. Do not spawn a sub-agent.',
+ modelString("ollama", OLLAMA_MODEL),
+ {
+ toolPolicy: [{ regex_match: "task", action: "require" }],
+ }
);
expect(result.success).toBe(true);
@@ -153,20 +156,32 @@ describeOllama("Ollama integration", () => {
assertStreamSuccess(collector);
- // Verify bash tool was called via events
+ // Verify task(kind="bash") was called via events
const events = collector.getEvents();
const toolCallStarts = events.filter((e: any) => e.type === "tool-call-start");
expect(toolCallStarts.length).toBeGreaterThan(0);
- const bashCall = toolCallStarts.find((e: any) => e.toolName === "bash");
- expect(bashCall).toBeDefined();
+ const taskCall = toolCallStarts.find(
+ (e: any) => e.toolName === "task" && e.args && e.args.kind === "bash"
+ );
+ expect(taskCall).toBeDefined();
- // Verify we got a text response with date/time info
+ // Verify we got a response and/or tool report with date/time info
const deltas = collector.getDeltas();
const responseText = extractTextFromEvents(deltas).toLowerCase();
- // Should mention time or date in response
- expect(responseText).toMatch(/time|date|am|pm|2024|2025/i);
+ const toolCallEnds = events.filter(
+ (e: any) =>
+ e.type === "tool-call-end" && e.toolName === "task" && e.args && e.args.kind === "bash"
+ );
+ const taskReport = toolCallEnds
+ .map((e: any) => e.result?.reportMarkdown)
+ .filter((t: any) => typeof t === "string")
+ .join("\n")
+ .toLowerCase();
+
+ // Should mention time or date in response or in the tool report
+ expect(`${responseText}\n${taskReport}`).toMatch(/time|date|am|pm|\d{2}:\d{2}|20\d{2}/i);
} finally {
collector.stop();
await cleanup();
diff --git a/tests/ipc/resumeStream.test.ts b/tests/ipc/resumeStream.test.ts
index a362af63ad..52cc94f07c 100644
--- a/tests/ipc/resumeStream.test.ts
+++ b/tests/ipc/resumeStream.test.ts
@@ -34,8 +34,11 @@ describeIntegration("resumeStream", () => {
void sendMessageWithModel(
env,
workspaceId,
- `Run this bash command: for i in 1 2 3; do sleep 0.5; done && echo '${expectedWord}'`,
- modelString("anthropic", "claude-sonnet-4-5")
+ `Use task(kind="bash") to run: for i in {1..10}; do sleep 0.5; done && echo '${expectedWord}'. Set display_name="resume-test" and timeout_secs=120. Do not spawn a sub-agent.`,
+ modelString("anthropic", "claude-sonnet-4-5"),
+ {
+ toolPolicy: [{ regex_match: "task", action: "require" }],
+ }
);
// Wait for stream to start
diff --git a/tests/ipc/runtimeExecuteBash.test.ts b/tests/ipc/runtimeExecuteBash.test.ts
index 5a9779dc5c..bc1e16456f 100644
--- a/tests/ipc/runtimeExecuteBash.test.ts
+++ b/tests/ipc/runtimeExecuteBash.test.ts
@@ -35,11 +35,8 @@ import type { RuntimeConfig } from "../../src/common/types/runtime";
import type { WorkspaceChatMessage } from "../../src/common/orpc/types";
import type { ToolPolicy } from "../../src/common/utils/tools/toolPolicy";
-// Tool policy: Only allow bash tool
-const BASH_ONLY: ToolPolicy = [
- { regex_match: "bash", action: "enable" },
- { regex_match: "file_.*", action: "disable" },
-];
+// Tool policy: Only allow the unified task tool (used as task(kind="bash")).
+const TASK_BASH_ONLY: ToolPolicy = [{ regex_match: "task", action: "require" }];
/**
* Collect tool outputs from stream events
@@ -54,8 +51,9 @@ function collectToolOutputs(events: WorkspaceChatMessage[], toolName: string): s
event.toolName === toolName
)
.map((event) => {
- const result = (event as { result?: { output?: string } }).result?.output;
- return typeof result === "string" ? result : "";
+ const result = (event as { result?: { output?: string; reportMarkdown?: string } }).result;
+ const text = toolName === "task" ? result?.reportMarkdown : result?.output;
+ return typeof text === "string" ? text : "";
})
.join("\n");
}
@@ -67,13 +65,23 @@ function collectToolOutputs(events: WorkspaceChatMessage[], toolName: string): s
function getToolDuration(events: WorkspaceChatMessage[], toolName: string): number {
const startEvent = events.find(
(e) => "type" in e && e.type === "tool-call-start" && "toolName" in e && e.toolName === toolName
- ) as { timestamp?: number } | undefined;
+ ) as { toolCallId?: string; timestamp?: number } | undefined;
+
+ if (!startEvent?.toolCallId || !startEvent.timestamp) {
+ return -1;
+ }
const endEvent = events.find(
- (e) => "type" in e && e.type === "tool-call-end" && "toolName" in e && e.toolName === toolName
+ (e) =>
+ "type" in e &&
+ e.type === "tool-call-end" &&
+ "toolName" in e &&
+ e.toolName === toolName &&
+ "toolCallId" in e &&
+ e.toolCallId === startEvent.toolCallId
) as { timestamp?: number } | undefined;
- if (startEvent?.timestamp && endEvent?.timestamp) {
+ if (endEvent?.timestamp) {
return endEvent.timestamp - startEvent.timestamp;
}
return -1;
@@ -161,23 +169,33 @@ describeIntegration("Runtime Bash Execution", () => {
const events = await sendMessageAndWait(
env,
workspaceId,
- 'Run the bash command "echo Hello World"',
+ 'Use the task tool with args: { kind: "bash", script: "echo Hello World", timeout_secs: 30, run_in_background: false, display_name: "echo-hello" }. Do not spawn a sub-agent.',
HAIKU_MODEL,
- BASH_ONLY
+ TASK_BASH_ONLY
);
// Extract response text
const responseText = extractTextFromEvents(events);
- // Verify the command output appears in the response
- expect(responseText.toLowerCase()).toContain("hello world");
+ // Verify the command output appears in the task tool result.
+ const taskOutput = collectToolOutputs(events, "task");
+ expect(taskOutput.toLowerCase()).toContain("hello world");
+
+ // responseText might be empty if the model doesn't comment on the output.
+ if (responseText) {
+ expect(responseText.toLowerCase()).toContain("hello world");
+ }
- // Verify bash tool was called
+ // Verify task(kind="bash") was called
const toolCallStarts = events.filter(
(e) => "type" in e && e.type === "tool-call-start"
);
- const bashCall = toolCallStarts.find((e) => "toolName" in e && e.toolName === "bash");
- expect(bashCall).toBeDefined();
+ const taskCall = toolCallStarts.find((e) => {
+ if (!("toolName" in e) || e.toolName !== "task") return false;
+ const args = (e as { args?: { kind?: string } }).args;
+ return args?.kind === "bash";
+ });
+ expect(taskCall).toBeDefined();
} finally {
await cleanup();
}
@@ -220,23 +238,33 @@ describeIntegration("Runtime Bash Execution", () => {
const events = await sendMessageAndWait(
env,
workspaceId,
- 'Run bash command: export TEST_VAR="test123" && echo "Value: $TEST_VAR"',
+ 'Use the task tool with args: { kind: "bash", script: "export TEST_VAR=test123 && echo Value:$TEST_VAR", timeout_secs: 30, run_in_background: false, display_name: "env-var" }. Do not spawn a sub-agent.',
HAIKU_MODEL,
- BASH_ONLY
+ TASK_BASH_ONLY
);
// Extract response text
const responseText = extractTextFromEvents(events);
- // Verify the env var value appears
- expect(responseText).toContain("test123");
+ // Verify the env var value appears in the task tool output.
+ const taskOutput = collectToolOutputs(events, "task");
+ expect(taskOutput).toContain("test123");
- // Verify bash tool was called
+ // responseText might be empty if the model doesn't comment on the output.
+ if (responseText) {
+ expect(responseText).toContain("test123");
+ }
+
+ // Verify task(kind="bash") was called
const toolCallStarts = events.filter(
(e) => "type" in e && e.type === "tool-call-start"
);
- const bashCall = toolCallStarts.find((e) => "toolName" in e && e.toolName === "bash");
- expect(bashCall).toBeDefined();
+ const taskCall = toolCallStarts.find((e) => {
+ if (!("toolName" in e) || e.toolName !== "task") return false;
+ const args = (e as { args?: { kind?: string } }).args;
+ return args?.kind === "bash";
+ });
+ expect(taskCall).toBeDefined();
} finally {
await cleanup();
}
@@ -275,37 +303,28 @@ describeIntegration("Runtime Bash Execution", () => {
);
try {
- // Create a test file with JSON content
- await sendMessageAndWait(
- env,
- workspaceId,
- 'Run bash: echo \'{"test": "data"}\' > /tmp/test.json',
- HAIKU_MODEL,
- BASH_ONLY
- );
-
- // Test command that pipes file through stdin-reading command (grep)
+ // Test command that pipes a file through a stdin-reading command (grep)
// This would hang forever if stdin.close() was used instead of stdin.abort()
// Regression test for: https://github.com/coder/mux/issues/503
const events = await sendMessageAndWait(
env,
workspaceId,
- "Run bash: cat /tmp/test.json | grep test",
+ 'Use the task tool with args: { kind: "bash", script: "echo testdata > /tmp/test.txt && cat /tmp/test.txt | grep test", timeout_secs: 30, run_in_background: false, display_name: "stdin-grep" }. Do not spawn a sub-agent.',
HAIKU_MODEL,
- BASH_ONLY,
+ TASK_BASH_ONLY,
30000 // Relaxed timeout for CI stability (was 10s)
);
// Calculate actual tool execution duration
- const toolDuration = getToolDuration(events, "bash");
+ const toolDuration = getToolDuration(events, "task");
// Extract response text
const responseText = extractTextFromEvents(events);
// Verify command completed successfully (not timeout)
// We primarily check bashOutput to ensure the tool executed and didn't hang
- const bashOutput = collectToolOutputs(events, "bash");
- expect(bashOutput).toContain('"test": "data"');
+ const bashOutput = collectToolOutputs(events, "task");
+ expect(bashOutput).toContain("testdata");
// responseText might be empty if the model decides not to comment on the output
// so we make this check optional or less strict if the tool output is correct
@@ -318,14 +337,16 @@ describeIntegration("Runtime Bash Execution", () => {
const maxDuration = 10000;
expect(toolDuration).toBeLessThan(maxDuration);
- // Verify bash tool was called
+ // Verify task(kind="bash") was called
const toolCallStarts = events.filter(
(e) => "type" in e && e.type === "tool-call-start"
);
- const bashCalls = toolCallStarts.filter(
- (e) => "toolName" in e && e.toolName === "bash"
- );
- expect(bashCalls.length).toBeGreaterThan(0);
+ const taskCalls = toolCallStarts.filter((e) => {
+ if (!("toolName" in e) || e.toolName !== "task") return false;
+ const args = (e as { args?: { kind?: string } }).args;
+ return args?.kind === "bash";
+ });
+ expect(taskCalls.length).toBeGreaterThan(0);
} finally {
await cleanup();
}
@@ -364,37 +385,28 @@ describeIntegration("Runtime Bash Execution", () => {
);
try {
- // Create some test files to search through
- await sendMessageAndWait(
- env,
- workspaceId,
- 'Run bash: for i in {1..1000}; do echo "terminal bench line $i" >> testfile.txt; done',
- HAIKU_MODEL,
- BASH_ONLY
- );
-
// Test grep | head pattern - this historically hangs over SSH
// This is a regression test for the bash hang issue
const events = await sendMessageAndWait(
env,
workspaceId,
- 'Run bash: grep -n "terminal bench" testfile.txt | head -n 200',
+ 'Use the task tool with args: { kind: "bash", script: "for i in {1..1000}; do echo \"terminal bench line $i\" >> testfile.txt; done && grep -n \"terminal bench\" testfile.txt | head -n 200", timeout_secs: 60, run_in_background: false, display_name: "grep-head" }. Do not spawn a sub-agent.',
HAIKU_MODEL,
- BASH_ONLY,
+ TASK_BASH_ONLY,
30000 // Relaxed timeout for CI stability (was 15s)
);
// Calculate actual tool execution duration
- const toolDuration = getToolDuration(events, "bash");
+ const toolDuration = getToolDuration(events, "task");
// Verify command completed successfully (not timeout)
- // Check that the bash tool completed (tool-call-end events exist)
+ // Check that task(kind="bash") completed (tool-call-end events exist)
const toolCallEnds = events.filter(
(e) =>
"type" in e &&
e.type === "tool-call-end" &&
"toolName" in e &&
- e.toolName === "bash"
+ e.toolName === "task"
);
expect(toolCallEnds.length).toBeGreaterThan(0);
@@ -404,14 +416,16 @@ describeIntegration("Runtime Bash Execution", () => {
const maxDuration = 15000;
expect(toolDuration).toBeLessThan(maxDuration);
- // Verify bash tool was called
+ // Verify task(kind="bash") was called
const toolCallStarts = events.filter(
(e) => "type" in e && e.type === "tool-call-start"
);
- const bashCalls = toolCallStarts.filter(
- (e) => "toolName" in e && e.toolName === "bash"
- );
- expect(bashCalls.length).toBeGreaterThan(0);
+ const taskCalls = toolCallStarts.filter((e) => {
+ if (!("toolName" in e) || e.toolName !== "task") return false;
+ const args = (e as { args?: { kind?: string } }).args;
+ return args?.kind === "bash";
+ });
+ expect(taskCalls.length).toBeGreaterThan(0);
} finally {
await cleanup();
}
diff --git a/tests/ipc/sendMessage.context.test.ts b/tests/ipc/sendMessage.context.test.ts
index 628ef6865e..99e527386a 100644
--- a/tests/ipc/sendMessage.context.test.ts
+++ b/tests/ipc/sendMessage.context.test.ts
@@ -152,7 +152,7 @@ describeIntegration("sendMessage context handling tests", () => {
describe("tool calls", () => {
test.concurrent(
- "should execute bash tool when requested",
+ 'should execute task(kind="bash") tool when requested',
async () => {
await withSharedWorkspace("anthropic", async ({ env, workspaceId, collector }) => {
const repoPath = getSharedRepoPath();
@@ -162,13 +162,15 @@ describeIntegration("sendMessage context handling tests", () => {
await fs.writeFile(testFilePath, "Hello from test file!");
try {
- // Ask to read the file using bash
- // Default toolPolicy (undefined) allows all tools
+ // Ask to read the file using task(kind="bash")
const result = await sendMessageWithModel(
env,
workspaceId,
- `Read the contents of the file at ${testFilePath} using the bash tool with cat.`,
- modelString("anthropic", KNOWN_MODELS.HAIKU.providerModelId)
+ `Use task(kind="bash") to run: cat ${testFilePath}. Set display_name="read-file" and timeout_secs=30. Do not spawn a sub-agent.`,
+ modelString("anthropic", KNOWN_MODELS.HAIKU.providerModelId),
+ {
+ toolPolicy: [{ regex_match: "task", action: "require" }],
+ }
);
expect(result.success).toBe(true);
@@ -182,8 +184,12 @@ describeIntegration("sendMessage context handling tests", () => {
(e) => "type" in e && (e as { type: string }).type === "tool-call-start"
);
- // Should have at least one tool call
- expect(toolCallStarts.length).toBeGreaterThan(0);
+ // Should have at least one task(kind="bash") tool call
+ const bashTaskCall = toolCallStarts.find((e) => {
+ if (!("toolName" in e) || e.toolName !== "task") return false;
+ return (e as { args?: { kind?: string } }).args?.kind === "bash";
+ });
+ expect(bashTaskCall).toBeDefined();
} finally {
// Cleanup test file
try {
@@ -206,7 +212,7 @@ describeIntegration("sendMessage context handling tests", () => {
const result = await sendMessageWithModel(
env,
workspaceId,
- "Run the command 'echo test' using bash.",
+ "Run the command 'echo test' using task(kind=\"bash\").",
modelString("anthropic", KNOWN_MODELS.HAIKU.providerModelId),
{
toolPolicy: [{ regex_match: ".*", action: "disable" }],
diff --git a/tests/ipc/truncate.test.ts b/tests/ipc/truncate.test.ts
index 2ffcf1a6a8..0d0f8cb294 100644
--- a/tests/ipc/truncate.test.ts
+++ b/tests/ipc/truncate.test.ts
@@ -4,6 +4,7 @@ import {
createStreamCollector,
assertStreamSuccess,
resolveOrpcClient,
+ modelString,
} from "./helpers";
import { HistoryService } from "../../src/node/services/historyService";
import { createMuxMessage } from "../../src/common/types/message";
@@ -220,7 +221,11 @@ describeIntegration("truncateHistory", () => {
void sendMessageWithModel(
env,
workspaceId,
- "Run this bash command: for i in {1..60}; do sleep 0.5; done && echo done"
+ 'Use task(kind="bash") to run: for i in {1..60}; do sleep 0.5; done && echo done. Set display_name="truncate-stream" and timeout_secs=120. Do not spawn a sub-agent.',
+ modelString("anthropic", "claude-sonnet-4-5"),
+ {
+ toolPolicy: [{ regex_match: "task", action: "require" }],
+ }
);
// Wait for stream to start