🤖 fix: simplify subagents + plan-mode delegation

ThomasK33 · ThomasK33 · commit dd1173d4ccbd · 2025-12-20T17:51:32.000+01:00
- Enforce subagent_type enum (explore|exec)
- Make subagents non-recursive; Plan Mode spawns explore only
- Hide Exec sub-agent settings; exec always inherits

---
_Generated with  • Model: unknown • Thinking: unknown_
<!-- mux-attribution: model=unknown thinking=unknown -->

Change-Id: Ib7e8e5b9340bc9a1117e349f696779caea60ac18
Signed-off-by: Thomas Kosiewski <tk@coder.com>
diff --git a/src/browser/stories/App.settings.stories.tsx b/src/browser/stories/App.settings.stories.tsx
@@ -111,8 +111,10 @@ export const Tasks: AppStory = {
     await body.findByText(/Max Parallel Agent Tasks/i);
     await body.findByText(/Max Task Nesting Depth/i);
     await body.findByText(/Sub-agents/i);
-    await body.findByText(/Research/i);
     await body.findByText(/Explore/i);
+    if (body.queryByText(/Exec/i)) {
+      throw new Error("Expected Exec sub-agent settings to be hidden (always inherits)");
+    }
 
     const inputs = await body.findAllByRole("spinbutton");
     if (inputs.length !== 2) {
diff --git a/src/common/constants/agents.ts b/src/common/constants/agents.ts
@@ -1,6 +1,4 @@
-export const BUILT_IN_SUBAGENTS = [
-  { agentType: "research", label: "Research" },
-  { agentType: "explore", label: "Explore" },
-] as const;
+export const BUILT_IN_SUBAGENT_TYPES = ["explore", "exec"] as const;
+export type BuiltInSubagentType = (typeof BUILT_IN_SUBAGENT_TYPES)[number];
 
-export type BuiltInSubagentType = (typeof BUILT_IN_SUBAGENTS)[number]["agentType"];
+export const BUILT_IN_SUBAGENTS = [{ agentType: "explore", label: "Explore" }] as const;
diff --git a/src/common/orpc/schemas/project.ts b/src/common/orpc/schemas/project.ts
@@ -34,8 +34,7 @@ export const WorkspaceConfigSchema = z.object({
       "If set, this workspace is a child workspace spawned from the parent workspaceId (enables nesting in UI and backend orchestration).",
   }),
   agentType: z.string().optional().meta({
-    description:
-      'If set, selects an agent preset for this workspace (e.g., "research" or "explore").',
+    description: 'If set, selects an agent preset for this workspace (e.g., "explore" or "exec").',
   }),
   taskStatus: z.enum(["queued", "running", "awaiting_report", "reported"]).optional().meta({
     description:
diff --git a/src/common/orpc/schemas/workspace.ts b/src/common/orpc/schemas/workspace.ts
@@ -37,8 +37,7 @@ export const WorkspaceMetadataSchema = z.object({
       "If set, this workspace is a child workspace spawned from the parent workspaceId (enables nesting in UI and backend orchestration).",
   }),
   agentType: z.string().optional().meta({
-    description:
-      'If set, selects an agent preset for this workspace (e.g., "research" or "explore").',
+    description: 'If set, selects an agent preset for this workspace (e.g., "explore" or "exec").',
   }),
   taskStatus: z.enum(["queued", "running", "awaiting_report", "reported"]).optional().meta({
     description:
diff --git a/src/common/types/tasks.ts b/src/common/types/tasks.ts
@@ -31,6 +31,7 @@ export function normalizeSubagentAiDefaults(raw: unknown): SubagentAiDefaults {
   for (const [agentTypeRaw, entryRaw] of Object.entries(record)) {
     const agentType = agentTypeRaw.trim().toLowerCase();
     if (!agentType) continue;
+    if (agentType === "exec") continue;
     if (!entryRaw || typeof entryRaw !== "object") continue;
 
     const entry = entryRaw as Record<string, unknown>;
diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts
@@ -13,6 +13,7 @@ import {
   STATUS_MESSAGE_MAX_LENGTH,
   WEB_FETCH_MAX_OUTPUT_BYTES,
 } from "@/common/constants/toolLimits";
+import { BUILT_IN_SUBAGENT_TYPES } from "@/common/constants/agents";
 import { TOOL_EDIT_WARNING } from "@/common/types/tools";
 
 import { zodToJsonSchema } from "zod-to-json-schema";
@@ -87,9 +88,14 @@ export const AskUserQuestionToolResultSchema = z
 // task (sub-workspaces as subagents)
 // -----------------------------------------------------------------------------
 
+const SubagentTypeSchema = z.preprocess(
+  (value) => (typeof value === "string" ? value.trim().toLowerCase() : value),
+  z.enum(BUILT_IN_SUBAGENT_TYPES)
+);
+
 export const TaskToolArgsSchema = z
   .object({
-    subagent_type: z.string().min(1),
+    subagent_type: SubagentTypeSchema,
     prompt: z.string().min(1),
     description: z.string().optional(),
     run_in_background: z.boolean().default(false),
@@ -467,7 +473,7 @@ export const TOOL_DEFINITIONS = {
   task: {
     description:
       "Spawn a sub-agent task in a child workspace. " +
-      "Use this to delegate work to specialized presets like research or explore. " +
+      'Use this to delegate work to specialized presets like "explore" (read-only investigation) or "exec" (general-purpose coding in a child workspace). ' +
       "If run_in_background is false, this tool blocks until the sub-agent calls agent_report, then returns the report. " +
       "If run_in_background is true, you can await it later with task_await.",
     schema: TaskToolArgsSchema,
diff --git a/src/common/utils/ui/modeUtils.ts b/src/common/utils/ui/modeUtils.ts
@@ -17,12 +17,12 @@ NOTE that this is the only file you are allowed to edit - other than this you ar
 
 Keep the plan crisp and focused on actionable recommendations. Put historical context, alternatives considered, or lengthy rationale into collapsible \`<details>/<summary>\` blocks so the core plan stays scannable.
 
-If you need investigation (codebase exploration or deeper research) before you can produce a good plan, delegate it to sub-agents via the \`task\` tool:
-- Use \`subagent_type: "explore"\` for quick, read-only repo/code exploration (identify relevant files/symbols, callsites, and facts).
-- Use \`subagent_type: "research"\` for deeper investigation and feasibility analysis in this codebase (it may delegate to \`explore\`; web research is optional when relevant).
+If you need investigation (codebase exploration, tracing callsites, locating patterns, feasibility checks) before you can produce a good plan, delegate it to Explore sub-agents via the \`task\` tool:
+- In Plan Mode, you MUST ONLY spawn \`subagent_type: "explore"\` tasks. Do NOT spawn \`subagent_type: "exec"\` tasks in Plan Mode.
+- Use \`subagent_type: "explore"\` for read-only repo/code exploration and optional web lookups when relevant.
 - In each task prompt, specify explicit deliverables (what questions to answer, what files/symbols to locate, and the exact output format you want back).
-- Run tasks in parallel with \`run_in_background: true\`, then use \`task_await\` (optionally with \`task_ids\`) until all spawned tasks are \`completed\`.
-- After spawning one or more tasks, do NOT continue with your own investigation/planning in parallel. Await the task reports first, then synthesize and proceed.
+- Prefer running multiple Explore tasks in parallel with \`run_in_background: true\`, then use \`task_await\` (optionally with \`task_ids\`) until all spawned tasks are \`completed\`.
+- While Explore tasks run, do NOT perform broad repo exploration yourself. Wait for the reports, then synthesize the plan in this session.
 - Do NOT call \`propose_plan\` until you have awaited and incorporated sub-agent reports.
 
 If you need clarification from the user before you can finalize the plan, you MUST use the ask_user_question tool.
@@ -35,6 +35,7 @@ If you need clarification from the user before you can finalize the plan, you MU
 
 When you have finished writing your plan and are ready for user approval, call the propose_plan tool.
 Do not make other edits in plan mode. You may have tools like bash but only use them for read-only operations.
+Read-only bash means: no redirects/heredocs, no rm/mv/cp/mkdir/touch, no git add/commit, and no dependency installs.
 
 If the user suggests that you should make edits to other files, ask them to switch to Exec mode first!
 `;
diff --git a/src/node/services/agentPresets.ts b/src/node/services/agentPresets.ts
@@ -1,19 +1,13 @@
 import type { ToolPolicy } from "@/common/utils/tools/toolPolicy";
 
 export interface AgentPreset {
-  /** Normalized agentType key (e.g., "research") */
+  /** Normalized agentType key (e.g., "explore" or "exec") */
   agentType: string;
   toolPolicy: ToolPolicy;
   systemPrompt: string;
 }
 
-const TASK_TOOL_NAMES = [
-  "task",
-  "task_await",
-  "task_list",
-  "task_terminate",
-  "agent_report",
-] as const;
+const REPORTING_TOOL_NAMES = ["agent_report"] as const;
 
 function enableOnly(...toolNames: readonly string[]): ToolPolicy {
   return [
@@ -25,14 +19,14 @@ function enableOnly(...toolNames: readonly string[]): ToolPolicy {
 const REPORTING_PROMPT_LINES = [
   "Reporting:",
   "- When you have a final answer, call agent_report exactly once.",
-  "- Do not call agent_report until any spawned sub-tasks have completed and you have integrated their results.",
+  "- Do not call agent_report until you have completed the assigned task and integrated all relevant findings.",
 ] as const;
 
 function buildSystemPrompt(args: {
   agentLabel: string;
   goals: string[];
   rules: string[];
-  delegation: string[];
+  delegation?: string[];
 }): string {
   return [
     `You are a ${args.agentLabel} sub-agent running inside a child workspace.`,
@@ -43,30 +37,32 @@ function buildSystemPrompt(args: {
     "Rules:",
     ...args.rules,
     "",
-    "Delegation:",
-    ...args.delegation,
-    "",
+    ...(args.delegation && args.delegation.length > 0
+      ? ["Delegation:", ...args.delegation, ""]
+      : []),
     ...REPORTING_PROMPT_LINES,
   ].join("\n");
 }
 
-const RESEARCH_PRESET: AgentPreset = {
-  agentType: "research",
-  toolPolicy: enableOnly("web_search", "web_fetch", "file_read", ...TASK_TOOL_NAMES),
+const EXEC_PRESET: AgentPreset = {
+  agentType: "exec",
+  toolPolicy: [
+    // Non-recursive: subagents must not spawn more subagents.
+    { regex_match: "task", action: "disable" },
+    { regex_match: "task_.*", action: "disable" },
+    // Only the main plan-mode session should call propose_plan.
+    { regex_match: "propose_plan", action: "disable" },
+  ],
   systemPrompt: buildSystemPrompt({
-    agentLabel: "Research",
+    agentLabel: "Exec",
     goals: [
-      "- Gather accurate, relevant information efficiently.",
-      "- Prefer primary sources and official docs when possible.",
+      "- Complete the assigned coding task end-to-end in this child workspace.",
+      "- Make minimal, correct changes that match existing codebase patterns.",
     ],
     rules: [
-      "- Do not edit files.",
-      "- Do not run bash commands unless explicitly enabled (assume it is not).",
-      "- If the task tool is available and you need repository exploration beyond file_read, delegate to an Explore sub-agent.",
-      "- Use task_list only for discovery (e.g. after interruptions). Do not poll task_list to wait; use task_await to wait for completion.",
-    ],
-    delegation: [
-      '- If available, use: task({ subagent_type: "explore", prompt: "..." }) when you need repo exploration.',
+      "- Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).",
+      "- Do not call propose_plan.",
+      "- Prefer small, reviewable diffs and run targeted checks when feasible.",
     ],
   }),
 };
@@ -79,29 +75,32 @@ const EXPLORE_PRESET: AgentPreset = {
     "bash_output",
     "bash_background_list",
     "bash_background_terminate",
-    ...TASK_TOOL_NAMES
+    "web_fetch",
+    "web_search",
+    "google_search",
+    ...REPORTING_TOOL_NAMES
   ),
   systemPrompt: buildSystemPrompt({
     agentLabel: "Explore",
     goals: [
       "- Explore the repository to answer the prompt using read-only investigation.",
-      "- Keep output concise and actionable (paths, symbols, and findings).",
+      "- Return concise, actionable findings (paths, symbols, callsites, and facts).",
     ],
     rules: [
-      "- Do not edit files.",
-      "- Treat bash as read-only: prefer commands like rg, ls, cat, git show, git diff (read-only).",
-      "- If the task tool is available and you need external information, delegate to a Research sub-agent.",
-      "- Use task_list only for discovery (e.g. after interruptions). Do not poll task_list to wait; use task_await to wait for completion.",
-    ],
-    delegation: [
-      '- If available, use: task({ subagent_type: "research", prompt: "..." }) when you need web research.',
+      "=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===",
+      "- You MUST NOT create, edit, delete, move, or copy files.",
+      "- You MUST NOT create temporary files anywhere (including /tmp).",
+      "- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.",
+      "- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).",
+      "- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).",
+      "- Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).",
     ],
   }),
 };
 
 const PRESETS_BY_AGENT_TYPE: Record<string, AgentPreset> = {
-  research: RESEARCH_PRESET,
   explore: EXPLORE_PRESET,
+  exec: EXEC_PRESET,
 };
 
 export function getAgentPreset(agentType: string | undefined): AgentPreset | null {
diff --git a/src/node/services/taskService.test.ts b/src/node/services/taskService.test.ts
@@ -926,9 +926,9 @@ describe("TaskService", () => {
               {
                 path: path.join(projectPath, "task"),
                 id: taskId,
-                name: "agent_research_task",
+                name: "agent_exec_task",
                 parentWorkspaceId: rootWorkspaceId,
-                agentType: "research",
+                agentType: "exec",
                 taskStatus: "running",
               },
             ],
@@ -982,9 +982,9 @@ describe("TaskService", () => {
               {
                 path: path.join(projectPath, "parent-task"),
                 id: parentTaskId,
-                name: "agent_research_parent",
+                name: "agent_exec_parent",
                 parentWorkspaceId: rootWorkspaceId,
-                agentType: "research",
+                agentType: "exec",
                 taskStatus: "running",
               },
               {
@@ -1224,9 +1224,9 @@ describe("TaskService", () => {
               {
                 path: path.join(projectPath, "parent-task"),
                 id: parentTaskId,
-                name: "agent_research_parent",
+                name: "agent_exec_parent",
                 parentWorkspaceId: rootWorkspaceId,
-                agentType: "research",
+                agentType: "exec",
                 taskStatus: "running",
               },
               {
@@ -1279,9 +1279,9 @@ describe("TaskService", () => {
               {
                 path: path.join(projectPath, "parent-task"),
                 id: parentTaskId,
-                name: "agent_research_parent",
+                name: "agent_exec_parent",
                 parentWorkspaceId: rootWorkspaceId,
-                agentType: "research",
+                agentType: "exec",
                 taskStatus: "awaiting_report",
               },
               {
diff --git a/src/node/services/tools/task.test.ts b/src/node/services/tools/task.test.ts
@@ -106,4 +106,42 @@ describe("task tool", () => {
       expect(caught.message).toMatch(/maxTaskNestingDepth/i);
     }
   });
+
+  it('should reject spawning "exec" tasks while in plan mode', async () => {
+    using tempDir = new TestTempDir("test-task-tool");
+    const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" });
+
+    const create = mock(() =>
+      Ok({ taskId: "child-task", kind: "agent" as const, status: "running" as const })
+    );
+    const waitForAgentReport = mock(() =>
+      Promise.resolve({
+        reportMarkdown: "Hello from child",
+        title: "Result",
+      })
+    );
+    const taskService = { create, waitForAgentReport } as unknown as TaskService;
+
+    const tool = createTaskTool({
+      ...baseConfig,
+      mode: "plan",
+      taskService,
+    });
+
+    let caught: unknown = null;
+    try {
+      await Promise.resolve(
+        tool.execute!({ subagent_type: "exec", prompt: "do it" }, mockToolCallOptions)
+      );
+    } catch (error: unknown) {
+      caught = error;
+    }
+
+    expect(caught).toBeInstanceOf(Error);
+    if (caught instanceof Error) {
+      expect(caught.message).toMatch(/plan mode/i);
+    }
+    expect(create).not.toHaveBeenCalled();
+    expect(waitForAgentReport).not.toHaveBeenCalled();
+  });
 });
diff --git a/src/node/services/tools/task.ts b/src/node/services/tools/task.ts
@@ -18,6 +18,11 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => {
         throw new Error("Interrupted");
       }
 
+      // Plan mode is explicitly non-executing. Allow only read-only exploration tasks.
+      if (config.mode === "plan" && args.subagent_type === "exec") {
+        throw new Error('In Plan Mode you may only spawn subagent_type: "explore" tasks.');
+      }
+
       const modelString =
         config.muxEnv && typeof config.muxEnv.MUX_MODEL_STRING === "string"
           ? config.muxEnv.MUX_MODEL_STRING
diff --git a/src/node/services/tools/task_list.test.ts b/src/node/services/tools/task_list.test.ts
@@ -56,8 +56,8 @@ describe("task_list tool", () => {
         taskId: "task-1",
         status: "running",
         parentWorkspaceId: "root-workspace",
-        agentType: "research",
-        workspaceName: "agent_research_task-1",
+        agentType: "exec",
+        workspaceName: "agent_exec_task-1",
         title: "t",
         createdAt: "2025-01-01T00:00:00.000Z",
         modelString: "anthropic:claude-haiku-4-5",
@@ -77,8 +77,8 @@ describe("task_list tool", () => {
           taskId: "task-1",
           status: "running",
           parentWorkspaceId: "root-workspace",
-          agentType: "research",
-          workspaceName: "agent_research_task-1",
+          agentType: "exec",
+          workspaceName: "agent_exec_task-1",
           title: "t",
           createdAt: "2025-01-01T00:00:00.000Z",
           modelString: "anthropic:claude-haiku-4-5",