best of n fast

jahooma · jahooma · commit c1b1b3977a16 · 2025-10-30T14:59:21.000-07:00
diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts
@@ -11,9 +11,10 @@ export const createBase2: (
   options?: {
     hasNoValidation?: boolean
     bestOfN?: boolean
+    bestOfNFast?: boolean
   },
 ) => Omit<SecretAgentDefinition, 'id'> = (mode, options) => {
-  const { hasNoValidation = false, bestOfN = false } = options ?? {}
+  const { hasNoValidation = false, bestOfN = false, bestOfNFast = false } = options ?? {}
   const isFast = mode === 'fast'
   const isMax = mode === 'max'
 
@@ -58,6 +59,7 @@ export const createBase2: (
       'researcher-docs',
       'commander',
       bestOfN && 'base2-best-of-n-orchestrator',
+      bestOfNFast && 'base2-best-of-n-fast-orchestrator',
       isMax && 'base2-gpt-5-worker',
       'context-pruner',
     ),
@@ -149,7 +151,9 @@ ${buildArray(
     `- Use the write_todos tool to write out your step-by-step implementation plan.${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'}`,
   bestOfN &&
     `- You must spawn the base2-best-of-n-orchestrator agent to implement the code changes, since it will generate multiple implementation proposals and select the best one, which the user wants you to do.`,
-  !bestOfN &&
+  bestOfNFast &&
+    `- You must spawn the base2-best-of-n-fast-orchestrator agent to implement the code changes, since it will generate multiple implementation proposals and select the best one, which the user wants you to do.`,
+  !bestOfN && !bestOfNFast &&
     isFast &&
     `- Use the str_replace or write_file tool to make the changes. (Pause after making all the changes to see the tool results of your edits and double check they went through correctly.)`,
   isMax &&
diff --git a/.agents/base2/best-of-n/base2-best-of-n-fast-orchestrator.ts b/.agents/base2/best-of-n/base2-best-of-n-fast-orchestrator.ts
@@ -0,0 +1,190 @@
+import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
+import { publisher } from '../../constants'
+import type { StepText, ToolCall } from 'types/agent-definition'
+
+/**
+ * Fast best-of-N orchestrator.
+ *
+ * Spawns five `base2-implementor-step` agents in one `spawn_agents` call, has
+ * `base2-selector-fast` pick one proposal, yields that proposal as a
+ * `STEP_TEXT` step, and outputs the proposal plus the resulting tool results.
+ */
+const definition: SecretAgentDefinition = {
+  id: 'base2-best-of-n-fast-orchestrator',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Best-of-N Fast Implementation Orchestrator',
+  spawnerPrompt:
+    'Orchestrates multiple implementor agents to generate implementation proposals and selects the best one',
+
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+
+  toolNames: [
+    'spawn_agents',
+    'str_replace',
+    'write_file',
+    'set_messages',
+    'set_output',
+  ],
+  spawnableAgents: [
+    'base2-implementor-step',
+    'base2-implementor-step-gpt-5',
+    'base2-selector-fast',
+  ],
+
+  inputSchema: {},
+  outputMode: 'structured_output',
+
+  // Every failure path below ends with a `set_output` carrying an `error`
+  // string. The helper is declared inside the generator, as in the original.
+  handleSteps: function* ({ agentState }) {
+    // Drop the last message of the copied history (the userInstruction
+    // message for this agent) before doing any work.
+    const messages = agentState.messageHistory.concat()
+    messages.pop()
+    yield {
+      toolName: 'set_messages',
+      input: {
+        messages,
+      },
+      includeToolCall: false,
+    } satisfies ToolCall<'set_messages'>
+
+    // Spawn five implementors of the same agent type in a single call.
+    const { toolResult: implementorsResult1 } = yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: [
+          { agent_type: 'base2-implementor-step' },
+          { agent_type: 'base2-implementor-step' },
+          { agent_type: 'base2-implementor-step' },
+          { agent_type: 'base2-implementor-step' },
+          { agent_type: 'base2-implementor-step' },
+        ],
+      },
+      includeToolCall: false,
+    }
+
+    // Keep only successful proposals: a failed implementor comes back as an
+    // `{ errorMessage }` object, which must never be offered to the selector
+    // or yielded as step text. Ids are lettered after filtering.
+    const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    const implementations = extractSpawnResults<string>(implementorsResult1)
+      .filter((content): content is string => typeof content === 'string')
+      .map((content, index) => ({
+        id: letters[index],
+        content,
+      }))
+    if (implementations.length === 0) {
+      yield {
+        toolName: 'set_output',
+        input: { error: 'No implementor produced an implementation.' },
+      } satisfies ToolCall<'set_output'>
+      return
+    }
+
+    // Spawn selector with implementations as params
+    const { toolResult: selectorResult } = yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: [
+          {
+            agent_type: 'base2-selector-fast',
+            params: { implementations },
+          },
+        ],
+      },
+      includeToolCall: false,
+    } satisfies ToolCall<'spawn_agents'>
+
+    // The fast selector's output schema only carries `implementationId`.
+    const selectorOutput = extractSpawnResults<{
+      implementationId: string
+    }>(selectorResult)[0]
+
+    // `extractSpawnResults` yields [] for a missing tool result, so the first
+    // element can be undefined; guard before using the `in` operator on it.
+    if (!selectorOutput || typeof selectorOutput !== 'object') {
+      yield {
+        toolName: 'set_output',
+        input: { error: 'Selector returned no output.' },
+      } satisfies ToolCall<'set_output'>
+      return
+    }
+    if ('errorMessage' in selectorOutput) {
+      yield {
+        toolName: 'set_output',
+        input: { error: selectorOutput.errorMessage },
+      } satisfies ToolCall<'set_output'>
+      return
+    }
+    const { implementationId } = selectorOutput
+    const chosenImplementation = implementations.find(
+      (implementation) => implementation.id === implementationId,
+    )
+    if (!chosenImplementation) {
+      yield {
+        toolName: 'set_output',
+        input: { error: 'Failed to find chosen implementation.' },
+      } satisfies ToolCall<'set_output'>
+      return
+    }
+
+    // Yield the chosen proposal as step text. NOTE(review): presumably the
+    // runtime then executes the tool calls embedded in that text - confirm.
+    const { agentState: postEditsAgentState } = yield {
+      type: 'STEP_TEXT',
+      text: chosenImplementation.content,
+    } as StepText
+    const { messageHistory } = postEditsAgentState
+    const lastAssistantMessageIndex = messageHistory.findLastIndex(
+      (message) => message.role === 'assistant',
+    )
+    // Collect the JSON outputs of the tool messages that follow the last
+    // assistant message.
+    const editToolResults = messageHistory
+      .slice(lastAssistantMessageIndex)
+      .filter((message) => message.role === 'tool')
+      .flatMap((message) => message.content.output)
+      .filter((output) => output.type === 'json')
+      .map((output) => output.value)
+
+    // Set output with the chosen implementation and its tool results
+    yield {
+      toolName: 'set_output',
+      input: {
+        response: chosenImplementation.content,
+        toolResults: editToolResults,
+      },
+    } satisfies ToolCall<'set_output'>
+
+    /**
+     * Flattens a `spawn_agents` tool result into one entry per spawned agent:
+     * the agent's output value, or an `{ errorMessage }` object when no value
+     * is present. Returns [] when `results` is undefined.
+     */
+    function extractSpawnResults<T>(
+      results: any[] | undefined,
+    ): (T | { errorMessage: string })[] {
+      if (!results) return []
+      const spawnedResults = results
+        .filter((result) => result.type === 'json')
+        .map((result) => result.value)
+        .flat() as {
+        agentType: string
+        value?: { value?: T; errorMessage?: string }
+      }[]
+      return spawnedResults.map(
+        (result) =>
+          // Optional chaining tolerates entries without a `value` wrapper.
+          result?.value?.value ?? {
+            errorMessage:
+              result?.value?.errorMessage ?? 'Error extracting spawn results',
+          },
+      )
+    }
+  },
+}
+
+export default definition
diff --git a/.agents/base2/best-of-n/base2-best-of-n-fast.ts b/.agents/base2/best-of-n/base2-best-of-n-fast.ts
@@ -0,0 +1,17 @@
+import { createBase2 } from '../base2'
+import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
+
+/**
+ * Fast-mode base2 agent built with the `bestOfNFast` option enabled and
+ * exposed under its own id and display name.
+ *
+ * NOTE(review): the display name omits "Fast", unlike the other fast
+ * best-of-n agents in this commit - confirm that is intended.
+ */
+const definition: SecretAgentDefinition = {
+  ...createBase2('fast', { bestOfNFast: true }),
+  id: 'base2-best-of-n-fast',
+  displayName: 'Buffy Best-of-N Orchestrator',
+}
+
+export default definition
diff --git a/.agents/base2/best-of-n/base2-selector-fast.ts b/.agents/base2/best-of-n/base2-selector-fast.ts
@@ -0,0 +1,49 @@
+import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
+import base2Selector from './base2-selector'
+
+// Structured output of the fast selector: only the chosen implementation id.
+const outputSchema: SecretAgentDefinition['outputSchema'] = {
+  type: 'object',
+  properties: {
+    implementationId: {
+      type: 'string',
+      description: 'The id of the chosen implementation',
+    },
+  },
+  required: ['implementationId'],
+}
+
+// Prompt telling the selector to pick one proposal and reply with set_output only.
+const instructionsPrompt = `As part of the best-of-n workflow of agents, you are the implementation selector agent. You have been provided with multiple implementation proposals via params.
+
+The implementations are available in the params.implementations array, where each has:
+- id: A unique identifier for the implementation
+- content: The full implementation text with tool calls
+
+Your task is to analyze each implementation proposal carefully, compare them against the original user requirements, and select the best implementation.
+Evaluate each based on:
+- Correctness and completeness
+- Simplicity and maintainability
+- Code quality and adherence to project conventions
+- Minimal changes to existing code
+- Proper reuse of existing helpers and patterns
+- Clarity and readability
+
+Do not write any reasoning or explanations AT ALL.
+
+Your response should be only a single tool call to set_output with the selected implementationId.`
+
+/**
+ * Variant of `base2Selector` with its own id, model, display name, output
+ * schema and instructions prompt; every other field comes from the spread.
+ */
+const definition: SecretAgentDefinition = {
+  ...base2Selector,
+  id: 'base2-selector-fast',
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Best-of-N Fast Implementation Selector',
+  outputSchema,
+  instructionsPrompt,
+}
+
+export default definition