thinker best of n opus

jahooma · jahooma · commit 1be71b7b5296 · 2025-11-24T21:28:54.000-08:00
diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts
@@ -65,7 +65,7 @@ export function createBase2(
       isDefault && 'editor-best-of-n',
       isMax && 'editor-best-of-n-max',
       isDefault && 'thinker-best-of-n',
-      isMax && 'thinker-best-of-n-gpt-5',
+      isMax && 'thinker-best-of-n-opus',
       isDefault && 'code-reviewer-gemini',
       isMax && 'code-reviewer-opus',
       'context-pruner',
@@ -118,7 +118,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u
   ${buildArray(
     '- Spawn context-gathering agents (file pickers, code-searcher, directory-lister, glob-matcher, and web/docs researchers) before making edits.',
     isMax &&
-      '- Spawn the thinker-best-of-n-gpt-5 after gathering context to solve complex problems.',
+      '- Spawn the thinker-best-of-n-opus after gathering context to solve complex problems.',
     `- Spawn a ${isMax ? 'editor-best-of-n-max' : 'editor-best-of-n'} agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`,
     '- Spawn commanders sequentially if the second command depends on the the first.',
     !isFast &&
@@ -385,7 +385,7 @@ function buildImplementationStepPrompt({
       `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`,
     !isFast &&
       `You must spawn the ${isMax ? 'editor-best-of-n-max' : 'editor-best-of-n'} agent to implement code changes, since it will generate the best code changes.`,
-    isMax && 'Spawn the thinker-best-of-n-gpt-5 to solve complex problems.',
+    isMax && 'Spawn the thinker-best-of-n-opus to solve complex problems.',
     `After completing the user request, summarize your changes in a sentence${isFast ? '' : ' or a few short bullet points'}.${isSonnet ? " Don't create any summary markdown files or example documentation files, unless asked by the user." : ''}. Don't repeat yourself, especially if you have already concluded and summarized the changes in a previous step -- just end your turn.`,
   ).join('\n')
 }
diff --git a/.agents/thinker/best-of-n/thinker-best-of-n-opus.ts b/.agents/thinker/best-of-n/thinker-best-of-n-opus.ts
@@ -0,0 +1,7 @@
+import { createThinkerBestOfN } from './thinker-best-of-n'
+
+// Opus variant of the best-of-n thinker: spread the result of
+// createThinkerBestOfN('opus') and attach this agent's id.
+const opusThinkerBase = createThinkerBestOfN('opus')
+const definition = { ...opusThinkerBase, id: 'thinker-best-of-n-opus' }
+export default definition
diff --git a/.agents/thinker/best-of-n/thinker-best-of-n.ts b/.agents/thinker/best-of-n/thinker-best-of-n.ts
@@ -32,7 +32,7 @@ export function createThinkerBestOfN(
     inheritParentSystemPrompt: true,
 
     toolNames: ['spawn_agents'],
-    spawnableAgents: ['thinker-selector'],
+    spawnableAgents: [isOpus ? 'thinker-selector-opus' : 'thinker-selector'],
 
     inputSchema: {
       prompt: {
@@ -58,17 +58,102 @@ Answer the user's query to the best of your ability and be extremely concise and
 
 **Important**: Do not use any tools! You are only thinking!`,
 
-    handleSteps,
+    handleSteps: isOpus ? handleStepsOpus : handleStepsDefault,
+  }
+}
+/**
+ * Step generator for the default (non-Opus) best-of-n thinker.
+ *
+ * Flow: request `n` candidate thinking outputs via GENERATE_N, pass them to
+ * the `thinker-selector` agent, then yield the chosen output's content as
+ * step text. When selection fails (no usable spawn result, an error reported
+ * by the selector, or an unknown thought id) a failure message is yielded as
+ * step text instead and the generator returns.
+ *
+ * @param params - May carry `n`, the number of candidates to generate. It
+ *   defaults to 5 and is clamped to the range 1..10.
+ *
+ * NOTE(review): `extractSpawnResults` is declared inside this generator,
+ * presumably so the function stays self-contained — confirm before hoisting
+ * it to module scope.
+ */
+function* handleStepsDefault({
+  agentState,
+  prompt,
+  params,
+}: AgentStepContext): ReturnType<
+  NonNullable<SecretAgentDefinition['handleSteps']>
+> {
+  const selectorAgentType = 'thinker-selector'
+  const n = Math.min(10, Math.max(1, (params?.n as number | undefined) ?? 5))
+
+  // Use GENERATE_N to generate n thinking outputs
+  const { nResponses = [] } = yield {
+    type: 'GENERATE_N',
+    n,
+  }
+
+  // Label each thinking output with a letter id (A, B, C, ...). n is capped
+  // at 10 above, so the alphabet covers every index as long as GENERATE_N
+  // returns at most n responses.
+  const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+  const thoughts = nResponses.map((content, index) => ({
+    id: letters[index],
+    content,
+  }))
+
+  // Spawn selector with thoughts as params
+  const { toolResult: selectorResult } = yield {
+    toolName: 'spawn_agents',
+    input: {
+      agents: [
+        {
+          agent_type: selectorAgentType,
+          params: { thoughts },
+        },
+      ],
+    },
+    includeToolCall: false,
+  } satisfies ToolCall<'spawn_agents'>
+
+  const selectorOutput = extractSpawnResults<{
+    thoughtId: string
+  }>(selectorResult)[0]
+
+  // The spawn may yield no JSON results at all; without this guard the `in`
+  // check below would throw a TypeError on `undefined`.
+  if (!selectorOutput) {
+    yield {
+      type: 'STEP_TEXT',
+      text: 'Error extracting spawn results',
+    } satisfies StepText
+    return
+  }
+
+  if ('errorMessage' in selectorOutput) {
+    yield {
+      type: 'STEP_TEXT',
+      text: selectorOutput.errorMessage,
+    } satisfies StepText
+    return
+  }
+  const { thoughtId } = selectorOutput
+  const chosenThought = thoughts.find((thought) => thought.id === thoughtId)
+  if (!chosenThought) {
+    yield {
+      type: 'STEP_TEXT',
+      text: 'Failed to find chosen thinking output.',
+    } satisfies StepText
+    return
+  }
+
+  yield {
+    type: 'STEP_TEXT',
+    text: chosenThought.content,
+  } satisfies StepText
+
+  /**
+   * Flattens the `json` entries of a spawn_agents tool result into one item
+   * per spawned agent: the agent's output value when present, otherwise an
+   * `{ errorMessage }` object. Returns an empty array when `results` is
+   * undefined.
+   */
+  function extractSpawnResults<T>(
+    results: any[] | undefined,
+  ): (T | { errorMessage: string })[] {
+    if (!results) return []
+    const spawnedResults = results
+      .filter((result) => result.type === 'json')
+      .map((result) => result.value)
+      .flat() as {
+      agentType: string
+      value?: { value?: T; errorMessage?: string }
+    }[]
+    return spawnedResults.map(
+      (result) =>
+        // Optional chaining: an entry without a `value` payload becomes an
+        // error item instead of throwing.
+        result.value?.value ??
+        ({
+          errorMessage:
+            result.value?.errorMessage ?? 'Error extracting spawn results',
+        } as { errorMessage: string }),
+    )
   }
 }
 
-function* handleSteps({
+function* handleStepsOpus({
   agentState,
   prompt,
   params,
 }: AgentStepContext): ReturnType<
   NonNullable<SecretAgentDefinition['handleSteps']>
 > {
+  const selectorAgentType = 'thinker-selector-opus'
   const n = Math.min(10, Math.max(1, (params?.n as number | undefined) ?? 5))
 
   // Use GENERATE_N to generate n thinking outputs
@@ -90,7 +175,7 @@ function* handleSteps({
     input: {
       agents: [
         {
-          agent_type: 'thinker-selector',
+          agent_type: selectorAgentType,
           params: { thoughts },
         },
       ],
diff --git a/.agents/thinker/best-of-n/thinker-selector-opus.ts b/.agents/thinker/best-of-n/thinker-selector-opus.ts
@@ -0,0 +1,8 @@
+import { createThinkerSelector } from './thinker-selector'
+
+// Opus variant of the thinker output selector: createThinkerSelector('opus')
+// provides the definition without an id, which is attached here.
+const opusSelectorBase = createThinkerSelector('opus')
+const definition = { ...opusSelectorBase, id: 'thinker-selector-opus' }
+
+export default definition
diff --git a/.agents/thinker/best-of-n/thinker-selector.ts b/.agents/thinker/best-of-n/thinker-selector.ts
@@ -1,12 +1,20 @@
 import { type SecretAgentDefinition } from '../../types/secret-agent-definition'
 import { publisher } from '../../constants'
 
-const definition: SecretAgentDefinition = {
-  id: 'thinker-selector',
-  publisher,
-  model: 'anthropic/claude-sonnet-4.5',
-  displayName: 'Thinker Output Selector',
-  spawnerPrompt: 'Analyzes multiple thinking outputs and selects the best one',
+export function createThinkerSelector(
+  model: 'sonnet' | 'opus',
+): Omit<SecretAgentDefinition, 'id'> {
+  const isOpus = model === 'opus'
+
+  return {
+    publisher,
+    model: isOpus
+      ? 'anthropic/claude-opus-4.5'
+      : 'anthropic/claude-sonnet-4.5',
+    displayName: isOpus
+      ? 'Opus Thinker Output Selector'
+      : 'Thinker Output Selector',
+    spawnerPrompt: 'Analyzes multiple thinking outputs and selects the best one',
 
   includeMessageHistory: true,
   inheritParentSystemPrompt: true,
@@ -45,7 +53,7 @@ const definition: SecretAgentDefinition = {
     required: ['thoughtId'],
   },
 
-  instructionsPrompt: `As part of the best-of-n workflow for thinking agents, you are the thinking selector agent.
+    instructionsPrompt: `As part of the best-of-n workflow for thinking agents, you are the thinking selector agent.
   
 ## Task Instructions
 
@@ -74,6 +82,12 @@ Use <think> tags to briefly consider the thinking outputs as needed to pick the
 If the best one is obvious or the outputs are very similar, you may not need to think very much (a few words suffice) or you may not need to use think tags at all, just pick the best one and output it. You have a dual goal of picking the best thinking and being fast (using as few words as possible).
 
 Then, do not write any other explanations AT ALL. You should directly output a single tool call to set_output with the selected thoughtId.`,
+  }
+}
+
+// Default selector agent: the 'sonnet' configuration produced by
+// createThinkerSelector, completed with the 'thinker-selector' id.
+const definition: SecretAgentDefinition = {
+  ...createThinkerSelector('sonnet'),
+  id: 'thinker-selector',
 }
 
 export default definition