Add validator and code-reviewer

jahooma · jahooma · commit 35a810565921 · 2025-10-16T15:06:19.000-07:00
diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts
@@ -8,8 +8,7 @@ import {
 
 export const createBase2: (
   mode: 'normal' | 'max',
-) => Omit<SecretAgentDefinition, 'id'> = (mode) => {
-  const isMax = mode === 'max'
+) => Omit<SecretAgentDefinition, 'id'> = () => {
   return {
     publisher,
     model: 'anthropic/claude-sonnet-4.5',
@@ -43,7 +42,8 @@ export const createBase2: (
       'researcher-docs',
       'commander',
       'generate-plan',
-      'reviewer',
+      'code-reviewer',
+      'validator',
       'context-pruner',
     ),
 
@@ -63,9 +63,9 @@ Continue to spawn layers of agents until have completed the user's request or re
   - Spawn file pickers, code-searcher, directory-lister, glob-matcher, commanders, and researchers before making edits.
   - Spawn generate-plan agent after you have gathered all the context you need (and not before!).
   - Only make edits after generating a plan.
-  - Reviewers should be spawned after you have made your edits.
+  - Code reviewers/validators should be spawned after you have made your edits.
 - **No need to include context:** When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include context.
-- **Don't spawn reviewers for trivial changes or quick follow-ups:** You should spawn the reviewer for most changes, but not for little changes or simple follow-ups.
+- **Don't spawn code reviewers/validators for trivial changes or quick follow-ups:** You should spawn the code reviewer/validator for most changes, but not for little changes or simple follow-ups.
 
 # Core Mandates
 
@@ -130,8 +130,8 @@ The user asks you to implement a new feature. You respond in multiple steps:
 2a. Read all the relevant files using the read_files tool.
 3. Spawn a generate-plan agent to generate a plan for the changes.
 4. Use the str_replace or write_file tool to make the changes.
-5. Spawn a reviewer to review the changes.
-6. Fix any issues raised by the reviewer.
+5. Spawn a code-reviewer to review the changes. Consider making changes suggested by the code-reviewer.
+6. Spawn a validator to run validation commands (tests, typechecks, etc.) to ensure the changes are correct.
 7. Inform the user that you have completed the task in one sentence without a final summary.`,
 
     stepPrompt: `Don't forget to spawn agents that could help, especially: the file-picker-max and find-all-referencer to get codebase context, the generate-plan agent to create a plan, and the reviewer to review changes.`,
diff --git a/.agents/codebase-commands-explorer.ts b/.agents/codebase-commands-explorer.ts
@@ -3,27 +3,25 @@ import type { AgentDefinition } from './types/agent-definition'
 const definition: AgentDefinition = {
   id: 'codebase-commands-explorer',
   displayName: 'Codebase Commands Explorer',
-  publisher: 'james',
-  model: 'openai/gpt-5',
-  reasoningOptions: {
-    enabled: true,
-    effort: 'low',
-    exclude: true,
-  },
+  publisher: 'codebuff',
+  model: 'x-ai/grok-code-fast-1',
 
   spawnerPrompt: `Analyzes any project's codebase to comprehensively discover all commands needed to build, test, and run the project. Provides detailed analysis of project structure, tech stack, and working commands with confidence scores.`,
 
-  toolNames: ['spawn_agents', 'set_output'],
+  toolNames: ['spawn_agents', 'read_files', 'set_output'],
   spawnableAgents: [
-    'codebuff/file-explorer@0.0.4',
-    'codebuff/read-only-commander-lite@0.0.1',
+    'file-picker',
+    'code-searcher',
+    'directory-lister',
+    'glob-matcher',
+    'commander',
   ],
 
   inputSchema: {
     prompt: {
       type: 'string',
       description:
-        'Optional specific focus areas or requirements for the codebase analysis (e.g., "focus on test commands" or "include CI/CD analysis")',
+        'Optional specific focus areas or requirements for the codebase analysis (e.g., "focus on test commands")',
     },
   },
 
@@ -117,111 +115,13 @@ const definition: AgentDefinition = {
           required: ['command', 'description', 'category', 'confidenceScore'],
         },
       },
-      setupRequirements: {
-        type: 'array',
-        items: {
-          type: 'object',
-          properties: {
-            requirement: {
-              type: 'string',
-              description: 'Setup requirement description',
-            },
-            commands: {
-              type: 'array',
-              items: { type: 'string' },
-              description: 'Commands to fulfill this requirement',
-            },
-            priority: {
-              type: 'string',
-              enum: ['critical', 'recommended', 'optional'],
-              description: 'Priority level',
-            },
-          },
-          required: ['requirement', 'commands', 'priority'],
-        },
-      },
-      cicdAnalysis: {
-        type: 'object',
-        properties: {
-          ciFilesFound: {
-            type: 'array',
-            items: { type: 'string' },
-            description: 'CI/CD configuration files detected',
-          },
-          officialCommands: {
-            type: 'array',
-            items: { type: 'string' },
-            description: 'Commands found in CI/CD files',
-          },
-          platforms: {
-            type: 'array',
-            items: { type: 'string' },
-            description:
-              'CI/CD platforms detected (GitHub Actions, GitLab CI, etc.)',
-          },
-        },
-        required: ['ciFilesFound', 'officialCommands', 'platforms'],
-      },
     },
-    required: [
-      'projectOverview',
-      'workingCommands',
-      'setupRequirements',
-      'cicdAnalysis',
-    ],
+    required: ['projectOverview', 'workingCommands'],
   },
 
   systemPrompt: `You are an expert codebase explorer that comprehensively analyzes any software project to discover all build, test, and run commands. You orchestrate multiple specialized agents to explore the project structure and test commands in parallel for maximum efficiency.`,
 
-  instructionsPrompt: `Your mission is to provide a comprehensive analysis of any codebase to discover all working commands for building, testing, and running the project.
-
-## Analysis Strategy:
-
-1. **Project Structure Exploration**: First spawn file-explorer to understand the project layout, key files, and technology stack.
-  In parallel, spawn a second file-explorer to learn about the build, lint, and testing processes across the codebase.
-
-2. **Massive Parallel Command Testing**: Only after fully completing step 1 and getting back the results, spawn MANY (10-15) read-only-commander agents simultaneously to test different command combinations, including for any relevant sub-directories if this is a monorepo.
-  Look for commands for the following project types:
-   - Web apps: next.js, react, vue, etc. commands (build, test, start, dev, lint, etc.)
-   - Node.js projects: npm/yarn/pnpm commands (build, test, start, dev, lint, etc.)
-   - Python projects: pip, pytest, setup.py, tox commands
-   - Rust projects: cargo commands (build, test, run, check, etc.)
-   ...And so on for all project types
-
-  Include CI/CD Analysis: Have agents examine CI/CD files (.github/workflows, .gitlab-ci.yml, etc.) to discover official build processes
-
-3. **Final Analysis**: Use the set_output tool to output the results of the analysis. Rate each working command based on:
-   - Success rate of execution
-   - Presence in official documentation/CI
-   - Standard conventions for the project type
-   - Output quality and expected behavior
-
-## Command Categories to Test:
-- **install**: Dependency installation commands
-- **build**: Compilation and build commands
-- **test**: All types of testing (unit, integration, e2e)
-- **run**: Application execution commands
-- **dev**: Development server/watch commands
-- **lint**: Code linting and static analysis
-- **format**: Code formatting commands
-- **clean**: Cleanup and reset commands
-
-## Be Extremely Thorough:
-- Try multiple package managers if multiple are detected
-- Test both short and long command forms
-- Check for custom scripts in package.json, Makefile, etc.
-- Test commands with different flags and options
-- Verify commands work from different directories
-- Check for environment-specific requirements
-
-## Special Focus Areas:
-- Look for monorepo structures and workspace commands
-- Detect containerized setups and associated commands
-- Find database setup/migration commands
-- Identify development vs production commands
-- Discover deployment and release commands
-
-Provide a comprehensive, structured output that gives developers everything they need to understand and work with the codebase immediately.`,
+  instructionsPrompt: `Your mission is to provide a comprehensive analysis of any codebase to discover all working commands for building, testing, and running the project, according to the user prompt.`,
 }
 
 export default definition
diff --git a/.agents/reviewer/code-reviewer-gpt-5.ts b/.agents/reviewer/code-reviewer-gpt-5.ts
@@ -0,0 +1,10 @@
+import reviewer from './reviewer'
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+const definition: SecretAgentDefinition = {
+  ...reviewer,
+  id: 'code-reviewer-gpt-5',
+  model: 'openai/gpt-5',
+}
+
+export default definition
diff --git a/.agents/reviewer/code-reviewer.ts b/.agents/reviewer/code-reviewer.ts
@@ -0,0 +1,59 @@
+import { publisher } from '../constants'
+import {
+  PLACEHOLDER,
+  type SecretAgentDefinition,
+} from '../types/secret-agent-definition'
+import type { Model } from '@codebuff/common/old-constants'
+
+export const createReviewer = (
+  model: Model,
+): Omit<SecretAgentDefinition, 'id'> => ({
+  model,
+  displayName: 'Nit Pick Nick',
+  spawnerPrompt:
+    'Reviews file changes and responds with critical feedback. Use this after making any significant change to the codebase; otherwise, no need to use this agent for minor changes since it takes a second.',
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description: 'What should be reviewed. Be brief.',
+    },
+  },
+  outputMode: 'last_message',
+  toolNames: [],
+  spawnableAgents: [],
+
+  inheritParentSystemPrompt: true,
+  includeMessageHistory: true,
+
+  instructionsPrompt: `For reference, here is the original user request:
+<user_message>
+${PLACEHOLDER.USER_INPUT_PROMPT}
+</user_message>
+
+Your task is to provide helpful feedback on the last file changes made by the assistant.
+
+NOTE: You cannot make any changes directly! You can only suggest changes.
+
+You should critique the code changes made recently in the above conversation. Be brief: If you don't have much critical feedback, simply say it looks good in one sentence.
+
+- Focus on getting to a complete and correct solution as the top priority.
+- Make sure all the requirements in the user's message are addressed. You should call out any requirements that are not addressed -- advocate for the user!
+- Try to keep any changes to the codebase as minimal as possible.
+- Simplify any logic that can be simplified.
+- Where a function can be reused, reuse it and do not create a new one.
+- Make sure that no new dead code is introduced.
+- Make sure there are no missing imports.
+- Make sure no sections were deleted that weren't supposed to be deleted.
+- Make sure the new code matches the style of the existing code.
+- Make sure there are no unnecessary try/catch blocks. Prefer to remove those.
+
+Be extremely concise.`,
+})
+
+const definition: SecretAgentDefinition = {
+  id: 'code-reviewer',
+  publisher,
+  ...createReviewer('anthropic/claude-sonnet-4.5'),
+}
+
+export default definition
diff --git a/.agents/validator.ts b/.agents/validator.ts
@@ -0,0 +1,41 @@
+import { publisher } from './constants'
+import type { AgentDefinition } from './types/agent-definition'
+
+const fixer: AgentDefinition = {
+  id: 'validator',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Validator',
+  spawnerPrompt:
+    'Attempts to build/test/verify the project and automatically fix issues it finds. Useful after making edits or when CI/typecheck/tests are failing. Works across monorepos: discovers scripts (build/test/typecheck/lint), runs them, analyzes failures, and applies minimal fixes in a loop until success or max attempts.',
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description:
+        'Optional context about what to verify/fix (e.g., a specific package, script, or error focus).',
+    },
+  },
+  outputMode: 'last_message',
+
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+
+  toolNames: ['read_files', 'str_replace', 'write_file', 'spawn_agents'],
+  spawnableAgents: [
+    'codebase-commands-explorer',
+    'file-picker',
+    'code-searcher',
+    'directory-lister',
+    'glob-matcher',
+    'commander',
+  ],
+
+  instructionsPrompt: `Instructions:
+1. Spawn the codebase-commands-explorer agent to discover how to build/test/verify the project.
+2. Run the commands to validate the project.
+3. Fix any issues found.
+4. Repeat 2 and 3 until the project is validated successfully.
+5. Give a final summary that includes the exact commands you ran, the issues you fixed, and the final state (are all the typechecks/tests passing?). Be extremely concise.`,
+}
+
+export default fixer