@@ -4,16 +4,16 @@ import fileExplorerDef from '../../.agents/file-explorer/file-explorer'
44import findAllReferencerDef from '../../.agents/file-explorer/find-all-referencer'
55import { PLACEHOLDER } from '../../.agents/types/secret-agent-definition'
66
7- const promptGeneratorAgentDef : AgentDefinition = {
8- id : 'git-evals2-prompt -generator' ,
9- displayName : 'Git Evals2 Prompt Generator' ,
7+ const evalTaskGeneratorAgentDef : AgentDefinition = {
8+ id : 'git-evals2-eval-task -generator' ,
9+ displayName : 'Git Evals2 Eval Task Generator' ,
1010 model : 'openai/gpt-5' ,
1111 toolNames : [ 'spawn_agents' , 'read_files' , 'set_output' ] ,
1212 spawnableAgents : [ 'file-explorer' , 'find-all-referencer' ] ,
1313 inputSchema : {
1414 prompt : {
1515 type : 'string' ,
16- description : 'Instructions to generate the prompt' ,
16+ description : 'Instructions to generate the task spec and prompt' ,
1717 } ,
1818 } ,
1919 outputMode : 'structured_output' ,
@@ -27,7 +27,12 @@ const promptGeneratorAgentDef: AgentDefinition = {
2727 } ,
2828 reasoning : {
2929 type : 'string' ,
30- description : 'Your thoughts about what should be in the prompt' ,
30+ description : 'Your thoughts about the task, spec, and prompt' ,
31+ } ,
32+ spec : {
33+ type : 'string' ,
34+ description :
35+ 'Clear specification describing WHAT needs to be implemented (observable behavior/structure, not HOW)' ,
3136 } ,
3237 prompt : {
3338 type : 'string' ,
@@ -38,47 +43,56 @@ const promptGeneratorAgentDef: AgentDefinition = {
3843 items : { type : 'string' } ,
3944 description : 'List of supplemental file paths' ,
4045 } ,
41- confidence : {
42- type : 'number' ,
43- description : 'Confidence score 0-1 in the quality of the prompt' ,
44- } ,
4546 } ,
46- required : [ 'id' , 'prompt ' , 'supplementalFiles ' , 'reasoning ' , 'confidence ' ] ,
47+ required : [ 'id' , 'reasoning ' , 'spec ' , 'prompt ' , 'supplementalFiles ' ] ,
4748 } ,
48- systemPrompt : `You are an expert at analyzing git commits and generating high-level user prompts .
49+ systemPrompt : `You are an expert at analyzing git commits and generating evaluation tasks for AI coding assistants .
4950
5051You will receive:
5152- A git diff showing the changes made
5253- The list of files that were edited
5354- An optional commit message
5455- The repository directory where you can explore the codebase
5556
57+ You must generate both a specification (spec) and a user prompt for the task.
58+
5659${ PLACEHOLDER . FILE_TREE_PROMPT }
5760${ PLACEHOLDER . KNOWLEDGE_FILES_CONTENTS } `,
5861
5962 instructionsPrompt : `Your task:
60631. Analyze the git diff to understand what changed
61- 2. Use your tools (read_files, spawn_agents) to explore the codebase and understand context
62- 3. Generate a short, descriptive task ID (2-3 hyphenated words like "fix-auth-bug" or "refactor-login-flow")
63- 4. Identify supplemental files that would help a judge understand the change (exclude directly edited files)
64- 5. Generate a high-level user prompt that describes WHAT needs to be done (not HOW)
64+ 2. Spawn the file-explorer and find-all-referencer to explore the codebase and understand context.
65+ 3. Read as many files relevant to the changes as possible.
66+ 4. Generate the output, including:
67+ - a short, descriptive task ID (2-3 hyphenated words like "fix-auth-bug" or "refactor-login-flow")
68+ - a clear specification describing exactly what needs to be implemented
69+ - a high-level user prompt that describes what needs to be done leaving out details that should be reconstructed by the agent
70+ - supplemental files that would help a judge understand the change (exclude directly edited files)
6571
6672Key principles for the task ID:
6773- 2-3 words maximum, hyphenated (e.g., "fix-memory-leak", "add-user-profile", "refactor-auth-flow")
6874- Descriptive but concise
6975- Use action verbs when appropriate (fix, add, remove, refactor, update, implement)
7076- Lowercase with hyphens
7177
78+ Key principles for the spec:
79+ - Prescribe exactly how to make the change with references to the files that need to be changed
80+ - Not include code
81+ - Focus on the observable behavior or structure that needs to be implemented
82+ - Be clear enough that a skilled developer or AI could implement it from scratch
83+ - Be phrased as what needs to be done, not what was already done
84+ - Cover all the changes shown across multiple files
85+
7286Key principles for the prompt:
73- - Focus on the functional requirement , not implementation details
87+ - Focus on the high-level functional requirements , not implementation details
7488- Use natural language: "add user authentication" not "implement authenticateUser function"
7589- Omit details that should be reconstructed by the agent
7690- Be clear enough that a skilled developer could implement from scratch
7791- Consider the commit message as a hint but don't just copy it
7892` ,
7993}
8094
81- export async function generatePromptFromCommit ( {
95+ export async function generateEvalTask ( {
8296 client,
8397 input,
8498 agentDefinitions,
@@ -95,45 +109,54 @@ export async function generatePromptFromCommit({
95109 agentDefinitions ?: any [ ]
96110} ) : Promise < {
97111 id : string
112+ reasoning : string
113+ spec : string
98114 prompt : string
99115 supplementalFiles : string [ ]
100- confidence : number
101- reasoning : string
102116} > {
103117 const { diff, editedFilePaths, commitMessage, repoPath } = input
104118
105119 const allAgentDefinitions = [
106- promptGeneratorAgentDef ,
120+ evalTaskGeneratorAgentDef ,
107121 fileExplorerDef ,
108122 findAllReferencerDef ,
109123 ...( agentDefinitions || [ ] ) ,
110124 ]
111125
112126 const generatorResult = await client . run ( {
113- agent : 'git-evals2-prompt -generator' ,
127+ agent : 'git-evals2-eval-task -generator' ,
114128 prompt :
115- 'Generate a high-level user prompt based on the git diff and codebase exploration' ,
129+ 'Generate a task specification and user prompt based on the git diff and codebase exploration' ,
116130 params : {
117131 diff,
118132 editedFilePaths,
119133 commitMessage,
120134 } ,
121135 cwd : repoPath ,
122136 agentDefinitions : allAgentDefinitions ,
137+ handleEvent : ( event ) => {
138+ if ( event . type === 'subagent_start' ) {
139+ console . log ( `[Agent] Starting: ${ event . displayName } ` )
140+ } else if ( event . type === 'tool_call' ) {
141+ console . log ( `[Tool] ${ event . toolName } ` )
142+ } else if ( event . type === 'text' ) {
143+ console . log ( `[Text] ${ event . text } ...` )
144+ }
145+ } ,
123146 } )
124147
125148 if (
126149 generatorResult . output . type !== 'structuredOutput' ||
127150 ! generatorResult . output . value
128151 ) {
129- throw new Error ( 'Failed to generate structured prompt output' )
152+ throw new Error ( 'Failed to generate structured task output' )
130153 }
131154
132155 return generatorResult . output . value as {
133156 id : string
157+ reasoning : string
158+ spec : string
134159 prompt : string
135160 supplementalFiles : string [ ]
136- reasoning : string
137- confidence : number
138161 }
139162}
0 commit comments