@@ -8,17 +8,20 @@ import { CodebuffClient } from '../../sdk/src/client'
88import { runAgentOnCommit } from './agent-runner'
99import { judgeCommitResult } from './judge'
1010import { analyzeAgentTraces , type AgentTraceData } from './trace-analyzer'
11-
12- import type {
13- EvalData ,
14- GitEvals2Options ,
15- GitEvals2Result ,
16- AgentEvalResults ,
17- } from './types'
18-
19- export async function runGitEvals2 (
20- options : GitEvals2Options ,
21- ) : Promise < GitEvals2Result > {
11+ import { AgentEvalResults , EvalData , ProgressEvent } from './types'
12+
13+ export async function runGitEvals2 ( options : {
14+ evalDataPath : string
15+ agents : string [ ]
16+ outputPath ?: string
17+ limit ?: number
18+ onProgress ?: ( event : ProgressEvent ) => void
19+ client ?: CodebuffClient
20+ } ) : Promise < {
21+ agents : Record < string , AgentEvalResults >
22+ timestamp : string
23+ totalDuration : number
24+ } > {
2225 const { evalDataPath, agents, outputPath, limit, onProgress } = options
2326
2427 const evalData : EvalData = JSON . parse ( fs . readFileSync ( evalDataPath , 'utf-8' ) )
@@ -33,7 +36,7 @@ export async function runGitEvals2(
3336 } )
3437
3538 const startTime = Date . now ( )
36- const results = new Map < string , AgentEvalResults > ( )
39+ const results : Record < string , AgentEvalResults > = { }
3740
3841 // Create logs directory with current date and time
3942 const date = new Date ( ) . toISOString ( ) . replace ( / : / g, '-' ) . slice ( 0 , 16 ) // YYYY-MM-DDTHH-MM
@@ -46,13 +49,13 @@ export async function runGitEvals2(
4649 }
4750
4851 for ( const agentId of agents ) {
49- results . set ( agentId , {
52+ results [ agentId ] = {
5053 agentId,
5154 runs : [ ] ,
5255 averageScore : 0 ,
5356 averageCost : 0 ,
5457 averageDuration : 0 ,
55- } )
58+ }
5659 }
5760
5861 for ( const commit of commitsToRun ) {
@@ -90,8 +93,7 @@ export async function runGitEvals2(
9093 commitSha : commit . sha ,
9194 spec : commit . spec ,
9295 diff : agentResult . diff ,
93- judgeScore : judgeResult . overallScore ,
94- judgeFeedback : judgeResult . analysis ,
96+ judging : judgeResult ,
9597 cost : agentResult . cost ,
9698 durationMs : agentResult . durationMs ,
9799 error : agentResult . error ,
@@ -101,7 +103,7 @@ export async function runGitEvals2(
101103 const safeSpec = commit . spec
102104 . split ( '\n' ) [ 0 ]
103105 . replace ( / [ ^ a - z A - Z 0 - 9 ] / g, '_' )
104- . slice ( 0 , 30 )
106+ . slice ( 0 , 20 )
105107 const safeAgentId = agentId . replace ( / [ ^ a - z A - Z 0 - 9 - ] / g, '_' )
106108 const safeCommitShort = commit . sha . slice ( 0 , 7 )
107109 const traceFilename = `${ safeSpec } -${ safeAgentId } -${ safeCommitShort } .json`
@@ -151,8 +153,14 @@ export async function runGitEvals2(
151153 commitSha : commit . sha ,
152154 spec : commit . spec ,
153155 diff : '' ,
154- judgeScore : 0 ,
155- judgeFeedback : '' ,
156+ judging : {
157+ analysis : '' ,
158+ strengths : [ ] ,
159+ weaknesses : [ ] ,
160+ completionScore : 0 ,
161+ codeQualityScore : 0 ,
162+ overallScore : 0 ,
163+ } ,
156164 cost : 0 ,
157165 durationMs : 0 ,
158166 error : errorMessage ,
@@ -164,8 +172,7 @@ export async function runGitEvals2(
164172 const agentResults = await Promise . all ( agentPromises )
165173
166174 for ( const { agentId, evalRun } of agentResults ) {
167- const agentData = results . get ( agentId ) !
168- agentData . runs . push ( evalRun )
175+ results [ agentId ] . runs . push ( evalRun )
169176 }
170177
171178 // After all agents complete for this commit, run trace analysis
@@ -208,13 +215,13 @@ export async function runGitEvals2(
208215 }
209216 }
210217
211- for ( const [ agentId , agentData ] of results ) {
218+ for ( const [ agentId , agentData ] of Object . entries ( results ) ) {
212219 const successfulRuns = agentData . runs . filter ( ( r ) => ! r . error )
213220 const totalRuns = agentData . runs . length
214221
215222 agentData . averageScore =
216223 successfulRuns . length > 0
217- ? successfulRuns . reduce ( ( sum , r ) => sum + r . judgeScore , 0 ) /
224+ ? successfulRuns . reduce ( ( sum , r ) => sum + r . judging . overallScore , 0 ) /
218225 successfulRuns . length
219226 : 0
220227
@@ -229,7 +236,7 @@ export async function runGitEvals2(
229236 : 0
230237 }
231238
232- const result : GitEvals2Result = {
239+ const result = {
233240 agents : results ,
234241 timestamp : new Date ( ) . toISOString ( ) ,
235242 totalDuration : Date . now ( ) - startTime ,
@@ -241,20 +248,13 @@ export async function runGitEvals2(
241248 fs . mkdirSync ( outputDir , { recursive : true } )
242249 }
243250
244- const serializedResult = {
245- ...result ,
246- agents : Array . from ( result . agents . entries ( ) ) . map ( ( [ id , data ] ) => ( {
247- id,
248- ...data ,
249- } ) ) ,
250- }
251- fs . writeFileSync ( outputPath , JSON . stringify ( serializedResult , null , 2 ) )
251+ fs . writeFileSync ( outputPath , JSON . stringify ( result , null , 2 ) )
252252 console . log ( `\nResults written to ${ outputPath } ` )
253253 }
254254
255255 console . log ( `\nTraces saved to ${ logsDir } ` )
256256 console . log ( '\n=== Summary ===' )
257- for ( const [ agentId , data ] of results ) {
257+ for ( const [ agentId , data ] of Object . entries ( results ) ) {
258258 console . log ( `\n${ agentId } :` )
259259 console . log ( ` Score: ${ data . averageScore . toFixed ( 2 ) } /10` )
260260 console . log ( ` Cost: $${ data . averageCost . toFixed ( 4 ) } ` )
0 commit comments