@@ -3,6 +3,7 @@ import type { JudgingResult } from './judge'
33import type { AgentDefinition } from '../../sdk/src'
44import type { CodebuffClient } from '../../sdk/src/client'
55import { withTimeout } from '@codebuff/common/util/promise'
6+ import { getErrorObject } from '@codebuff/common/util/error'
67
78export interface AgentTraceData {
89 agentId : string
@@ -156,7 +157,7 @@ const traceAnalyzerAgent: AgentDefinition = {
156157## Your Role
157158
158159You will receive:
159- 1. A task specification (for context only)
160+ 1. A task prompt (for context only)
1601612. Full traces from each agent showing their step-by-step process
1611623. Performance metrics (scores, cost, time, errors)
162163
@@ -190,11 +191,11 @@ Note: read_files tool results show [TRUNCATED] for file contents to save space.`
190191export async function analyzeAgentTraces ( {
191192 client,
192193 traces,
193- spec ,
194+ codingAgentPrompt ,
194195} : {
195196 client : CodebuffClient
196197 traces : AgentTraceData [ ]
197- spec : string
198+ codingAgentPrompt : string
198199} ) : Promise < {
199200 overallAnalysis : string
200201 agentFeedback : Array < {
@@ -204,17 +205,18 @@ export async function analyzeAgentTraces({
204205 recommendations : string [ ]
205206 } >
206207} > {
207- const truncatedTraces = traces . map ( ( t ) => ( {
208- agentId : t . agentId ,
209- trace : truncateTrace ( t . trace ) ,
210- judgeResult : t . judgeResult ,
211- cost : t . cost ,
212- durationMs : t . durationMs ,
213- error : t . error ,
214- } ) )
208+ try {
209+ const truncatedTraces = traces . map ( ( t ) => ( {
210+ agentId : t . agentId ,
211+ trace : truncateTrace ( t . trace ) ,
212+ judgeResult : t . judgeResult ,
213+ cost : t . cost ,
214+ durationMs : t . durationMs ,
215+ error : t . error ,
216+ } ) )
215217
216- const prompt = `## Task Specification (for context)
217- ${ spec }
218+ const prompt = `## Coding Agent Prompt (for context)
219+ ${ codingAgentPrompt }
218220
219221## Agent Traces and Results
220222${ JSON . stringify ( truncatedTraces , null , 2 ) }
@@ -239,39 +241,46 @@ Analyze how these agents approached the problem, focusing on their processes and
239241
240242Focus on the HOW, not the WHAT: We want to understand and improve how agents work, not evaluate their specific code output.`
241243
242- const agentOutput : string [ ] = [ ]
243- const analyzerResult = await withTimeout (
244- client . run ( {
245- agent : 'git-evals2-trace-analyzer' ,
246- prompt,
247- agentDefinitions : [ traceAnalyzerAgent ] ,
248- handleEvent : ( event ) => {
249- if ( event . type === 'text' ) {
250- agentOutput . push ( event . text )
251- } else if ( event . type === 'tool_call' ) {
252- agentOutput . push ( JSON . stringify ( event , null , 2 ) )
253- } else if ( event . type === 'error' ) {
254- console . warn ( '[Trace Analyzer] Error event:' , event . message )
255- }
256- } ,
257- } ) ,
258- 10 * 60 * 1000 ,
259- 'Trace analyzer agent timed out after 10 minutes' ,
260- )
244+ const agentOutput : string [ ] = [ ]
245+ const analyzerResult = await withTimeout (
246+ client . run ( {
247+ agent : 'git-evals2-trace-analyzer' ,
248+ prompt,
249+ agentDefinitions : [ traceAnalyzerAgent ] ,
250+ handleEvent : ( event ) => {
251+ if ( event . type === 'text' ) {
252+ agentOutput . push ( event . text )
253+ } else if ( event . type === 'tool_call' ) {
254+ agentOutput . push ( JSON . stringify ( event , null , 2 ) )
255+ } else if ( event . type === 'error' ) {
256+ console . warn ( '[Trace Analyzer] Error event:' , event . message )
257+ }
258+ } ,
259+ } ) ,
260+ 10 * 60 * 1000 ,
261+ 'Trace analyzer agent timed out after 10 minutes' ,
262+ )
261263
262- const { output } = analyzerResult
264+ const { output } = analyzerResult
263265
264- if ( output . type !== 'structuredOutput' || output . value === null ) {
265- console . error (
266- 'Error running trace analyzer - not structured output' ,
267- JSON . stringify ( output , null , 2 ) ,
268- )
269- console . error ( 'Trace analyzer output trace:' , agentOutput . join ( '' ) )
266+ if ( output . type !== 'structuredOutput' || output . value === null ) {
267+ console . error (
268+ 'Error running trace analyzer - not structured output' ,
269+ JSON . stringify ( output , null , 2 ) ,
270+ )
271+ console . error ( 'Trace analyzer output trace:' , agentOutput . join ( '' ) )
272+ return {
273+ overallAnalysis : 'Error running trace analyzer - not structured output' ,
274+ agentFeedback : [ ] ,
275+ }
276+ }
277+
278+ return output . value as any
279+ } catch ( error ) {
280+ console . error ( `Failed to analyze traces:` , getErrorObject ( error ) )
270281 return {
271- overallAnalysis : ' Error running trace analyzer - not structured output' ,
282+ overallAnalysis : ` Error running trace analyzer: ${ getErrorObject ( error ) . message } ` ,
272283 agentFeedback : [ ] ,
273284 }
274285 }
275-
276- return output . value as any
277286}
0 commit comments