Skip to content

Commit c7e3d5f

Browse files
committed
buffbench: Option to disable analysis
1 parent f3573dc commit c7e3d5f

File tree

1 file changed

+43
-32
lines changed

1 file changed

+43
-32
lines changed

evals/buffbench/run-buffbench.ts

Lines changed: 43 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ async function runTask(options: {
3737
extractLessons: boolean
3838
printEvents: boolean
3939
finalCheckCommands?: string[]
40+
disableAnalysis?: boolean
4041
}) {
4142
const {
4243
client,
@@ -53,6 +54,7 @@ async function runTask(options: {
5354
extractLessons,
5455
printEvents,
5556
finalCheckCommands,
57+
disableAnalysis,
5658
} = options
5759

5860
console.log(
@@ -161,12 +163,14 @@ async function runTask(options: {
161163
const agentResults = await Promise.all(agentPromises)
162164

163165
// After all agents complete for this commit, run trace analysis
164-
const traceAnalysis = await analyzeAgentTraces({
165-
client,
166-
traces: commitTraces,
167-
codingAgentPrompt: commit.prompt,
168-
analyzerContext,
169-
})
166+
const traceAnalysis = disableAnalysis
167+
? undefined
168+
: await analyzeAgentTraces({
169+
client,
170+
traces: commitTraces,
171+
codingAgentPrompt: commit.prompt,
172+
analyzerContext,
173+
})
170174

171175
const analysisData = {
172176
commitSha: commit.sha,
@@ -268,13 +272,15 @@ export async function runBuffBench(options: {
268272
client?: CodebuffClient
269273
taskIds?: string[]
270274
extractLessons?: boolean
275+
disableAnalysis?: boolean
271276
}) {
272277
const {
273278
evalDataPath,
274279
agents,
275280
taskConcurrency = 1,
276281
taskIds,
277282
extractLessons = false,
283+
disableAnalysis = false,
278284
} = options
279285

280286
const evalData: EvalDataV2 = JSON.parse(
@@ -384,6 +390,7 @@ export async function runBuffBench(options: {
384390
extractLessons,
385391
printEvents: agents.length === 1 && taskConcurrency === 1,
386392
finalCheckCommands: evalData.finalCheckCommands,
393+
disableAnalysis,
387394
}),
388395
),
389396
)
@@ -448,36 +455,40 @@ export async function runBuffBench(options: {
448455

449456
const logFiles = fs.readdirSync(logsDir)
450457

451-
const metaAnalysis = await analyzeAllTasks({
452-
client,
453-
logsDir,
454-
agents,
455-
analyzerContext,
456-
})
458+
const metaAnalysis = disableAnalysis
459+
? undefined
460+
: await analyzeAllTasks({
461+
client,
462+
logsDir,
463+
agents,
464+
analyzerContext,
465+
})
457466

458-
// Print meta-analysis results
459-
console.log('\n=== Meta-Analysis Results ===')
460-
console.log('\nOverall Comparison:')
461-
console.log(metaAnalysis.overallComparison)
462-
463-
if (metaAnalysis.agentInsights.length > 0) {
464-
console.log('\nAgent-Specific Insights:')
465-
for (const insight of metaAnalysis.agentInsights) {
466-
console.log(`\n[${insight.agentId}]`)
467-
if (insight.consistentStrengths.length > 0) {
468-
console.log(' Strengths:', insight.consistentStrengths.join(', '))
469-
}
470-
if (insight.consistentWeaknesses.length > 0) {
471-
console.log(' Weaknesses:', insight.consistentWeaknesses.join(', '))
467+
if (metaAnalysis) {
468+
// Print meta-analysis results
469+
console.log('\n=== Meta-Analysis Results ===')
470+
console.log('\nOverall Comparison:')
471+
console.log(metaAnalysis.overallComparison)
472+
473+
if (metaAnalysis.agentInsights.length > 0) {
474+
console.log('\nAgent-Specific Insights:')
475+
for (const insight of metaAnalysis.agentInsights) {
476+
console.log(`\n[${insight.agentId}]`)
477+
if (insight.consistentStrengths.length > 0) {
478+
console.log(' Strengths:', insight.consistentStrengths.join(', '))
479+
}
480+
if (insight.consistentWeaknesses.length > 0) {
481+
console.log(' Weaknesses:', insight.consistentWeaknesses.join(', '))
482+
}
472483
}
473484
}
474-
}
475485

476-
if (metaAnalysis.keyFindings.length > 0) {
477-
console.log('\nKey Findings:')
478-
metaAnalysis.keyFindings.forEach((finding, i) => {
479-
console.log(` ${i + 1}. ${finding}`)
480-
})
486+
if (metaAnalysis.keyFindings.length > 0) {
487+
console.log('\nKey Findings:')
488+
metaAnalysis.keyFindings.forEach((finding, i) => {
489+
console.log(` ${i + 1}. ${finding}`)
490+
})
491+
}
481492
}
482493

483494
const finalResults = {

0 commit comments

Comments (0)