|
| 1 | +import path from 'path' |
| 2 | + |
| 3 | +import { sendBasicEmail } from '@codebuff/internal/loops' |
| 4 | + |
| 5 | +import { runBuffBench } from './run-buffbench' |
| 6 | +import type { AgentEvalResults } from './types' |
| 7 | + |
/**
 * Entry point for the nightly BuffBench run.
 *
 * Runs the `base` and `base2` agents against `eval-codebuff.json` (resolved
 * next to this file), then emails a summary of the results. A failure while
 * sending the email is logged but does not fail the run: the process exits
 * with code 0 once the email attempt has finished.
 */
async function main() {
  console.log('Starting nightly buffbench evaluation...')
  console.log('Agents: base, base2')
  console.log('Eval set: codebuff')
  console.log()

  const evalDataPath = path.join(__dirname, 'eval-codebuff.json')
  const results = await runBuffBench({
    evalDataPath,
    agents: ['base', 'base2'],
    taskConcurrency: 20,
  })

  console.log('\nNightly buffbench evaluation completed successfully!')

  // `||` means an unset or empty EVAL_RESULTS_EMAIL both fall back to the default.
  const recipientEmail = process.env.EVAL_RESULTS_EMAIL || 'team@codebuff.com'
  console.log(`\n📧 Sending buffbench results email to ${recipientEmail}...`)

  // Split the run metadata from the per-agent results before formatting.
  const { metadata, ...agentResults } = results
  const emailContent = formatBuffBenchEmailContent(agentResults, metadata)

  try {
    const { success } = await sendBasicEmail({
      email: recipientEmail,
      data: emailContent,
      logger: console,
    })

    console.log(
      success
        ? '✅ BuffBench results email sent successfully!'
        : '⚠️ Email sending was skipped (likely missing configuration)',
    )
  } catch (emailError) {
    // Best-effort: the evaluation itself succeeded, so only report the email failure.
    console.error('❌ Failed to send buffbench results email:', emailError)
  }

  process.exit(0)
}
| 47 | + |
/**
 * Builds the subject line and plain-text body of the nightly BuffBench email.
 *
 * @param results - Per-agent evaluation results keyed by agent id; the agents
 *   are listed in the object's key order.
 * @param metadata - Run metadata; reads `logsDirectory`, `totalDuration`,
 *   `commitsEvaluated`, `timestamp` and `repoUrl`.
 * @returns An object with the email `subject` and `message`.
 */
function formatBuffBenchEmailContent(
  results: Record<string, AgentEvalResults>,
  metadata: any,
) {
  const entries = Object.entries(results)
  const agentIds = entries.map(([agentId]) => agentId)
  const today = new Date().toLocaleDateString()

  // One-line score overview that goes into the subject.
  const scoreSummary = entries
    .map(([agentId, stats]) => `${agentId}: ${stats.averageScore.toFixed(1)}`)
    .join(' | ')

  const subject = `Nightly BuffBench Results - ${today} - ${scoreSummary}`

  // One indented bullet list per agent, separated by a blank line.
  const agentSections = entries
    .map(([agentId, stats]) =>
      [
        `${agentId}:`,
        `  - Average Score: ${stats.averageScore.toFixed(2)}/10`,
        `  - Average Cost: ${stats.averageCost.toFixed(4)}`,
        // averageDuration is divided by 1000 and labelled as seconds.
        `  - Average Duration: ${(stats.averageDuration / 1000).toFixed(1)}s`,
        `  - Valid Runs: ${stats.runs.length}`,
      ].join('\n'),
    )
    .join('\n\n')

  const message = [
    '📊 NIGHTLY BUFFBENCH RESULTS',
    '',
    '📈 AGENT RESULTS:',
    agentSections,
    '',
    `📁 Results Location: ${metadata.logsDirectory}`,
    // totalDuration is divided by 1000 and then 60 and labelled as minutes.
    `⏱️ Total Evaluation Time: ${(metadata.totalDuration / 1000 / 60).toFixed(1)} minutes`,
    `• Total Tasks: ${metadata.commitsEvaluated}`,
    `• Agents Tested: ${agentIds.join(', ')}`,
    '',
    `Generated on: ${metadata.timestamp}`,
    `Repository: ${metadata.repoUrl}`,
  ].join('\n')

  return { subject, message }
}
| 87 | + |
// Start the nightly run only when `import.meta.main` is truthy; any rejection
// from main() is logged and the process exits with code 1.
if (import.meta.main) {
  const exitWithFailure = (error: unknown) => {
    console.error('Error running nightly buffbench:', error)
    process.exit(1)
  }

  main().catch(exitWithFailure)
}
0 commit comments