@@ -70,6 +70,10 @@ class RunEvalSetCommand extends Command {
7070 description : 'Coding agent to use' ,
7171 default : 'codebuff' ,
7272 } ) ,
73+ agent : Flags . string ( {
74+ description : 'Codebuff agent id to use' ,
75+ default : 'base' ,
76+ } ) ,
7377 help : Flags . help ( { char : 'h' } ) ,
7478 }
7579
@@ -89,6 +93,7 @@ async function runEvalSet(options: {
8993 title ?: string
9094 concurrency ?: number
9195 'coding-agent' : string
96+ agent : string
9297} ) : Promise < void > {
9398 const {
9499 'output-dir' : outputDir ,
@@ -98,6 +103,7 @@ async function runEvalSet(options: {
98103 insert : shouldInsert ,
99104 title,
100105 'coding-agent' : codingAgentstr ,
106+ agent,
101107 } = options
102108
103109 if ( ! [ 'codebuff' , 'claude' ] . includes ( codingAgentstr ) ) {
@@ -127,32 +133,28 @@ async function runEvalSet(options: {
127133 name : 'codebuff' ,
128134 evalDataPath : path . join ( __dirname , 'eval-codebuff2.json' ) ,
129135 outputDir,
130- agentType : undefined ,
131136 } ,
132137 {
133138 name : 'manifold' ,
134139 evalDataPath : path . join ( __dirname , 'eval-manifold2.json' ) ,
135140 outputDir,
136- agentType : undefined ,
137141 } ,
138142 {
139143 name : 'plane' ,
140144 evalDataPath : path . join ( __dirname , 'eval-plane.json' ) ,
141145 outputDir,
142- agentType : undefined ,
143146 } ,
144147 {
145148 name : 'saleor' ,
146149 evalDataPath : path . join ( __dirname , 'eval-saleor.json' ) ,
147150 outputDir,
148- agentType : undefined ,
149151 } ,
150152 ]
151153
152154 console . log ( `Running ${ evalConfigs . length } evaluations:` )
153155 evalConfigs . forEach ( ( config ) => {
154156 console . log (
155- ` - ${ config . name } : ${ config . evalDataPath } -> ${ config . outputDir } (${ config . agentType } )` ,
157+ ` - ${ config . name } : ${ config . evalDataPath } -> ${ config . outputDir } (${ agent } )` ,
156158 )
157159 } )
158160
@@ -174,6 +176,7 @@ async function runEvalSet(options: {
174176 codingAgent ,
175177 config . limit ,
176178 options . concurrency === 1 ,
179+ agent ,
177180 )
178181 } catch ( error ) {
179182 const evalDuration = Date . now ( ) - evalStartTime
@@ -360,7 +363,7 @@ async function runEvalSet(options: {
360363 const payload : GitEvalResultRequest = {
361364 cost_mode : 'normal' , // You can modify this based on your needs
362365 reasoner_model : undefined , // No longer using model config
363- agent_model : config ?. agentType ,
366+ agent_model : agent ,
364367 metadata : {
365368 numCases : evalResult ?. overall_metrics ?. total_runs ,
366369 avgScore : evalResult ?. overall_metrics ?. average_overall ,
0 commit comments