@@ -37,28 +37,37 @@ export function parameterMatchingAccuracyScorer(
3737 return 1 ;
3838 }
3939
40- const toolCallScores : number [ ] = [ ] ;
41- const checkedToolCallIds = new Set < string > ( ) ;
40+ const usedActualIndexes = new Set < number > ( ) ;
41+ const scores : number [ ] = [ ] ;
4242
43- for ( const expectedToolCall of expectedToolCalls ) {
44- const matchingActualToolCall = actualToolCalls . find (
45- ( actualToolCall ) =>
46- actualToolCall . toolName === expectedToolCall . toolName &&
47- ! checkedToolCallIds . has ( actualToolCall . toolCallId )
48- ) ;
43+ for ( const expectedCall of expectedToolCalls ) {
44+ // Find all unmatched actual tool calls with the same tool name
45+ const candidates = actualToolCalls
46+ . map ( ( call , index ) => ( { call, index } ) )
47+ . filter ( ( { call, index } ) => ! usedActualIndexes . has ( index ) && call . toolName === expectedCall . toolName ) ;
4948
50- if ( ! matchingActualToolCall ) {
51- toolCallScores . push ( 0 ) ;
49+ if ( candidates . length === 0 ) {
50+ scores . push ( 0 ) ;
5251 continue ;
5352 }
5453
55- checkedToolCallIds . add ( matchingActualToolCall . toolCallId ) ;
56- const score = compareParams ( expectedToolCall . parameters , matchingActualToolCall . parameters ) ;
57- toolCallScores . push ( score ) ;
54+ // Pick the candidate with the best parameter match
55+ let bestScore = - 1 ;
56+ let bestIndex = - 1 ;
57+ for ( const { call, index } of candidates ) {
58+ const score = compareParams ( expectedCall . parameters , call . parameters ) ;
59+ if ( score > bestScore ) {
60+ bestScore = score ;
61+ bestIndex = index ;
62+ }
63+ }
64+
65+ usedActualIndexes . add ( bestIndex ) ;
66+ scores . push ( bestScore ) ;
5867 }
5968
60- const totalScore = toolCallScores . reduce ( ( sum , score ) => sum + score , 0 ) ;
61- return totalScore / toolCallScores . length ;
69+ const totalScore = scores . reduce ( ( sum , score ) => sum + score , 0 ) ;
70+ return totalScore / scores . length ;
6271}
6372
6473/**
0 commit comments