@@ -45,10 +45,11 @@ export function describeAccuracyTests(
4545
4646 eachTest ( "$prompt" , async function ( testConfig ) {
4747 testTools . mockTools ( testConfig . mockedTools ) ;
48+ const toolsForModel = testTools . vercelAiTools ( ) ;
4849 const promptForModel = testConfig . injectConnectedAssumption
4950 ? [ testConfig . prompt , "(Assume that you are already connected to a MongoDB cluster!)" ] . join ( " " )
5051 : testConfig . prompt ;
51- const conversation = await agent . prompt ( promptForModel , model , testTools . vercelAiTools ( ) ) ;
52+ const conversation = await agent . prompt ( promptForModel , model , toolsForModel ) ;
5253 const toolCalls = testTools . getToolCalls ( ) ;
5354 const toolCallingAccuracy = toolCallingAccuracyScorer ( testConfig . expectedToolCalls , toolCalls ) ;
5455 const parameterMatchingAccuracy = parameterMatchingAccuracyScorer ( testConfig . expectedToolCalls , toolCalls ) ;
@@ -73,6 +74,7 @@ export function describeAccuracyTests(
7374 expect ( parameterMatchingAccuracy ) . toBeGreaterThanOrEqual ( 0.5 ) ;
7475 } catch ( error ) {
7576 console . warn ( `Accuracy test failed for ${ model . modelName } - ${ suiteName } - ${ testConfig . prompt } ` ) ;
77+ console . debug ( `Provided tools` , JSON . stringify ( toolsForModel , null , 2 ) ) ;
7678 console . debug ( `Conversation` , JSON . stringify ( conversation , null , 2 ) ) ;
7779 console . debug ( `Tool calls` , JSON . stringify ( toolCalls , null , 2 ) ) ;
7880 console . debug ( `Tool calling accuracy` , toolCallingAccuracy ) ;
0 commit comments