1- import { Tool } from "@modelcontextprotocol/sdk/types.js" ;
2- import { discoverMongoDBTools , TestTools , MockedTools } from "./test-tools.js" ;
31import { TestableModels } from "./models.js" ;
42import { ExpectedToolCall , parameterMatchingAccuracyScorer , toolCallingAccuracyScorer } from "./accuracy-scorers.js" ;
53import { Agent , getVercelToolCallingAgent } from "./agent.js" ;
6- import { appendAccuracySnapshot } from "./accuracy-snapshot.js" ;
4+ import { prepareTestData , setupMongoDBIntegrationTest } from "../../integration/tools/mongodb/mongodbHelpers.js" ;
5+ import { AccuracyTestingClient , MockedTools } from "./accuracy-testing-client.js" ;
76
87export interface AccuracyTestConfig {
98 systemPrompt ?: string ;
@@ -13,68 +12,71 @@ export interface AccuracyTestConfig {
1312 mockedTools : MockedTools ;
1413}
1514
/**
 * Wraps a list of accuracy test configurations in a single-entry object keyed
 * by the suite name. The result has the shape that `describeAccuracyTests`
 * accepts for its `accuracyTestConfigs` argument.
 *
 * @param suiteName - Name used as the only key of the returned object.
 * @param testConfigs - Test configurations stored (by reference) under that key.
 * @returns An object whose single property `suiteName` holds `testConfigs`.
 */
export function describeSuite(
    suiteName: string,
    testConfigs: AccuracyTestConfig[]
): Record<string, AccuracyTestConfig[]> {
    return {
        [suiteName]: testConfigs,
    };
}
20+
/**
 * Registers the accuracy test matrix: one `describe` block per model, a nested
 * `describe` block per suite, and one test per configuration in that suite.
 *
 * For every model a MongoDB integration fixture is set up and a single
 * `AccuracyTestingClient` is initialized against its connection string. Before
 * each test the fixture data is repopulated and the client is reset; after all
 * tests of the model the client is closed.
 *
 * Each test mocks the configured tools, prompts the agent, and scores the
 * recorded tool calls against `expectedToolCalls`. The scores are only written
 * with `console.debug`; nothing is asserted on them here.
 *
 * @param models - Models to run every suite against. Must not be empty.
 * @param accuracyTestConfigs - Test configurations keyed by suite name.
 * @throws Error when `models` is empty.
 */
export function describeAccuracyTests(
    models: TestableModels,
    accuracyTestConfigs: {
        [suiteName: string]: AccuracyTestConfig[];
    }
): void {
    if (!models.length) {
        throw new Error("No models available to test!");
    }

    const eachModel = describe.each(models);
    const eachSuite = describe.each(Object.keys(accuracyTestConfigs));

    eachModel(`$modelName`, function (model) {
        const mdbIntegration = setupMongoDBIntegrationTest();
        const populateTestData = prepareTestData(mdbIntegration);

        // Both are assigned in `beforeAll`, so they are shared by every test of this model.
        let testMCPClient: AccuracyTestingClient;
        let agent: Agent;

        beforeAll(async () => {
            testMCPClient = await AccuracyTestingClient.initializeClient(mdbIntegration.connectionString());
            agent = getVercelToolCallingAgent();
        });

        beforeEach(async () => {
            await populateTestData();
            testMCPClient.resetForTests();
        });

        afterAll(async () => {
            await testMCPClient.close();
        });

        eachSuite("%s", function (suiteName) {
            // `?? []` covers the `undefined` an index-signature lookup may yield.
            const eachTest = it.each(accuracyTestConfigs[suiteName] ?? []);

            eachTest("$prompt", async function (testConfig) {
                testMCPClient.mockTools(testConfig.mockedTools);
                const toolsForModel = await testMCPClient.vercelTools();
                const promptForModel = testConfig.injectConnectedAssumption
                    ? [testConfig.prompt, "(Assume that you are already connected to a MongoDB cluster!)"].join(" ")
                    : testConfig.prompt;
                const conversation = await agent.prompt(promptForModel, model, toolsForModel);
                const toolCalls = testMCPClient.getToolCalls();
                const toolCallingAccuracy = toolCallingAccuracyScorer(testConfig.expectedToolCalls, toolCalls);
                const parameterMatchingAccuracy = parameterMatchingAccuracyScorer(
                    testConfig.expectedToolCalls,
                    toolCalls
                );
                console.debug(testConfig.prompt);
                console.debug(`Conversation`, JSON.stringify(conversation, null, 2));
                // NOTE(review): the tool-call dump is currently disabled.
                // console.debug(`Tool calls`, JSON.stringify(toolCalls, null, 2));
                console.debug(
                    "Tool calling accuracy: %s, Parameter Accuracy: %s",
                    toolCallingAccuracy,
                    parameterMatchingAccuracy
                );
            });
        });
    });
}
0 commit comments