@@ -10,6 +10,7 @@ import { availableModels } from "./models/index.js";
1010import { ToolDefinition } from "./models/model.js" ;
1111import { zodToJsonSchema } from "zod-to-json-schema" ;
1212
/** Value accepted by `ToolMock.thenReturn`: an object whose `content` is an array of `{ type, text }` entries. */
13+ type ToolMockReturn = { content : Array < { type : string ; text : string } > } ;
1314class ToolMock {
1415 readonly name : string ;
1516 arguments : unknown ;
@@ -27,7 +28,7 @@ class ToolMock {
2728 return this ;
2829 }
2930
30- thenReturn ( value : unknown ) : this {
31+ thenReturn ( value : ToolMockReturn ) : this {
3132 this . returns = value ;
3233 return this ;
3334 }
@@ -55,6 +56,36 @@ type AccuracyTestCaseFn = (tools: AccuracyToolSetupFunction) => void;
5556type AccuracyItFn = ( prompt : string , testCase : AccuracyTestCaseFn ) => void ;
5657type AccuracyTestSuite = { prompt : AccuracyItFn } ;
5758
/** Record of a tool call (tool name plus the arguments it was called with) made for a tool that was not registered as a mock. */
59+ type NonMockedCallError = { tool : string ; args : unknown } ;
60+
/**
 * Writes the given values to the console, but only when verbose test output
 * is enabled through the `MONGODB_MCP_TEST_VERBOSE` environment variable.
 *
 * @param args - Values forwarded unchanged to `console.log`.
 */
function logVerbose(...args: unknown[]): void {
    // Only the exact string "true" enables output; unset or any other value stays silent.
    if (process.env.MONGODB_MCP_TEST_VERBOSE === "true") {
        console.log(...args);
    }
}
66+
/**
 * Logs the plan produced by a model, one step per line, through `logVerbose`.
 *
 * @param model - Name of the model that generated the plan.
 * @param plan - Plan steps; joined with newlines into a single logged string.
 */
function printModelPlanIfVerbose(model: string, plan: string[]): void {
    logVerbose(model, "📝: ", plan.join("\n"));
}
70+
/**
 * Logs the prompt under test through `logVerbose`.
 *
 * @param model - Name of the model the prompt is being run against.
 * @param prompt - Prompt text to log.
 */
function testPromptIsVerbose(model: string, prompt: string): void {
    logVerbose(model, "📜: ", prompt);
}
74+
/**
 * Logs the text a model answered with through `logVerbose`.
 * Empty responses are skipped so they do not produce a blank log entry.
 *
 * @param model - Name of the model that produced the response.
 * @param response - Text returned by the model.
 */
function modelSaidVerbose(model: string, response: string): void {
    if (response.length > 0) {
        logVerbose(model, "🗣️: ", response);
    }
}
80+
/**
 * Logs a tool call requested by a model, with its arguments serialized as
 * JSON, through `logVerbose`.
 *
 * @param model - Name of the model that requested the tool call.
 * @param toolCall - Name of the tool being called.
 * @param args - Arguments passed to the tool.
 */
function modelToolCalledVerbose(model: string, toolCall: string, args: unknown): void {
    let serializedArgs: string;
    try {
        serializedArgs = JSON.stringify(args);
    } catch {
        // JSON.stringify throws on BigInt values and circular structures; a
        // diagnostics helper must not abort the test run because of that.
        serializedArgs = "<unserializable args>";
    }

    logVerbose(model, "🛠️: ", toolCall, serializedArgs);
}
84+
/**
 * Logs the text handed back after tool calls through `logVerbose`.
 *
 * @param model - Name of the model the tool results belong to.
 * @param answer - Text to log.
 */
function toolCallsReturnedVerbose(model: string, answer: string): void {
    logVerbose(model, "📋: ", answer);
}
88+
5889export function describeAccuracyTest ( useCase : string , testCaseFn : ( testSuite : AccuracyTestSuite ) => void ) {
5990 const models = availableModels ( ) ;
6091 if ( models . length === 0 ) {
@@ -105,8 +136,13 @@ export function describeAccuracyTest(useCase: string, testCaseFn: (testSuite: Ac
105136
106137 const promptFn : AccuracyItFn = ( prompt : string , testCase : AccuracyTestCaseFn ) => {
107138 it ( prompt , async ( ) => {
139+ testPromptIsVerbose ( model . name , prompt ) ;
140+
108141 const mcpServerUnsafe = ( mcpServer as unknown as McpServerUnsafe ) . mcpServer ;
109142 const tools = mcpServerUnsafe [ "_registeredTools" ] as { [ toolName : string ] : RegisteredTool } ;
143+ const mockedTools = new Set < string > ( ) ;
144+ const nonMockedCallErrors = new Array < NonMockedCallError > ( ) ;
145+
110146 const toolDefinitions = Object . entries ( tools ) . map ( ( [ toolName , tool ] ) => {
111147 if ( ! tool . inputSchema ) {
112148 throw new Error ( `Tool ${ toolName } does not have an input schema defined.` ) ;
@@ -136,17 +172,22 @@ export function describeAccuracyTest(useCase: string, testCaseFn: (testSuite: Ac
136172 return toolForApi ;
137173 } ) ;
138174
139- const mocks : Array < ToolMock > = [ ] ;
175+ const plan = await model . generatePlan ( prompt , toolDefinitions ) ;
176+ printModelPlanIfVerbose ( model . name , plan ) ;
177+
178+
179+ const mocks : Array < ToolMock > = [ ] ;
140180 const toolFn : AccuracyToolSetupFunction = ( toolName : string ) => {
141181 const mock = new ToolMock ( toolName ) ;
182+ mockedTools . add ( toolName ) ;
142183
143184 const mcpServerUnsafe = ( mcpServer as unknown as McpServerUnsafe ) . mcpServer ;
144185 const tools = mcpServerUnsafe [ "_registeredTools" ] as { [ toolName : string ] : RegisteredTool } ;
145186
146187 if ( tools [ toolName ] !== undefined ) {
147188 tools [ toolName ] . callback = ( ( args : unknown ) => {
148189 mock . _wasCalledWith ( args ) ;
149- return mock . returns ;
190+ return Promise . resolve ( mock . returns ) ;
150191 } ) as unknown as ToolCallback ;
151192 }
152193
@@ -157,30 +198,55 @@ export function describeAccuracyTest(useCase: string, testCaseFn: (testSuite: Ac
157198 testCase ( toolFn ) ;
158199
159200 const consumePromptUntilNoMoreCall = async ( prompt : string [ ] ) => {
160- const promptStr = prompt . join ( "\n" ) ;
161- const response = await model . generateContent ( promptStr , toolDefinitions ) ;
201+ const response = await model . generateContent ( prompt , toolDefinitions ) ;
162202
203+ modelSaidVerbose ( model . name , response . text || "<no text>" ) ;
163204 if ( response . toolCall . length > 0 ) {
164205 const toolCallResults = await Promise . all (
165- response . toolCall . map ( ( tc ) =>
166- mcpClient . callTool ( {
206+ response . toolCall . map ( ( tc ) => {
207+ modelToolCalledVerbose ( model . name , tc . name , tc . args ) ;
208+
209+ if ( ! mockedTools . has ( tc . name ) ) {
210+ nonMockedCallErrors . push ( { tool : tc . name , args : tc . args } ) ;
211+ }
212+
213+ return mcpClient . callTool ( {
167214 name : tc . name ,
168215 arguments : tc . args ,
169- } )
170- )
216+ } ) ;
217+ } )
171218 ) ;
172- const newPrompt = toolCallResults . flatMap ( ( result ) =>
219+
220+ const responseParts = toolCallResults . flatMap ( ( result ) =>
173221 ( result . content as Array < { text : string } > ) . map ( ( c ) => c . text )
174222 ) ;
175223
176- if ( newPrompt . join ( "\n" ) . trim ( ) . length > 0 ) {
224+ const newPrompt = prompt . concat ( responseParts ) ;
225+ toolCallsReturnedVerbose ( model . name , newPrompt . join ( "\n" ) ) ;
226+
227+ if ( responseParts . length > 0 ) {
177228 return consumePromptUntilNoMoreCall ( newPrompt ) ;
178229 }
179230 }
180231 } ;
181232
233+ for ( const step of plan ) {
234+ await consumePromptUntilNoMoreCall ( [ step ] ) ;
235+ }
236+
182237 await consumePromptUntilNoMoreCall ( [ prompt ] ) ;
238+
183239 mocks . forEach ( ( mock ) => mock . _verify ( ) ) ;
240+ if ( nonMockedCallErrors . length > 0 ) {
241+ for ( const call of nonMockedCallErrors ) {
242+ console . error (
243+ `Non-mocked tool call detected: ${ call . tool } with args:` ,
244+ JSON . stringify ( call . args , null , 2 )
245+ ) ;
246+ }
247+
248+ throw new Error ( "Non-mocked tool calls detected. Check the console for details." ) ;
249+ }
184250 } ) ;
185251 } ;
186252
0 commit comments