@@ -53,6 +53,13 @@ export function describeAccuracyTests(
5353 const toolCalls = testTools . getToolCalls ( ) ;
5454 const toolCallingAccuracy = toolCallingAccuracyScorer ( testConfig . expectedToolCalls , toolCalls ) ;
5555 const parameterMatchingAccuracy = parameterMatchingAccuracyScorer ( testConfig . expectedToolCalls , toolCalls ) ;
56+ console . debug ( `Conversation` , JSON . stringify ( conversation , null , 2 ) ) ;
57+ console . debug ( `Tool calls` , JSON . stringify ( toolCalls , null , 2 ) ) ;
58+ console . debug (
59+ "Tool calling accuracy: %s, Parameter Accuracy: %s" ,
60+ toolCallingAccuracy ,
61+ parameterMatchingAccuracy
62+ ) ;
5663 if ( accuracyDatetime && accuracyCommit ) {
5764 await appendAccuracySnapshot ( {
5865 datetime : accuracyDatetime ,
@@ -68,19 +75,6 @@ export function describeAccuracyTests(
6875 `Skipping accuracy snapshot update for ${ model . modelName } - ${ suiteName } - ${ testConfig . prompt } `
6976 ) ;
7077 }
71-
72- try {
73- expect ( toolCallingAccuracy ) . not . toEqual ( 0 ) ;
74- expect ( parameterMatchingAccuracy ) . toBeGreaterThanOrEqual ( 0.5 ) ;
75- } catch ( error ) {
76- console . warn ( `Accuracy test failed for ${ model . modelName } - ${ suiteName } - ${ testConfig . prompt } ` ) ;
77- console . debug ( `Provided tools` , JSON . stringify ( toolsForModel , null , 2 ) ) ;
78- console . debug ( `Conversation` , JSON . stringify ( conversation , null , 2 ) ) ;
79- console . debug ( `Tool calls` , JSON . stringify ( toolCalls , null , 2 ) ) ;
80- console . debug ( `Tool calling accuracy` , toolCallingAccuracy ) ;
81- console . debug ( `Parameter matching accuracy` , parameterMatchingAccuracy ) ;
82- throw error ;
83- }
8478 } ) ;
8579 } ) ;
8680}
0 commit comments