 // SPDX-License-Identifier: Apache-2.0
 import React, { useState, useEffect } from 'react';
 import PropTypes from 'prop-types';
-import { Container, Header, SpaceBetween, Table, Box, Button, ButtonDropdown } from '@cloudscape-design/components';
+import { Container, Header, SpaceBetween, Table, Box, Button, ButtonDropdown, ProgressBar } from '@cloudscape-design/components';
 import { generateClient } from 'aws-amplify/api';
 import COMPARE_TEST_RUNS from '../../graphql/queries/compareTestRuns';
 import TestStudioHeader from './TestStudioHeader';
@@ -12,6 +12,7 @@ const client = generateClient();
 const TestComparison = ({ preSelectedTestRunIds = [] }) => {
   const [comparisonData, setComparisonData] = useState(null);
   const [comparing, setComparing] = useState(false);
+  const [currentAttempt, setCurrentAttempt] = useState(1);

   useEffect(() => {
     const fetchComparison = async () => {
@@ -22,10 +23,60 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {

       try {
         console.log('Making GraphQL request...');
-        const result = await client.graphql({
-          query: COMPARE_TEST_RUNS,
-          variables: { testRunIds: preSelectedTestRunIds },
-        });
+        let result;
+        let attempt = 1;
+        const maxRetries = 5;
+
+        while (attempt <= maxRetries) {
+          try {
+            setCurrentAttempt(attempt);
+            result = await client.graphql({
+              query: COMPARE_TEST_RUNS,
+              variables: { testRunIds: preSelectedTestRunIds },
+            });
+            setCurrentAttempt(5); // Set to 100% before completing
+            await new Promise((resolve) => setTimeout(resolve, 500)); // Brief pause to show 100%
+            break;
+          } catch (error) {
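+            // Network failures and gateway/Lambda timeouts are treated as retryable below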
+            const isTimeout =
+              error.message?.toLowerCase().includes('timeout') ||
+              error.code === 'TIMEOUT' ||
+              error.message?.includes('Request failed with status code 504') ||
+              error.name === 'TimeoutError' ||
+              error.code === 'NetworkError' ||
+              error.errors?.some((err) =>
+                err.errorType === 'Lambda:ExecutionTimeoutException' ||
+                err.message?.toLowerCase().includes('timeout')
+              );
+            if (isTimeout && attempt < maxRetries) {
+              console.log(`COMPARE_TEST_RUNS attempt ${attempt} failed, retrying...`, error.message);
+              attempt++;
+
+              // Animate progress during 5-second wait
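+              // Each attempt covers one 20% slice of the bar (5 attempts = 100%)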
+              const waitTime = 5000;
+              const intervalTime = 100;
+              const steps = waitTime / intervalTime;
+              const startProgress = (attempt - 1) * 20;
+              const endProgress = attempt * 20;
+              const progressStep = (endProgress - startProgress) / steps;
+
+              let currentProgress = startProgress;
+              const progressInterval = setInterval(() => {
+                currentProgress += progressStep;
+                setCurrentAttempt(Math.min(currentProgress / 20, 5));
+              }, intervalTime);
+
+              await new Promise((resolve) => setTimeout(() => {
+                clearInterval(progressInterval);
+                setCurrentAttempt(attempt);
+                resolve();
+              }, waitTime));
+
+              continue;
+            }
+            throw error;
+          }
+        }

         const compareData = result.data.compareTestRuns;

@@ -90,17 +141,27 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {
       ),
     ],
     [
-      'Overall Accuracy',
+      'Average Accuracy',
       ...Object.values(completeTestRuns).map((run) =>
-        run.overallAccuracy !== null && run.overallAccuracy !== undefined ? `${(run.overallAccuracy * 100).toFixed(1)}%` : 'N/A',
+        run.overallAccuracy !== null && run.overallAccuracy !== undefined ? run.overallAccuracy.toFixed(3) : 'N/A',
       ),
     ],
     [
-      'Overall Confidence',
+      'Average Confidence',
       ...Object.values(completeTestRuns).map((run) =>
         run.averageConfidence !== null && run.averageConfidence !== undefined ? `${(run.averageConfidence * 100).toFixed(1)}%` : 'N/A',
       ),
     ],
+    [
+      'Average Weighted Overall Score',
+      ...Object.values(completeTestRuns).map((run) => {
+        if (run.weightedOverallScores && run.weightedOverallScores.length > 0) {
+          const avg = run.weightedOverallScores.reduce((sum, score) => sum + score, 0) / run.weightedOverallScores.length;
+          return avg.toFixed(3);
+        }
+        return 'N/A';
+      }),
+    ],
     [
       'Duration',
       ...Object.values(completeTestRuns).map((run) => {
@@ -186,6 +247,44 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {
       usageRows.push(row);
     });

+    // Add accuracy breakdown rows
+    const accuracyRows = [];
+    const allAccuracyMetrics = new Set();
+    Object.values(completeTestRuns).forEach((testRun) => {
+      if (testRun.accuracyBreakdown) {
+        Object.keys(testRun.accuracyBreakdown).forEach((metric) => {
+          allAccuracyMetrics.add(metric);
+        });
+      }
+    });
+
+    // Add accuracy breakdown header
+    accuracyRows.push(['Accuracy Metric', ...Object.keys(completeTestRuns)]);
+
+    // Add accuracy breakdown metrics
+    Array.from(allAccuracyMetrics).forEach((metricKey) => {
+      const row = [metricKey.replace(/_/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase())];
+      Object.entries(completeTestRuns).forEach(([testRunId, testRun]) => {
+        const accuracyBreakdown = testRun.accuracyBreakdown || {};
+        const value = accuracyBreakdown[metricKey];
+        const displayValue = value !== null && value !== undefined ? value.toFixed(3) : '0.000';
+        row.push(displayValue);
+      });
+      accuracyRows.push(row);
+    });
+
+    // Add weighted overall score to accuracy breakdown
+    const weightedRow = ['Weighted Overall Score'];
+    Object.entries(completeTestRuns).forEach(([testRunId, testRun]) => {
+      if (testRun.weightedOverallScores && testRun.weightedOverallScores.length > 0) {
+        const avg = testRun.weightedOverallScores.reduce((sum, score) => sum + score, 0) / testRun.weightedOverallScores.length;
+        weightedRow.push(avg.toFixed(3));
+      } else {
+        weightedRow.push('N/A');
+      }
+    });
+    accuracyRows.push(weightedRow);
+
     // Add config comparison rows
     const configRows = [];
     if (comparisonData.configs && comparisonData.configs.length > 0) {
@@ -202,14 +301,17 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {
       ['=== PERFORMANCE METRICS ==='],
       ...performanceRows,
       [''],
+      ['=== CONFIGURATION COMPARISON ==='],
+      ...configRows,
+      [''],
+      ['=== AVERAGE ACCURACY BREAKDOWN ==='],
+      ...accuracyRows,
+      [''],
       ['=== COST BREAKDOWN ==='],
       ...costRows,
       [''],
       ['=== USAGE BREAKDOWN ==='],
       ...usageRows,
-      [''],
-      ['=== CONFIGURATION DIFFERENCES ==='],
-      ...configRows,
     ];

     const csvContent = csvData.map((row) => row.map((field) => `"${String(field).replace(/"/g, '""')}"`).join(',')).join('\n');
@@ -255,8 +357,11 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {
             completedFiles: testRun.completedFiles,
             failedFiles: testRun.failedFiles,
             totalCost: testRun.totalCost,
-            overallAccuracy: testRun.overallAccuracy,
+            averageAccuracy: testRun.overallAccuracy,
             averageConfidence: testRun.averageConfidence,
+            averageWeightedOverallScore: testRun.weightedOverallScores && testRun.weightedOverallScores.length > 0
+              ? testRun.weightedOverallScores.reduce((sum, score) => sum + score, 0) / testRun.weightedOverallScores.length
+              : null,
             duration:
               testRun.createdAt && testRun.completedAt
                 ? (() => {
@@ -269,13 +374,20 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {
           },
         ]),
       ),
+      configurationDifferences: comparisonData.configs || [],
+      accuracyBreakdown: Object.fromEntries(
+        Object.entries(completeTestRuns).map(([testRunId, testRun]) => {
+          const breakdown = { ...(testRun.accuracyBreakdown || {}) };
+          // Add weighted overall score to accuracy breakdown
+          if (testRun.weightedOverallScores && testRun.weightedOverallScores.length > 0) {
+            breakdown.weightedOverallScore = testRun.weightedOverallScores.reduce((sum, score) => sum + score, 0) / testRun.weightedOverallScores.length;
+          }
+          return [testRunId, breakdown];
+        }),
+      ),
       costBreakdown: Object.fromEntries(
         Object.entries(completeTestRuns).map(([testRunId, testRun]) => [testRunId, testRun.costBreakdown || {}]),
       ),
-      accuracyBreakdown: Object.fromEntries(
-        Object.entries(completeTestRuns).map(([testRunId, testRun]) => [testRunId, testRun.accuracyBreakdown || {}]),
-      ),
-      configurationDifferences: comparisonData.configs || [],
     };

     const jsonData = JSON.stringify(filteredData, null, 2);
@@ -299,7 +411,7 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {
   };

   if (comparing) {
-    return <Box>Loading comparison...</Box>;
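+    // currentAttempt runs from 1 to 5, so the bar fills in 20% increments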
+    return <ProgressBar status="in-progress" label="Loading comparison..." value={currentAttempt * 20} />;
   }

   if (!comparisonData) {
@@ -415,31 +527,31 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {
             ),
           },
           {
-            metric: 'Overall Accuracy',
+            metric: 'Average Accuracy',
             ...Object.fromEntries(
               Object.entries(completeTestRuns).map(([testRunId, testRun]) => [
                 testRunId,
                 testRun.overallAccuracy !== null && testRun.overallAccuracy !== undefined
-                  ? `${(testRun.overallAccuracy * 100).toFixed(1)}%`
+                  ? testRun.overallAccuracy.toFixed(3)
                   : 'N/A',
               ]),
             ),
           },
           {
-            metric: 'Weighted Overall Score',
+            metric: 'Average Weighted Overall Score',
             ...Object.fromEntries(
               Object.entries(completeTestRuns).map(([testRunId, testRun]) => {
                 if (testRun.weightedOverallScores && testRun.weightedOverallScores.length > 0) {
                   const avg =
                     testRun.weightedOverallScores.reduce((sum, score) => sum + score, 0) / testRun.weightedOverallScores.length;
-                  return [testRunId, `${(avg * 100).toFixed(1)}%`];
+                  return [testRunId, avg.toFixed(3)];
                 }
                 return [testRunId, 'N/A'];
               }),
             ),
           },
           {
-            metric: 'Overall Confidence',
+            metric: 'Average Confidence',
             ...Object.fromEntries(
               Object.entries(completeTestRuns).map(([testRunId, testRun]) => [
                 testRunId,
@@ -513,8 +625,8 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {
           })()}
         </Container>

-        {/* Accuracy Comparison */}
-        <Container header={<Header variant="h3">Accuracy Comparison</Header>}>
+        {/* Average Accuracy Comparison */}
+        <Container header={<Header variant="h3">Average Accuracy Comparison</Header>}>
           {(() => {
             const hasAccuracyData = Object.values(completeTestRuns).some((testRun) => testRun.accuracyBreakdown);

@@ -533,17 +645,31 @@ const TestComparison = ({ preSelectedTestRunIds = [] }) => {

             return (
               <Table
-                items={Array.from(allAccuracyMetrics).map((metricKey) => ({
-                  metric: metricKey.replace(/_/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase()),
-                  ...Object.fromEntries(
-                    Object.entries(completeTestRuns).map(([testRunId, testRun]) => {
-                      const accuracyBreakdown = testRun.accuracyBreakdown || {};
-                      const value = accuracyBreakdown[metricKey];
-                      const displayValue = value !== null && value !== undefined ? `${(value * 100).toFixed(1)}%` : '0.0%';
-                      return [testRunId, displayValue];
-                    }),
-                  ),
-                }))}
+                items={[
+                  ...Array.from(allAccuracyMetrics).map((metricKey) => ({
+                    metric: metricKey.replace(/_/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase()),
+                    ...Object.fromEntries(
+                      Object.entries(completeTestRuns).map(([testRunId, testRun]) => {
+                        const accuracyBreakdown = testRun.accuracyBreakdown || {};
+                        const value = accuracyBreakdown[metricKey];
+                        const displayValue = value !== null && value !== undefined ? value.toFixed(3) : '0.000';
+                        return [testRunId, displayValue];
+                      }),
+                    ),
+                  })),
+                  {
+                    metric: 'Weighted Overall Score',
+                    ...Object.fromEntries(
+                      Object.entries(completeTestRuns).map(([testRunId, testRun]) => {
+                        if (testRun.weightedOverallScores && testRun.weightedOverallScores.length > 0) {
+                          const avg = testRun.weightedOverallScores.reduce((sum, score) => sum + score, 0) / testRun.weightedOverallScores.length;
+                          return [testRunId, avg.toFixed(3)];
+                        }
+                        return [testRunId, 'N/A'];
+                      }),
+                    ),
+                  },
+                ]}
                 columnDefinitions={[
                   { id: 'metric', header: 'Accuracy Metric', cell: (item) => item.metric },
                   ...Object.keys(completeTestRuns).map((testRunId) => ({