Commit 6596fa6

Author: Taniya Mathur (committed)
Add test set bucket pattern match, observability improvements, and caching for test results
1 parent ee3c034 commit 6596fa6

File tree: 12 files changed (+409 / -120 lines)


docs/test-studio.md

Lines changed: 20 additions & 4 deletions
@@ -34,8 +34,21 @@ The Test Studio consists of two main tabs:
 
 #### TestResultsResolver Lambda
 - **Location**: `src/lambda/test_results_resolver/index.py`
-- **Purpose**: Handles GraphQL queries for test results and comparisons
-- **Features**: Result retrieval, comparison logic, metrics aggregation
+- **Purpose**: Handles GraphQL queries for test results and comparisons, plus asynchronous cache updates
+- **Features**:
+  - Result retrieval with cached metrics
+  - Comparison logic and metrics aggregation
+  - Dual event handling (GraphQL + SQS)
+  - Asynchronous cache update processing
+  - Progress-aware status updates
+
+#### TestResultCacheUpdateQueue
+- **Type**: AWS SQS Queue
+- **Purpose**: Decouples heavy metric calculations from synchronous API calls
+- **Features**:
+  - Encrypted message storage
+  - 15-minute visibility timeout for long-running calculations
+  - Automatic retry handling
 
 ### GraphQL Schema
 - **Location**: `src/api/schema.graphql`
@@ -77,7 +90,9 @@ components/
 ## Test Sets
 
 ### Creating Test Sets
-1. **Pattern-based**: Define file patterns (e.g., `*.pdf`)
+1. **Pattern-based**: Define file patterns (e.g., `*.pdf`) with bucket type selection
+   - **Input Bucket**: Scan main processing bucket for matching files
+   - **Test Set Bucket**: Scan dedicated test set bucket for matching files
 2. **Zip Upload**: Upload zip containing `input/` and `baseline/` folders
 3. **Direct Upload**: Files uploaded directly to TestSetBucket are auto-detected
 
@@ -126,7 +141,8 @@ my-test-set/
 ## Key Features
 
 ### Test Set Management
-- Reusable collections with file patterns
+- Reusable collections with file patterns across multiple buckets
+- Dual bucket support (Input Bucket and Test Set Bucket)
 - Zip upload with automatic extraction
 - Direct upload detection via dual polling
 - File structure validation with error reporting

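Note: the encryption and 15-minute visibility timeout called out above map onto standard SQS queue attributes. The infrastructure template itself is not part of this commit, so the following boto3 snippet is only a sketch of an equivalently configured queue; the queue name and attribute values are assumptions.

```python
import boto3

sqs = boto3.client("sqs")

# Sketch of a queue configured as described in the docs above; the real
# template is not in this commit, so name and values are illustrative.
response = sqs.create_queue(
    QueueName="TestResultCacheUpdateQueue",
    Attributes={
        "VisibilityTimeout": "900",      # 15 minutes, covering long-running metric calculations
        "SqsManagedSseEnabled": "true",  # encrypted message storage
    },
)
print(response["QueueUrl"])
```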
lib/idp_common_pkg/tests/unit/test_test_set_resolver.py

Lines changed: 8 additions & 3 deletions
@@ -73,7 +73,12 @@ def test_add_test_set_structure(self, mock_boto3, mock_datetime, mock_uuid):
         mock_boto3.return_value = mock_sqs
 
         with patch.object(test_set_index.db_client, "put_item") as mock_put:
-            args = {"name": "test", "filePattern": "*.pdf", "fileCount": 5}
+            args = {
+                "name": "test",
+                "filePattern": "*.pdf",
+                "fileCount": 5,
+                "bucketType": "input",
+            }
             result = test_set_index.add_test_set(args)
 
             mock_put.assert_called_once()
@@ -126,8 +131,8 @@ def test_list_input_bucket_files(self):
         with patch.object(test_set_index, "find_matching_files") as mock_find:
             mock_find.return_value = ["file1.pdf", "file2.pdf"]
 
-            args = {"filePattern": "*.pdf"}
-            result = test_set_index.list_input_bucket_files(args)
+            args = {"filePattern": "*.pdf", "bucketType": "input"}
+            result = test_set_index.list_bucket_files(args)
 
             mock_find.assert_called_once_with("test-bucket", "*.pdf")
             assert result == ["file1.pdf", "file2.pdf"]

src/api/schema.graphql

Lines changed: 2 additions & 2 deletions
@@ -431,7 +431,7 @@ type Mutation {
     @aws_iam
   startTestRun(input: TestRunInput!): TestRun @aws_cognito_user_pools
   deleteTests(testRunIds: [String!]!): Boolean! @aws_cognito_user_pools
-  addTestSet(name: String!, filePattern: String!, fileCount: Int!): TestSet @aws_cognito_user_pools
+  addTestSet(name: String!, filePattern: String!, bucketType: String!, fileCount: Int!): TestSet @aws_cognito_user_pools
   addTestSetFromUpload(input: TestSetUploadInput!): TestSetUploadResponse @aws_cognito_user_pools
   deleteTestSets(testSetIds: [String!]!): Boolean! @aws_cognito_user_pools
 }
@@ -475,7 +475,7 @@ type Query @aws_cognito_user_pools @aws_iam {
   getTestRunStatus(testRunId: String!): TestRunStatus @aws_cognito_user_pools
   compareTestRuns(testRunIds: [String!]!): TestRunComparison @aws_cognito_user_pools
   getTestSets: [TestSet] @aws_cognito_user_pools
-  listInputBucketFiles(filePattern: String!): [String] @aws_cognito_user_pools
+  listBucketFiles(bucketType: String!, filePattern: String!): [String] @aws_cognito_user_pools
   validateTestFileName(fileName: String!): TestSetValidationResponse @aws_cognito_user_pools
 }

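Callers of the renamed query now select the bucket to scan explicitly. Since `listBucketFiles` returns a plain `[String]`, no selection set is required; a hypothetical client-side operation (the operation name and argument values below are illustrative, not from this commit) could look like:

```python
# Hypothetical GraphQL operation against the updated schema; operation
# name and argument values are illustrative assumptions.
LIST_BUCKET_FILES = """
query ListTestSetCandidates {
  listBucketFiles(bucketType: "testset", filePattern: "*.pdf")
}
"""
```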
src/lambda/test_results_resolver/index.py

Lines changed: 69 additions & 10 deletions
@@ -10,6 +10,8 @@
 
 import boto3
 
+sqs = boto3.client('sqs')
+
 
 # Custom JSON encoder to handle Decimal objects from DynamoDB
 class DecimalEncoder(json.JSONEncoder):
@@ -24,7 +26,13 @@ def default(self, obj):
 dynamodb = boto3.resource('dynamodb')
 
 def handler(event, context):
-    """GraphQL resolver for test results queries"""
+    """Handle both GraphQL resolver and SQS events"""
+
+    # Check if this is an SQS event
+    if 'Records' in event:
+        return handle_cache_update_request(event, context)
+
+    # Otherwise handle as GraphQL resolver
     field_name = event['info']['fieldName']
 
     if field_name == 'getTestRuns':
@@ -46,6 +54,52 @@ def handler(event, context):
 
     raise ValueError(f"Unknown field: {field_name}")
 
+def handle_cache_update_request(event, context):
+    """Process SQS messages to calculate and cache test result metrics"""
+
+    for record in event['Records']:
+        try:
+            message = json.loads(record['body'])
+            test_run_id = message['testRunId']
+
+            logger.info(f"Processing cache update for test run: {test_run_id}")
+
+            # Calculate metrics
+            aggregated_metrics = _aggregate_test_run_metrics(test_run_id)
+
+            # Cache the metrics
+            metrics_to_cache = {
+                'overallAccuracy': aggregated_metrics.get('overall_accuracy'),
+                'weightedOverallScores': aggregated_metrics.get('weighted_overall_scores', []),
+                'averageConfidence': aggregated_metrics.get('average_confidence'),
+                'accuracyBreakdown': aggregated_metrics.get('accuracy_breakdown', {}),
+                'totalCost': aggregated_metrics.get('total_cost', 0),
+                'costBreakdown': aggregated_metrics.get('cost_breakdown', {})
+            }
+
+            table = dynamodb.Table(os.environ['TRACKING_TABLE'])
+            table.update_item(
+                Key={'PK': f'testrun#{test_run_id}', 'SK': 'metadata'},
+                UpdateExpression='SET testRunResult = :metrics',
+                ExpressionAttributeValues={':metrics': float_to_decimal(metrics_to_cache)}
+            )
+
+            logger.info(f"Successfully cached metrics for test run: {test_run_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to process cache update for {record.get('body', 'unknown')}: {e}")
+            # Don't raise - let other messages in batch continue processing
+
+def float_to_decimal(obj):
+    """Convert float values to Decimal for DynamoDB storage"""
+    if isinstance(obj, float):
+        return Decimal(str(obj))
+    elif isinstance(obj, dict):
+        return {k: float_to_decimal(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [float_to_decimal(v) for v in obj]
+    return obj
+
 def compare_test_runs(test_run_ids):
     """Compare multiple test runs"""
     logger.info(f"Comparing test runs: {test_run_ids}")
@@ -181,15 +235,6 @@ def get_test_results(test_run_id):
     try:
         logger.info(f"Caching metrics for test run: {test_run_id}")
 
-        def float_to_decimal(obj):
-            if isinstance(obj, float):
-                return Decimal(str(obj))
-            elif isinstance(obj, dict):
-                return {k: float_to_decimal(v) for k, v in obj.items()}
-            elif isinstance(obj, list):
-                return [float_to_decimal(v) for v in obj]
-            return obj
-
         # Cache only static metrics
         metrics_to_cache = {
             'overallAccuracy': aggregated_metrics.get('overall_accuracy'),
@@ -424,6 +469,20 @@ def get_test_run_status(test_run_id):
             }
         )
         logger.info(f"Successfully updated test run {test_run_id} status to {overall_status}")
+
+        # Queue metric calculation for completed test runs
+        if overall_status in ['COMPLETE', 'PARTIAL_COMPLETE'] and not item.get('testRunResult'):
+            try:
+                queue_url = os.environ.get('TEST_RESULT_CACHE_UPDATE_QUEUE_URL')
+                if queue_url:
+                    sqs.send_message(
+                        QueueUrl=queue_url,
+                        MessageBody=json.dumps({'testRunId': test_run_id})
+                    )
+                    logger.info(f"Queued cache update for test run: {test_run_id}")
+            except Exception as e:
+                logger.warning(f"Failed to queue cache update for {test_run_id}: {e}")
+
     except Exception as e:
         logger.error(f"Failed to auto-update test run {test_run_id} status: {e}")
 

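Because the handler now dispatches on the presence of a top-level `Records` key, one Lambda can sit behind both the AppSync resolver and the SQS event source mapping. A minimal sketch of the two event shapes it distinguishes (all payload values below are invented for illustration):

```python
import json

# Illustrative event shapes only; values are made up for this sketch.
sqs_event = {
    "Records": [
        {"body": json.dumps({"testRunId": "run-123"})},  # routed to handle_cache_update_request
    ]
}

graphql_event = {
    "info": {"fieldName": "getTestRuns"},  # routed through the GraphQL field dispatch
    "arguments": {},
}
```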
src/lambda/test_set_file_copier/index.py

Lines changed: 72 additions & 7 deletions
@@ -24,17 +24,26 @@ def handler(event, context):
 
     test_set_id = message['testSetId']
     file_pattern = message['filePattern']
+    bucket_type = message['bucketType']
     tracking_table = message['trackingTable']
 
     # Get environment variables
     input_bucket = os.environ['INPUT_BUCKET']
     test_set_bucket = os.environ['TEST_SET_BUCKET']
     baseline_bucket = os.environ['BASELINE_BUCKET']
 
-    logger.info(f"Processing test set {test_set_id} with pattern '{file_pattern}'")
+    # Determine source bucket based on bucket type
+    if bucket_type == 'input':
+        source_bucket = input_bucket
+    elif bucket_type == 'testset':
+        source_bucket = test_set_bucket
+    else:
+        raise ValueError(f"Invalid bucket type: {bucket_type}")
 
-    # Find matching files in input bucket
-    matching_files = find_matching_files(input_bucket, file_pattern)
+    logger.info(f"Processing test set {test_set_id} with pattern '{file_pattern}' from {bucket_type} bucket")
+
+    # Find matching files in source bucket
+    matching_files = find_matching_files(source_bucket, file_pattern)
 
     if not matching_files:
         raise ValueError(f"No files found matching pattern: {file_pattern}")
@@ -45,9 +54,25 @@ def handler(event, context):
     missing_baselines = []
     for file_key in matching_files:
         try:
+            if bucket_type == 'testset':
+                # For testset bucket, baseline is in the same bucket under baseline/ path
+                # Extract test set name from file path (assuming format: test_set_name/input/file)
+                path_parts = file_key.split('/')
+                if len(path_parts) >= 3 and path_parts[1] == 'input':
+                    test_set_name = path_parts[0]
+                    file_name = path_parts[2]
+                    baseline_prefix = f"{test_set_name}/baseline/{file_name}/"
+                    baseline_check_bucket = source_bucket
+                else:
+                    missing_baselines.append(file_key)
+                    continue
+            else:
+                # For input bucket, baseline is in separate baseline bucket
+                baseline_prefix = f"{file_key}/"
+                baseline_check_bucket = baseline_bucket
+
             # Check if baseline folder exists by listing objects with prefix
-            baseline_prefix = f"{file_key}/"
-            response = s3.list_objects_v2(Bucket=baseline_bucket, Prefix=baseline_prefix, MaxKeys=1)
+            response = s3.list_objects_v2(Bucket=baseline_check_bucket, Prefix=baseline_prefix, MaxKeys=1)
 
             if 'Contents' not in response or len(response['Contents']) == 0:
                 missing_baselines.append(file_key)
@@ -60,10 +85,13 @@ def handler(event, context):
         raise ValueError(f"Missing baseline folders for: {', '.join(missing_baselines)}")
 
     # Copy input files to test set bucket
-    _copy_files_to_test_set(input_bucket, test_set_bucket, test_set_id, 'input', matching_files)
+    _copy_files_to_test_set(source_bucket, test_set_bucket, test_set_id, 'input', matching_files)
 
     # Copy baseline folders to test set bucket
-    _copy_files_to_test_set(baseline_bucket, test_set_bucket, test_set_id, 'baseline', matching_files)
+    if bucket_type == 'testset':
+        _copy_baseline_from_testset(source_bucket, test_set_bucket, test_set_id, matching_files)
+    else:
+        _copy_files_to_test_set(baseline_bucket, test_set_bucket, test_set_id, 'baseline', matching_files)
 
     logger.info(f"Copied {len(matching_files)} input files and {len(matching_files)} baseline folders")
 
@@ -121,6 +149,43 @@ def _copy_files_to_test_set(source_bucket, dest_bucket, test_set_id, folder_type
 
         logger.info(f"Copied {folder_type} file: {source_key} -> {dest_bucket}/{dest_key}")
 
+def _copy_baseline_from_testset(source_bucket, dest_bucket, test_set_id, files):
+    """Copy baseline files from testset bucket where baselines are in test_set/baseline/ path"""
+
+    for file_key in files:
+        # Extract test set name and file name from path (format: test_set_name/input/file_name)
+        path_parts = file_key.split('/')
+        if len(path_parts) >= 3 and path_parts[1] == 'input':
+            source_test_set_name = path_parts[0]
+            file_name = path_parts[2]
+
+            # Source baseline path in testset bucket
+            source_baseline_prefix = f"{source_test_set_name}/baseline/{file_name}/"
+            # Destination baseline path
+            dest_baseline_prefix = f"{test_set_id}/baseline/{file_name}/"
+
+            # List all objects in the source baseline folder
+            paginator = s3.get_paginator('list_objects_v2')
+            pages = paginator.paginate(Bucket=source_bucket, Prefix=source_baseline_prefix)
+
+            for page in pages:
+                if 'Contents' in page:
+                    for obj in page['Contents']:
+                        source_key = obj['Key']
+                        # Replace the source baseline prefix with dest baseline prefix
+                        dest_key = source_key.replace(source_baseline_prefix, dest_baseline_prefix, 1)
+
+                        # Copy file
+                        s3.copy_object(
+                            CopySource={'Bucket': source_bucket, 'Key': source_key},
+                            Bucket=dest_bucket,
+                            Key=dest_key
+                        )
+
+                        logger.info(f"Copied testset baseline file: {source_key} -> {dest_bucket}/{dest_key}")
+        else:
+            logger.warning(f"Unexpected file path format for testset baseline: {file_key}")
+
 def _update_test_set_status(tracking_table, test_set_id, status, error=None):
     """Update test set status in tracking table"""
     table = dynamodb.Table(tracking_table)  # type: ignore

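Both this Lambda and the test set resolver lean on `find_matching_files`, whose body is unchanged by this commit and therefore absent from the diff. A plausible sketch, assuming glob-style matching over listed S3 keys (the actual implementation may differ):

```python
import fnmatch
import boto3

s3 = boto3.client('s3')

def find_matching_files(bucket, pattern):
    """Sketch of the helper called above (not part of this diff): return all
    object keys in the bucket matching a glob pattern such as '*.pdf'."""
    matches = []
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket):
        for obj in page.get('Contents', []):
            if fnmatch.fnmatch(obj['Key'], pattern):
                matches.append(obj['Key'])
    return matches
```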
src/lambda/test_set_resolver/index.py

Lines changed: 16 additions & 7 deletions
@@ -34,8 +34,8 @@ def handler(event, context):
         return delete_test_sets(event['arguments'])
     elif field_name == 'getTestSets':
         return get_test_sets()
-    elif field_name == 'listInputBucketFiles':
-        return list_input_bucket_files(event['arguments'])
+    elif field_name == 'listBucketFiles':
+        return list_bucket_files(event['arguments'])
     elif field_name == 'validateTestFileName':
         return validate_test_file_name(event['arguments'])
     else:
@@ -137,6 +137,7 @@ def add_test_set(args):
         MessageBody=json.dumps({
             'testSetId': test_set_id,
             'filePattern': args['filePattern'],
+            'bucketType': args['bucketType'],
             'trackingTable': os.environ['TRACKING_TABLE']
         })
     )
@@ -451,14 +452,22 @@ def _create_test_set_tracking_entry(test_set_id, name, file_count, status, error
         logger.error(f"Error creating tracking entry for {test_set_id}: {str(e)}")
 
 
-def list_input_bucket_files(args):
-    logger.info(f"Listing files with pattern: {args['filePattern']}")
+def list_bucket_files(args):
+    logger.info(f"Listing files with pattern: {args['filePattern']} from bucket type: {args['bucketType']}")
 
     file_pattern = args['filePattern']
-    input_bucket = os.environ['INPUT_BUCKET']
+    bucket_type = args['bucketType']
 
-    files = find_matching_files(input_bucket, file_pattern)
-    logger.info(f"Found {len(files)} matching files")
+    # Determine which bucket to use based on bucket type
+    if bucket_type == 'input':
+        bucket = os.environ['INPUT_BUCKET']
+    elif bucket_type == 'testset':
+        bucket = os.environ['TEST_SET_BUCKET']
+    else:
+        raise Exception(f"Invalid bucket type: {bucket_type}")
+
+    files = find_matching_files(bucket, file_pattern)
+    logger.info(f"Found {len(files)} matching files in {bucket_type} bucket")
 
     return files

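For completeness, a hypothetical AppSync event exercising the renamed field against this handler (argument values are illustrative):

```python
# Hypothetical resolver event; with bucketType 'testset' the handler lists
# matches from TEST_SET_BUCKET instead of INPUT_BUCKET.
event = {
    "info": {"fieldName": "listBucketFiles"},
    "arguments": {"bucketType": "testset", "filePattern": "*.pdf"},
}
# handler(event, None) -> e.g. ["my-test-set/input/invoice-001.pdf", ...]
```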