Commit 6e1b876

Author: Taniya Mathur
Commit message: test studio improvements
Parent: 18c0947

File tree: 20 files changed (+6074, -2511 lines)

docs/test-studio.md

Lines changed: 87 additions & 338 deletions
Large diffs are not rendered by default.

lib/idp_common_pkg/tests/unit/test_results_resolver.py

Lines changed: 22 additions & 21 deletions
```diff
@@ -93,6 +93,8 @@ def test_get_document_costs_from_parquet_success(
         "context",
         "service_api",
         "unit",
+        "value",
+        "unit_cost",
         "estimated_cost",
     ]

@@ -105,6 +107,8 @@ def test_get_document_costs_from_parquet_success(
         "context": ["test", "test"],
         "service_api": ["bedrock", "textract"],
         "unit": ["tokens", "pages"],
+        "value": [1000, 5],
+        "unit_cost": [0.0015, 0.45],
         "estimated_cost": [1.50, 2.25],
     }
```
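A note, not stated in the diff itself: the mocked rows are arithmetically consistent, with each estimated_cost equal to value × unit_cost (1000 × 0.0015 = 1.50 for the Bedrock token row, 5 × 0.45 = 2.25 for the Textract page row), so the new columns line up with the costs the test already asserted.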
```diff
@@ -117,13 +121,29 @@ def test_get_document_costs_from_parquet_success(

     result = index._get_document_costs_from_reporting_db("test-doc", "2025-10-08")

-    assert result == {"test_bedrock_tokens": 1.50, "test_textract_pages": 2.25}
+    expected = {
+        "test": {
+            "bedrock_tokens": {
+                "unit": "tokens",
+                "value": 1000,
+                "unit_cost": 0.0015,
+                "estimated_cost": 1.50,
+            },
+            "textract_pages": {
+                "unit": "pages",
+                "value": 5,
+                "unit_cost": 0.45,
+                "estimated_cost": 2.25,
+            },
+        }
+    }
+    assert result == expected
     mock_s3_client.list_objects_v2.assert_called_once()


 @pytest.mark.unit
 @patch.dict(os.environ, {"REPORTING_BUCKET": "test-bucket"})
-@patch("index.boto3.client")
+@patch("boto3.client")
 def test_get_document_costs_no_files_found(mock_boto3):
     """Test when no Parquet files are found"""
```
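The assertion change above shows the resolver's return shape moving from a flat map keyed by context_service_unit to a nested per-context dict of service/unit entries. As an illustration only (not code from this commit), a caller could collapse that nested structure back into per-context totals as sketched below; the helper name total_costs_by_context is hypothetical.

```python
# Illustrative sketch, not from this commit: flatten the nested structure
# returned by _get_document_costs_from_reporting_db into per-context totals.
# The name "total_costs_by_context" is hypothetical.
def total_costs_by_context(costs: dict) -> dict:
    """Sum estimated_cost across all service/unit entries for each context."""
    return {
        context: sum(entry["estimated_cost"] for entry in services.values())
        for context, services in costs.items()
    }


# With the "expected" fixture above this yields {"test": 3.75} (1.50 + 2.25).
```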
```diff
@@ -146,25 +166,6 @@ def test_get_document_costs_no_bucket():
     assert result == {}


-@pytest.mark.unit
-def test_compare_document_costs_parallel_execution():
-    """Test parallel execution of cost comparison"""
-
-    with patch.object(index, "_get_document_costs_from_reporting_db") as mock_get_costs:
-        mock_get_costs.side_effect = [
-            {"test_bedrock_tokens": 1.50},  # test document
-            {"test_bedrock_tokens": 1.25},  # baseline document
-        ]
-
-        result = index._compare_document_costs(
-            "test-doc", "baseline-doc", "2025-10-08", "2025-10-07"
-        )
-
-        # Verify both documents were queried
-        assert mock_get_costs.call_count == 2
-        assert result is not None
-
-
 @pytest.mark.unit
 def test_accuracy_breakdown_structure():
     """Test accuracy breakdown data structure"""
```
Lines changed: 321 additions & 0 deletions
New file (all 321 lines added):

```python
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

from datetime import datetime
from unittest.mock import Mock

import pytest


@pytest.mark.unit
def test_is_valid_test_set_structure():
    """Test validation of test set folder structure"""

    def _is_valid_test_set_structure(s3_client, bucket, prefix):
        """Check if prefix contains input/ and baseline/ folders"""
        try:
            # Check for input/ folder
            input_response = s3_client.list_objects_v2(
                Bucket=bucket, Prefix=f"{prefix}/input/", MaxKeys=1
            )

            # Check for baseline/ folder
            baseline_response = s3_client.list_objects_v2(
                Bucket=bucket, Prefix=f"{prefix}/baseline/", MaxKeys=1
            )

            has_input = input_response.get("KeyCount", 0) > 0
            has_baseline = baseline_response.get("KeyCount", 0) > 0

            return has_input and has_baseline

        except Exception:
            return False

    # Mock S3 client
    s3_client = Mock()

    # Test case: Valid structure (has both input/ and baseline/ folders)
    s3_client.list_objects_v2.side_effect = [
        {
            "KeyCount": 1,
            "Contents": [{"Key": "my-test-set/input/file1.pdf"}],
        },  # input/ folder
        {
            "KeyCount": 1,
            "Contents": [{"Key": "my-test-set/baseline/file1.pdf/result.json"}],
        },  # baseline/ folder
    ]

    result = _is_valid_test_set_structure(s3_client, "test-bucket", "my-test-set")
    assert result is True


@pytest.mark.unit
def test_validate_test_set_files_valid():
    """Test file validation with matching input and baseline files"""

    def _validate_test_set_files(s3_client, bucket, prefix):
        """Validate that input and baseline files match"""
        try:
            input_files = set()
            baseline_files = set()

            # Get input files
            paginator = s3_client.get_paginator("list_objects_v2")
            for page in paginator.paginate(Bucket=bucket, Prefix=f"{prefix}/input/"):
                for obj in page.get("Contents", []):
                    key = obj["Key"]
                    if not key.endswith("/"):  # Skip directories
                        filename = key.split("/")[-1]
                        input_files.add(filename)

            # Get baseline folder names
            for page in paginator.paginate(Bucket=bucket, Prefix=f"{prefix}/baseline/"):
                for obj in page.get("Contents", []):
                    key = obj["Key"]
                    if not key.endswith("/"):  # Skip directories
                        # Extract folder name after /baseline/
                        parts = key.split(f"{prefix}/baseline/", 1)
                        if len(parts) == 2 and "/" in parts[1]:
                            path_parts = parts[1].split("/")
                            # Look for .pdf file in path
                            for part in path_parts:
                                if part.endswith(".pdf"):
                                    baseline_files.add(part)
                                    break

            # Validate matching
            if len(input_files) == 0:
                return {
                    "valid": False,
                    "error": "No input files found",
                    "input_count": 0,
                }

            if len(baseline_files) == 0:
                return {
                    "valid": False,
                    "error": "No baseline files found",
                    "input_count": len(input_files),
                }

            missing_baselines = input_files - baseline_files
            if missing_baselines:
                return {
                    "valid": False,
                    "error": f"Missing baseline files for: {', '.join(list(missing_baselines)[:3])}",
                    "input_count": len(input_files),
                }

            extra_baselines = baseline_files - input_files
            if extra_baselines:
                return {
                    "valid": False,
                    "error": f"Extra baseline files: {', '.join(list(extra_baselines)[:3])}",
                    "input_count": len(input_files),
                }

            return {"valid": True, "input_count": len(input_files)}

        except Exception as e:
            return {
                "valid": False,
                "error": f"Validation error: {str(e)}",
                "input_count": 0,
            }

    s3_client = Mock()

    # Mock paginator - same instance used for both calls
    paginator = Mock()
    paginator.paginate.side_effect = [
        # First call for input files
        [
            {
                "Contents": [
                    {"Key": "my-test-set/input/document1.pdf"},
                    {"Key": "my-test-set/input/document2.pdf"},
                ]
            }
        ],
        # Second call for baseline files
        [
            {
                "Contents": [
                    {"Key": "my-test-set/baseline/document1.pdf/sections/result.json"},
                    {"Key": "my-test-set/baseline/document2.pdf/extraction.json"},
                ]
            }
        ],
    ]

    s3_client.get_paginator.return_value = paginator

    result = _validate_test_set_files(s3_client, "test-bucket", "my-test-set")

    assert result["valid"] is True
    assert result["input_count"] == 2
    assert "error" not in result


@pytest.mark.unit
def test_validate_test_set_files_missing_baseline():
    """Test file validation with missing baseline files"""

    def _validate_test_set_files(s3_client, bucket, prefix):
        """Validate that input and baseline files match"""
        input_files = set()
        baseline_files = set()

        # Get input files
        paginator = s3_client.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=bucket, Prefix=f"{prefix}/input/"):
            for obj in page.get("Contents", []):
                key = obj["Key"]
                if not key.endswith("/"):
                    filename = key.split("/")[-1]
                    input_files.add(filename)

        # Get baseline folder names
        for page in paginator.paginate(Bucket=bucket, Prefix=f"{prefix}/baseline/"):
            for obj in page.get("Contents", []):
                key = obj["Key"]
                if not key.endswith("/"):
                    parts = key.split(f"{prefix}/baseline/", 1)
                    if len(parts) == 2 and "/" in parts[1]:
                        path_parts = parts[1].split("/")
                        for part in path_parts:
                            if part.endswith(".pdf"):
                                baseline_files.add(part)
                                break

        missing_baselines = input_files - baseline_files
        if missing_baselines:
            return {
                "valid": False,
                "error": f"Missing baseline files for: {', '.join(list(missing_baselines))}",
                "input_count": len(input_files),
            }

        return {"valid": True, "input_count": len(input_files)}

    s3_client = Mock()

    # Mock paginator for input files
    input_paginator = Mock()
    input_paginator.paginate.return_value = [
        {
            "Contents": [
                {"Key": "my-test-set/input/document1.pdf"},
                {"Key": "my-test-set/input/document2.pdf"},
            ]
        }
    ]

    # Mock paginator for baseline files (missing document2.pdf)
    baseline_paginator = Mock()
    baseline_paginator.paginate.return_value = [
        {
            "Contents": [
                {"Key": "my-test-set/baseline/document1.pdf/sections/result.json"}
            ]
        }
    ]

    s3_client.get_paginator.side_effect = [input_paginator, baseline_paginator]

    result = _validate_test_set_files(s3_client, "test-bucket", "my-test-set")

    assert result["valid"] is False
    assert result["input_count"] == 2
    assert "Missing baseline files for:" in result["error"]
    assert "document2.pdf" in result["error"]


@pytest.mark.unit
def test_get_test_set_creation_time():
    """Test getting creation time from S3 objects"""

    def _get_test_set_creation_time(s3_client, bucket, prefix):
        """Get the earliest creation time from files in the test set"""
        earliest_time = None

        # Check input folder for earliest file
        paginator = s3_client.get_paginator("list_objects_v2")
        for page in paginator.paginate(
            Bucket=bucket, Prefix=f"{prefix}/input/", MaxKeys=10
        ):
            for obj in page.get("Contents", []):
                if not obj["Key"].endswith("/"):  # Skip directories
                    if earliest_time is None or obj["LastModified"] < earliest_time:
                        earliest_time = obj["LastModified"]

        if earliest_time is None:
            raise Exception(
                f"No files found in {prefix}/input/ to determine creation time"
            )

        return earliest_time.isoformat()

    s3_client = Mock()

    # Mock paginator with files having different timestamps
    paginator = Mock()
    older_time = datetime(2023, 1, 1, 10, 0, 0)
    newer_time = datetime(2023, 1, 1, 12, 0, 0)

    paginator.paginate.return_value = [
        {
            "Contents": [
                {"Key": "my-test-set/input/file1.pdf", "LastModified": newer_time},
                {"Key": "my-test-set/input/file2.pdf", "LastModified": older_time},
            ]
        }
    ]

    s3_client.get_paginator.return_value = paginator

    result = _get_test_set_creation_time(s3_client, "test-bucket", "my-test-set")

    # Should return the earlier timestamp
    assert result == older_time.isoformat()


@pytest.mark.unit
def test_get_test_set_creation_time_no_files():
    """Test creation time function throws exception when no files found"""

    def _get_test_set_creation_time(s3_client, bucket, prefix):
        """Get the earliest creation time from files in the test set"""
        earliest_time = None

        paginator = s3_client.get_paginator("list_objects_v2")
        for page in paginator.paginate(
            Bucket=bucket, Prefix=f"{prefix}/input/", MaxKeys=10
        ):
            for obj in page.get("Contents", []):
                if not obj["Key"].endswith("/"):
                    if earliest_time is None or obj["LastModified"] < earliest_time:
                        earliest_time = obj["LastModified"]

        if earliest_time is None:
            raise Exception(
                f"No files found in {prefix}/input/ to determine creation time"
            )

        return earliest_time.isoformat()

    s3_client = Mock()

    # Mock paginator with no files
    paginator = Mock()
    paginator.paginate.return_value = [{"Contents": []}]
    s3_client.get_paginator.return_value = paginator

    with pytest.raises(Exception) as exc_info:
        _get_test_set_creation_time(s3_client, "test-bucket", "my-test-set")

    assert "No files found in my-test-set/input/ to determine creation time" in str(
        exc_info.value
    )
```
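For orientation (not part of the commit): each test above defines its helper inline, mirroring logic the test studio presumably runs against S3. Assuming module-level versions of the same helpers and the same input/ plus baseline/ key layout they expect, they could be driven against a real bucket roughly as follows; the bucket and prefix names are placeholders.

```python
# Illustrative sketch only, not from the commit. Assumes module-level versions
# of the helpers exercised above and the input/ + baseline/ layout they expect.
# "my-idp-test-bucket" and "invoice-test-set" are placeholder names.
import boto3

s3 = boto3.client("s3")
bucket, prefix = "my-idp-test-bucket", "invoice-test-set"

if not _is_valid_test_set_structure(s3, bucket, prefix):
    print(f"{prefix} is missing an input/ or baseline/ folder")
else:
    report = _validate_test_set_files(s3, bucket, prefix)
    if report["valid"]:
        created = _get_test_set_creation_time(s3, bucket, prefix)
        print(f"Test set OK: {report['input_count']} documents, created {created}")
    else:
        print(f"Invalid test set: {report['error']}")
```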
