Skip to content

Commit 1085992

Browse files
committed
Enhance memory boundary tests in HTML formatter for large datasets
1 parent af11824 commit 1085992

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

python/tests/test_dataframe.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,13 +1468,16 @@ def test_html_formatter_memory_boundary_conditions(large_df, clean_formatter_sta
14681468
"""
14691469

14701470
# Get the raw size of the data to test boundary conditions
1471-
# First, capture output with no limits - use very high max_rows to avoid row limit
1471+
# First, capture output with no limits
1472+
# NOTE: max_rows=200000 is set well above the dataset size (100k rows) to ensure
1473+
# we're testing memory limits, not row limits. Default max_rows=10 would
1474+
# truncate before memory limit is reached.
14721475
configure_formatter(max_memory_bytes=10 * MB, min_rows=1, max_rows=200000)
14731476
unrestricted_output = large_df._repr_html_()
14741477
unrestricted_rows = count_table_rows(unrestricted_output)
14751478

14761479
# Test 1: Very small memory limit should still respect min_rows
1477-
# With large dataset, this should definitely hit memory limit
1480+
# With a large dataset, the memory limit is certain to be reached before min_rows rows are rendered
14781481
configure_formatter(max_memory_bytes=10, min_rows=1)
14791482
html_output = large_df._repr_html_()
14801483
tr_count = count_table_rows(html_output)
@@ -1483,6 +1486,8 @@ def test_html_formatter_memory_boundary_conditions(large_df, clean_formatter_sta
14831486
assert "data truncated" in html_output.lower()
14841487

14851488
# Test 2: Memory limit at default size (2MB) should truncate the large dataset
1489+
# max_rows is deliberately left unset here (the default would truncate at 10 rows) to
1490+
# verify that the memory limit is still respected when the default row limit applies
14861491
configure_formatter(max_memory_bytes=2 * MB, min_rows=1)
14871492
html_output = large_df._repr_html_()
14881493
tr_count = count_table_rows(html_output)
@@ -1491,6 +1496,8 @@ def test_html_formatter_memory_boundary_conditions(large_df, clean_formatter_sta
14911496
assert tr_count < unrestricted_rows
14921497

14931498
# Test 3: Very large memory limit should show much more data
1499+
# NOTE: max_rows=200000 is critical here - without it, default max_rows=10
1500+
# would limit output to 10 rows even though we have 100MB of memory available
14941501
configure_formatter(max_memory_bytes=100 * MB, min_rows=1, max_rows=200000)
14951502
html_output = large_df._repr_html_()
14961503
tr_count = count_table_rows(html_output)
@@ -1507,6 +1514,7 @@ def test_html_formatter_memory_boundary_conditions(large_df, clean_formatter_sta
15071514
assert "data truncated" in html_output.lower()
15081515

15091516
# Test 5: With reasonable memory and min_rows settings
1517+
# NOTE: max_rows=200000 ensures we test memory limit behavior, not row limit
15101518
configure_formatter(max_memory_bytes=2 * MB, min_rows=10, max_rows=200000)
15111519
html_output = large_df._repr_html_()
15121520
tr_count = count_table_rows(html_output)

0 commit comments

Comments
 (0)