Skip to content

Commit af11824

Browse files
committed
Update HTML formatter memory boundary tests for large datasets
1 parent af3ef4b commit af11824

File tree

1 file changed

+26
-19
lines changed

1 file changed

+26
-19
lines changed

python/tests/test_dataframe.py

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,54 +1458,61 @@ def test_html_formatter_memory(df, clean_formatter_state):
14581458
assert "data truncated" not in html_output.lower()
14591459

14601460

1461-
def test_html_formatter_memory_boundary_conditions(df, clean_formatter_state):
1462-
"""Test memory limit behavior at boundary conditions.
1461+
def test_html_formatter_memory_boundary_conditions(large_df, clean_formatter_state):
1462+
"""Test memory limit behavior at boundary conditions with large dataset.
14631463
14641464
This test validates that the formatter correctly handles edge cases when
1465-
the memory limit is very close to actual data size, ensuring that min_rows
1466-
constraint is properly respected while respecting memory limits.
1465+
the memory limit is reached with a large dataset (100,000 rows), ensuring
1466+
that the min_rows constraint is honored while memory limits are still enforced.
1467+
Uses large_df to actually test memory limit behavior with realistic data sizes.
14671468
"""
14681469

14691470
# Get the raw size of the data to test boundary conditions
1470-
# First, capture output with no limits
1471-
configure_formatter(max_memory_bytes=10 * MB, min_rows=1, max_rows=100)
1472-
unrestricted_output = df._repr_html_()
1471+
# First, capture a baseline with a 10 MB memory limit - use very high max_rows to avoid row limit
1472+
configure_formatter(max_memory_bytes=10 * MB, min_rows=1, max_rows=200000)
1473+
unrestricted_output = large_df._repr_html_()
14731474
unrestricted_rows = count_table_rows(unrestricted_output)
14741475

14751476
# Test 1: Very small memory limit should still respect min_rows
1477+
# With large dataset, this should definitely hit memory limit
14761478
configure_formatter(max_memory_bytes=10, min_rows=1)
1477-
html_output = df._repr_html_()
1479+
html_output = large_df._repr_html_()
14781480
tr_count = count_table_rows(html_output)
14791481
assert tr_count >= 2 # At least header + 1 data row (minimum)
14801482
# Should show truncation since we limited memory so aggressively
14811483
assert "data truncated" in html_output.lower()
14821484

1483-
# Test 2: Memory limit at default size should work well
1485+
# Test 2: Memory limit at default size (2MB) should truncate the large dataset
14841486
configure_formatter(max_memory_bytes=2 * MB, min_rows=1)
1485-
html_output = df._repr_html_()
1487+
html_output = large_df._repr_html_()
14861488
tr_count = count_table_rows(html_output)
14871489
assert tr_count >= 2 # At least header + min_rows
1490+
# Should be truncated since full dataset is much larger than 2MB
1491+
assert tr_count < unrestricted_rows
14881492

1489-
# Test 3: Very large memory limit should show all data
1490-
configure_formatter(max_memory_bytes=100 * MB, min_rows=1)
1491-
html_output = df._repr_html_()
1493+
# Test 3: Very large memory limit should show much more data
1494+
configure_formatter(max_memory_bytes=100 * MB, min_rows=1, max_rows=200000)
1495+
html_output = large_df._repr_html_()
14921496
tr_count = count_table_rows(html_output)
1493-
assert tr_count == unrestricted_rows # Should show all rows
1497+
# Should show significantly more rows, possibly all
1498+
assert tr_count > 100 # Should show substantially more rows
14941499

14951500
# Test 4: Min rows should override memory limit
14961501
# With tiny memory and larger min_rows, min_rows should win
14971502
configure_formatter(max_memory_bytes=10, min_rows=2)
1498-
html_output = df._repr_html_()
1503+
html_output = large_df._repr_html_()
14991504
tr_count = count_table_rows(html_output)
15001505
assert tr_count >= 3 # At least header + 2 data rows (min_rows)
15011506
# Should show truncation message despite min_rows being satisfied
15021507
assert "data truncated" in html_output.lower()
15031508

1504-
# Test 5: Default memory limit with different min_rows
1505-
configure_formatter(max_memory_bytes=2 * MB, min_rows=2, max_rows=2)
1506-
html_output = df._repr_html_()
1509+
# Test 5: With reasonable memory and min_rows settings
1510+
configure_formatter(max_memory_bytes=2 * MB, min_rows=10, max_rows=200000)
1511+
html_output = large_df._repr_html_()
15071512
tr_count = count_table_rows(html_output)
1508-
assert tr_count == 3 # header + 2 data rows
1513+
assert tr_count >= 11 # header + at least 10 data rows (min_rows)
1514+
# Should be truncated due to memory limit
1515+
assert tr_count < unrestricted_rows
15091516

15101517

15111518
def test_html_formatter_max_rows(df, clean_formatter_state):

0 commit comments

Comments
 (0)