Skip to content

Commit 15242e0

Browse files
committed
Update testing.assert_frame_equal to describe all columns with differences
1 parent 7bf6660 commit 15242e0

File tree

2 files changed

+98
-23
lines changed

2 files changed

+98
-23
lines changed

pandas/_testing/asserters.py

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1315,6 +1315,9 @@ def assert_frame_equal(
13151315
if check_like:
13161316
left = left.reindex_like(right)
13171317

1318+
column_errors = []
1319+
first_error_message = None
1320+
13181321
# compare by blocks
13191322
if by_blocks:
13201323
rblocks = right._to_dict_of_blocks()
@@ -1338,29 +1341,45 @@ def assert_frame_equal(
13381341
# use check_index=False, because we do not want to run
13391342
# assert_index_equal for each column,
13401343
# as we already checked it for the whole dataframe before.
1341-
with warnings.catch_warnings():
1342-
warnings.filterwarnings(
1343-
"ignore",
1344-
message="the 'check_datetimelike_compat' keyword",
1345-
category=Pandas4Warning,
1346-
)
1347-
assert_series_equal(
1348-
lcol,
1349-
rcol,
1350-
check_dtype=check_dtype,
1351-
check_index_type=check_index_type,
1352-
check_exact=check_exact,
1353-
check_names=check_names,
1354-
check_datetimelike_compat=check_datetimelike_compat,
1355-
check_categorical=check_categorical,
1356-
check_freq=check_freq,
1357-
obj=f'{obj}.iloc[:, {i}] (column name="{col}")',
1358-
rtol=rtol,
1359-
atol=atol,
1360-
check_index=False,
1361-
check_flags=False,
1362-
)
1363-
1344+
try:
1345+
with warnings.catch_warnings():
1346+
warnings.filterwarnings(
1347+
"ignore",
1348+
message="the 'check_datetimelike_compat' keyword",
1349+
category=Pandas4Warning,
1350+
)
1351+
assert_series_equal(
1352+
lcol,
1353+
rcol,
1354+
check_dtype=check_dtype,
1355+
check_index_type=check_index_type,
1356+
check_exact=check_exact,
1357+
check_names=check_names,
1358+
check_datetimelike_compat=check_datetimelike_compat,
1359+
check_categorical=check_categorical,
1360+
check_freq=check_freq,
1361+
obj=f'{obj}.iloc[:, {i}] (column name="{col}")',
1362+
rtol=rtol,
1363+
atol=atol,
1364+
check_index=False,
1365+
check_flags=False,
1366+
)
1367+
except AssertionError as e:
1368+
column_errors.append((i, col))
1369+
if first_error_message is None:
1370+
first_error_message = str(e)
1371+
1372+
if column_errors:
1373+
column_indices = [idx for idx, _ in column_errors]
1374+
column_names = [name for _, name in column_errors]
1375+
1376+
error_summary = f"{obj} are different\n\n"
1377+
error_summary += f"Columns with differences (positions {column_indices}):\n"
1378+
error_summary += f"{column_names}\n\n"
1379+
error_summary += f"First difference details:\n"
1380+
error_summary += first_error_message
1381+
1382+
raise AssertionError(error_summary)
13641383

13651384
def assert_equal(left, right, **kwargs) -> None:
13661385
"""

pandas/tests/util/test_assert_frame_equal.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,3 +423,59 @@ def test_assert_frame_equal_nested_df_na(na_value):
423423
df1 = DataFrame({"df": [inner]})
424424
df2 = DataFrame({"df": [inner]})
425425
tm.assert_frame_equal(df1, df2)
426+
427+
def test_assert_frame_equal_reports_all_different_columns():
    """Check that only the mismatching columns are listed in the failure message.

    Columns "a" (position 0) and "c" (position 2) differ between the frames,
    while "b" is identical and must not appear in the list of differing columns.
    """
    df1 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    df2 = DataFrame({"a": [1, 99, 3], "b": [4, 5, 6], "c": [7, 8, 99]})

    with pytest.raises(AssertionError) as exc_info:
        tm.assert_frame_equal(df1, df2)

    lines = str(exc_info.value).split("\n")
    header_positions = [
        i for i, line in enumerate(lines) if "Columns with differences" in line
    ]
    # Fail loudly when the summary header is absent instead of silently
    # skipping the per-line checks below.
    assert header_positions, "summary header missing from error message"
    header_idx = header_positions[0]

    # The positions of the differing columns are reported on the header line.
    assert "0, 2" in lines[header_idx]

    # The column names are reported on the line right after the header.
    assert header_idx + 1 < len(lines), "column list line missing after header"
    column_list_line = lines[header_idx + 1]
    assert "'a'" in column_list_line or '"a"' in column_list_line
    assert "'c'" in column_list_line or '"c"' in column_list_line
    # "b" is equal in both frames, so it must not be listed as different.
    assert "'b'" not in column_list_line and '"b"' not in column_list_line
459+
460+
def test_assert_frame_equal_all_columns_different():
    """Check that every column is listed when all columns differ.

    All three columns ("a", "b", "c" at positions 0, 1, 2) hold different
    values in the two frames, so each one must appear in the summary.
    """
    df1 = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    df2 = DataFrame({"a": [10, 20], "b": [30, 40], "c": [50, 60]})

    with pytest.raises(AssertionError) as exc_info:
        tm.assert_frame_equal(df1, df2)

    lines = str(exc_info.value).split("\n")
    header_positions = [
        i for i, line in enumerate(lines) if "Columns with differences" in line
    ]
    assert header_positions, "summary header missing from error message"
    header_idx = header_positions[0]

    # The positions of the differing columns are reported on the header line.
    assert "0, 1, 2" in lines[header_idx]

    # Check the names on the column-list line rather than the whole message:
    # the first-difference details also contain a quoted column name, which
    # would otherwise satisfy the check for that column on its own.
    assert header_idx + 1 < len(lines), "column list line missing after header"
    column_list_line = lines[header_idx + 1]
    for name in ("a", "b", "c"):
        assert f"'{name}'" in column_list_line or f'"{name}"' in column_list_line

0 commit comments

Comments
 (0)