Skip to content

Commit 42f5a72

Browse files
committed
test: add deduplicate join selection tests for DataFrame
1 parent ab224a6 commit 42f5a72

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

python/tests/test_dataframe.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2635,3 +2635,34 @@ def trigger_interrupt():
26352635

26362636
# Make sure the interrupt thread has finished
26372637
interrupt_thread.join(timeout=1.0)
2638+
2639+
2640+
def test_join_deduplicate_select():
    """Test that select works correctly after a deduplicated join.

    Two frames sharing an ``id`` column are joined on that column. The test
    then checks that ``id`` can be selected on its own, and together with one
    column from each side, and that only the ids present on both sides
    (2 and 3) come back.
    """
    ctx = SessionContext()

    left_df = ctx.from_pydict({"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]})
    right_df = ctx.from_pydict({"id": [2, 3, 4], "city": ["New York", "London", "Paris"]})

    joined_df = left_df.join(right_df, on="id")

    # Sort before comparing so the assertions do not rely on the join's output
    # row order, and materialize the whole result rather than only the first
    # record batch.
    by_id = column("id").sort()

    # Selecting the join key alone must work after the join.
    id_result = joined_df.select(column("id")).sort(by_id).to_pydict()
    assert id_result == {"id": [2, 3]}

    # Selecting the key plus one column from each side must work as well.
    multi_result = (
        joined_df.select(column("id"), column("name"), column("city"))
        .sort(by_id)
        .to_pydict()
    )

    expected_data = {
        "id": [2, 3],
        "name": ["Bob", "Charlie"],
        "city": ["New York", "London"],
    }

    # Dict equality ignores key order, so check the column order explicitly.
    assert list(multi_result) == ["id", "name", "city"]
    assert multi_result == expected_data

0 commit comments

Comments
 (0)