Skip to content

Commit a00256c

Browse files
committed
wrap up support for single index
1 parent ed8047b commit a00256c

File tree

5 files changed: 69 additions and 33 deletions.

bigframes/core/blocks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2351,7 +2351,7 @@ def merge(
23512351
joined_expr, left_post_join_ids, right_post_join_ids
23522352
)
23532353
else:
2354-
joined_expr, resolved_join_ids = resolve_col_join_ids(
2354+
joined_expr, resolved_join_ids = resolve_col_join_ids( # type: ignore
23552355
joined_expr,
23562356
left_post_join_ids,
23572357
right_post_join_ids,

bigframes/core/reshape/merge.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from typing import Literal, Sequence
2222

23+
from bigframes_vendored import constants
2324
import bigframes_vendored.pandas.core.reshape.merge as vendored_pandas_merge
2425

2526
from bigframes import dataframe, series
@@ -116,15 +117,24 @@ def _validate_left_right_on(
116117
right_index: bool = False,
117118
) -> tuple[list[str], list[str]]:
118119
# Turn left_on and right_on to lists
119-
if left_on is not None and not isinstance(left_on, (tuple, list)):
120+
if left_on is not None and not isinstance(left_on, Sequence):
120121
left_on = [left_on]
121-
if right_on is not None and not isinstance(right_on, (tuple, list)):
122+
if right_on is not None and not isinstance(right_on, Sequence):
122123
right_on = [right_on]
123124

125+
if left_index and left.index.nlevels > 1:
126+
raise ValueError(
127+
f"Joining with multi-level index is not supported. {constants.FEEDBACK_LINK}"
128+
)
129+
if right_index and right.index.nlevels > 1:
130+
raise ValueError(
131+
f"Joining with multi-level index is not supported. {constants.FEEDBACK_LINK}"
132+
)
133+
124134
# The following checks are copied from Pandas.
125135
if on is None and left_on is None and right_on is None:
126136
if left_index and right_index:
127-
return list(left._block.index_columns), (right._block.index_columns)
137+
return list(left._block.index_columns), list(right._block.index_columns)
128138
elif left_index:
129139
raise ValueError("Must pass right_on or right_index=True")
130140
elif right_index:
@@ -145,7 +155,7 @@ def _validate_left_right_on(
145155
or not right.columns.join(common_cols, how="inner").is_unique
146156
):
147157
raise ValueError(f"Data columns not unique: {repr(common_cols)}")
148-
return _to_col_ids(left, common_cols), _to_col_ids(right, common_cols)
158+
return _to_col_ids(left, common_cols.to_list()), _to_col_ids(right, common_cols.to_list())
149159

150160
elif on is not None:
151161
if left_on is not None or right_on is not None:
@@ -167,7 +177,6 @@ def _validate_left_right_on(
167177
)
168178
if not right_index and right_on is None:
169179
raise ValueError('Must pass "right_on" OR "right_index".')
170-
n = len(left_on)
171180
if right_index:
172181
if len(left_on) != right.index.nlevels:
173182
raise ValueError(
@@ -183,7 +192,6 @@ def _validate_left_right_on(
183192
)
184193
if not left_index and left_on is None:
185194
raise ValueError('Must pass "left_on" OR "left_index".')
186-
n = len(right_on)
187195
if left_index:
188196
if len(right_on) != left.index.nlevels:
189197
raise ValueError(
@@ -193,7 +201,7 @@ def _validate_left_right_on(
193201
return list(left._block.index_columns), _to_col_ids(right, right_on)
194202

195203
# The user correctly specified left_on and right_on
196-
if len(right_on) != len(left_on):
204+
if len(right_on) != len(left_on): # type: ignore
197205
raise ValueError("len(right_on) must equal len(left_on)")
198206

199207
return _to_col_ids(left, left_on), _to_col_ids(right, right_on)

tests/system/small/core/test_reshape.py

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def test_join_with_index(
4444
right_on=right_on,
4545
left_index=left_index,
4646
right_index=right_index,
47-
how=how
47+
how=how,
4848
).to_pandas()
4949
pd_result = pd.merge(
5050
df1,
@@ -53,14 +53,42 @@ def test_join_with_index(
5353
right_on=right_on,
5454
left_index=left_index,
5555
right_index=right_index,
56-
how=how
56+
how=how,
5757
)
5858

5959
pandas.testing.assert_frame_equal(
6060
bf_result, pd_result, check_dtype=False, check_index_type=False
6161
)
6262

6363

64+
@pytest.mark.parametrize(
65+
("on", "left_on", "right_on", "left_index", "right_index"),
66+
[
67+
(None, "col_a", None, True, False),
68+
(None, None, "col_c", None, True),
69+
("col_a", None, None, True, True),
70+
],
71+
)
72+
def test_join_with_index_invalid_index_arg_raise_error(
73+
session: session.Session, on, left_on, right_on, left_index, right_index
74+
):
75+
df1 = pd.DataFrame({"col_a": [1, 2, 3], "col_b": [2, 3, 4]}, index=[1, 2, 3])
76+
bf1 = session.read_pandas(df1)
77+
df2 = pd.DataFrame({"col_c": [1, 2, 3], "col_d": [2, 3, 4]}, index=[2, 3, 4])
78+
bf2 = session.read_pandas(df2)
79+
80+
with pytest.raises(ValueError):
81+
merge.merge(
82+
bf1,
83+
bf2,
84+
on=on,
85+
left_on=left_on,
86+
right_on=right_on,
87+
left_index=left_index,
88+
right_index=right_index,
89+
).to_pandas()
90+
91+
6492
@pytest.mark.parametrize(
6593
("left_on", "right_on", "left_index", "right_index"),
6694
[
@@ -70,7 +98,7 @@ def test_join_with_index(
7098
],
7199
)
72100
@pytest.mark.parametrize("how", ["inner", "left", "right", "outer"])
73-
def test_join_with_multiindex(
101+
def test_join_with_multiindex_raises_error(
74102
session: session.Session, left_on, right_on, left_index, right_index, how
75103
):
76104
multi_idx1 = pd.MultiIndex.from_tuples([(1, 2), (2, 3), (3, 5)])
@@ -80,25 +108,13 @@ def test_join_with_multiindex(
80108
df2 = pd.DataFrame({"col_c": [1, 2, 3], "col_d": [2, 3, 4]}, index=multi_idx2)
81109
bf2 = session.read_pandas(df2)
82110

83-
bf_result = merge.merge(
84-
bf1,
85-
bf2,
86-
left_on=left_on,
87-
right_on=right_on,
88-
left_index=left_index,
89-
right_index=right_index,
90-
how=how
91-
).to_pandas()
92-
pd_result = pd.merge(
93-
df1,
94-
df2,
95-
left_on=left_on,
96-
right_on=right_on,
97-
left_index=left_index,
98-
right_index=right_index,
99-
how=how
100-
)
101-
102-
pandas.testing.assert_frame_equal(
103-
bf_result.sort_index(), pd_result.sort_index(), check_dtype=False, check_index_type=False,
104-
)
111+
with pytest.raises(ValueError):
112+
merge.merge(
113+
bf1,
114+
bf2,
115+
left_on=left_on,
116+
right_on=right_on,
117+
left_index=left_index,
118+
right_index=right_index,
119+
how=how,
120+
)

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4593,6 +4593,8 @@ def merge(
45934593
*,
45944594
left_on: Optional[str] = None,
45954595
right_on: Optional[str] = None,
4596+
left_index: bool = False,
4597+
right_index: bool = False,
45964598
sort: bool = False,
45974599
suffixes: tuple[str, str] = ("_x", "_y"),
45984600
) -> DataFrame:
@@ -4705,6 +4707,10 @@ def merge(
47054707
right_on (label or list of labels):
47064708
Columns to join on in the right DataFrame. Either on or left_on + right_on
47074709
must be passed in.
4710+
left_index (bool, default False):
4711+
Use the index from the left DataFrame as the join key.
4712+
right_index (bool, default False):
4713+
Use the index from the right DataFrame as the join key.
47084714
sort:
47094715
Default False. Sort the join keys lexicographically in the
47104716
result DataFrame. If False, the order of the join keys depends

third_party/bigframes_vendored/pandas/core/reshape/merge.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ def merge(
1313
*,
1414
left_on=None,
1515
right_on=None,
16+
left_index: bool = False,
17+
right_index: bool = False,
1618
sort=False,
1719
suffixes=("_x", "_y"),
1820
):
@@ -61,6 +63,10 @@ def merge(
6163
right_on (label or list of labels):
6264
Columns to join on in the right DataFrame. Either on or left_on + right_on
6365
must be passed in.
66+
left_index (bool, default False):
67+
Use the index from the left DataFrame as the join key.
68+
right_index (bool, default False):
69+
Use the index from the right DataFrame as the join key.
6470
sort:
6571
Default False. Sort the join keys lexicographically in the
6672
result DataFrame. If False, the order of the join keys depends

0 commit comments

Comments
 (0)