def __contains__(self, key) -> bool:
    """Return whether ``key`` matches a label of this index.

    Args:
        key: Label to look for. Anything that is not a tuple, or any key
            on a single-level index, is treated as one label. On a
            multi-level index a tuple is compared level by level; a tuple
            shorter than the number of levels is compared against the
            leading levels only.

    Returns:
        bool: Result of reducing the per-row match expression with
        ``agg_ops.AnyOp``. ``False`` is returned early when the index has
        no levels, when the key has more parts than the index has levels,
        or when a key part has no type compatible with its level's dtype.

    Raises:
        TypeError: If ``key`` is unhashable.
    """
    hash(key)  # to throw for unhashable values
    if self.nlevels == 0:
        return False

    if (not isinstance(key, tuple)) or (self.nlevels == 1):
        key = (key,)

    # zip() below stops at the shortest input, so a key with more parts
    # than the index has levels would otherwise be matched on its prefix
    # alone and could wrongly report True. Such a key can never be a label.
    if len(key) > self.nlevels:
        return False

    # NOTE(review): an empty tuple on a multi-level index leaves
    # match_exprs empty, and functools.reduce() then raises TypeError
    # (no initial value). Confirm the desired result for that input.
    match_exprs = []
    for key_part, index_col, dtype in zip(
        key, self._block.index_columns, self._block.index.dtypes
    ):
        key_type = bigframes.dtypes.is_compatible(key_part, dtype)
        if key_type is None:
            # No compatible type for this level, so the key cannot match.
            return False
        key_expr = ex.const(key_part, key_type)
        match_expr = ops.eq_null_match_op.as_expr(ex.deref(index_col), key_expr)
        match_exprs.append(match_expr)

    # A row matches only when every supplied key part matches its level.
    match_expr_final = functools.reduce(ops.and_op.as_expr, match_exprs)
    block, match_col = self._block.project_expr(match_expr_final)
    return cast(bool, block.get_stat(match_col, agg_ops.AnyOp()))
def __contains__(self, key) -> bool:
    """Return whether ``key`` is in this Series' index.

    Membership is delegated to ``self.index``; the Series' values are not
    consulted.

    Args:
        key: Label to look for in the index.

    Returns:
        bool: Result of ``key in self.index``.
    """
    return key in self.index
def test_null_index_contains(scalars_df_null_index):
    # Smoke test: `in` on the null-index DataFrame fixture evaluates
    # without raising and does not report 3 as a member.
    # NOTE(review): DataFrame.__contains__ in this change returns
    # `key in self.columns`, so this assertion checks column labels. It
    # does not appear to reach the `nlevels == 0` branch of
    # Index.__contains__ -- confirm whether a Series/Index case is wanted.
    assert 3 not in scalars_df_null_index
scalars_df_index.int64_col.equals(scalars_df_index.int64_col) pd_result = scalars_pandas_df_index.int64_col.equals(