From 1efd0ab1c84b6bb4b1e3ba00352137dd21373325 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 18 Jul 2025 03:46:34 +0000 Subject: [PATCH 1/9] feat: add index get_loc API --- bigframes/core/indexes/base.py | 94 ++++++++++++++++ tests/system/small/test_index.py | 100 ++++++++++++++++++ .../pandas/core/indexes/base.py | 35 ++++++ 3 files changed, 229 insertions(+) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index b442f87aec..5b7c44ed59 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -27,16 +27,21 @@ import pandas from bigframes import dtypes +from bigframes.core.array_value import ArrayValue import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.expression as ex +import bigframes.core.identifiers as ids +import bigframes.core.nodes as nodes import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations +import bigframes.core.window_spec as window_spec import bigframes.dtypes import bigframes.formatting_helpers as formatter import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops +import bigframes.series if typing.TYPE_CHECKING: import bigframes.dataframe @@ -247,6 +252,95 @@ def query_job(self) -> bigquery.QueryJob: self._query_job = query_job return self._query_job + def get_loc(self, key): + """Get integer location, slice or boolean mask for requested label. + + Args: + key: The label to search for in the index. + + Returns: + An integer, slice, or boolean mask representing the location(s) of the key. + + Raises: + NotImplementedError: If the index has more than one level. + KeyError: If the key is not found in the index. + """ + + if self.nlevels != 1: + raise NotImplementedError("get_loc only supports single-level indexes") + + # Get the index column from the block + index_column = self._block.index_columns[0] + + # Apply row numbering to the original data + win_spec = window_spec.unbound() + row_num_agg = ex.NullaryAggregation(agg_ops.RowNumberOp()) + row_num_col_id = ids.ColumnId.unique() + + window_node = nodes.WindowOpNode( + child=self._block._expr.node, + expression=row_num_agg, + window_spec=win_spec, + output_name=row_num_col_id, + never_skip_nulls=True, + ) + + windowed_array = ArrayValue(window_node) + windowed_block = self._block.__class__( + windowed_array, + index_columns=self._block.index_columns, + column_labels=self._block.column_labels.insert( + len(self._block.column_labels), None + ), + index_labels=self._block._index_labels, + ) + + # Create expression to find matching positions + match_expr = ops.eq_op.as_expr(ex.deref(index_column), ex.const(key)) + windowed_block, match_col_id = windowed_block.project_expr(match_expr) + + # Filter to only rows where the key matches + filtered_block = windowed_block.filter_by_id(match_col_id) + + # Check if key exists at all by counting on the filtered block + count_agg = ex.UnaryAggregation(agg_ops.count_op, ex.deref(row_num_col_id.name)) + count_result = filtered_block._expr.aggregate([(count_agg, "count")]) + count_scalar = self._block.session._executor.execute( + count_result + ).to_py_scalar() + + if count_scalar == 0: + raise KeyError(f"'{key}' is not in index") + + # If only one match, return integer position + if count_scalar == 1: + min_agg = ex.UnaryAggregation(agg_ops.min_op, ex.deref(row_num_col_id.name)) + position_result = filtered_block._expr.aggregate([(min_agg, "position")]) + position_scalar = self._block.session._executor.execute( + position_result + ).to_py_scalar() + return int(position_scalar) + + # Multiple matches - need to determine if monotonic or not + is_monotonic = self.is_monotonic_increasing or self.is_monotonic_decreasing + if is_monotonic: + # Get min and max positions for slice + min_agg = ex.UnaryAggregation(agg_ops.min_op, ex.deref(row_num_col_id.name)) + max_agg = ex.UnaryAggregation(agg_ops.max_op, ex.deref(row_num_col_id.name)) + min_result = filtered_block._expr.aggregate([(min_agg, "min_pos")]) + max_result = filtered_block._expr.aggregate([(max_agg, "max_pos")]) + min_pos = self._block.session._executor.execute(min_result).to_py_scalar() + max_pos = self._block.session._executor.execute(max_result).to_py_scalar() + + # create slice + start = int(min_pos) + stop = int(max_pos) + 1 # exclusive + return slice(start, stop, None) + else: + # Return boolean mask for non-monotonic duplicates + mask_block = windowed_block.select_columns([match_col_id]) + return bigframes.series.Series(mask_block) + def __repr__(self) -> str: # Protect against errors with uninitialized Series. See: # https://github.com/googleapis/python-bigquery-dataframes/issues/728 diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py index c8da85dca1..cbe3a9dab1 100644 --- a/tests/system/small/test_index.py +++ b/tests/system/small/test_index.py @@ -32,6 +32,106 @@ def test_index_construct_from_list(): pd.testing.assert_index_equal(bf_result, pd_result) +@pytest.mark.parametrize("key, expected_loc", [("a", 0), ("b", 1), ("c", 2)]) +def test_get_loc_should_return_int_for_unique_index(key, expected_loc): + """Behavior: get_loc on a unique index returns an integer position.""" + # The pandas result is used as the known-correct value. + # We assert our implementation matches it and the expected type. + bf_index = bpd.Index(["a", "b", "c"]) + + result = bf_index.get_loc(key) + + assert result == expected_loc + assert isinstance(result, int) + + +def test_get_loc_should_return_slice_for_monotonic_duplicates(): + """Behavior: get_loc on a monotonic string index with duplicates returns a slice.""" + bf_index = bpd.Index(["a", "b", "b", "c"]) + pd_index = pd.Index(["a", "b", "b", "c"]) + + bf_result = bf_index.get_loc("b") + pd_result = pd_index.get_loc("b") + + assert isinstance(bf_result, slice) + assert bf_result == pd_result # Should be slice(1, 3, None) + + +def test_get_loc_should_return_slice_for_monotonic_numeric_duplicates(): + """Behavior: get_loc on a monotonic numeric index with duplicates returns a slice.""" + bf_index = bpd.Index([1, 2, 2, 3]) + pd_index = pd.Index([1, 2, 2, 3]) + + bf_result = bf_index.get_loc(2) + pd_result = pd_index.get_loc(2) + + assert isinstance(bf_result, slice) + assert bf_result == pd_result # Should be slice(1, 3, None) + + +def test_get_loc_should_return_mask_for_non_monotonic_duplicates(): + """Behavior: get_loc on a non-monotonic string index returns a boolean array.""" + bf_index = bpd.Index(["a", "b", "c", "b"]) + pd_index = pd.Index(["a", "b", "c", "b"]) + + bf_result = bf_index.get_loc("b") + if hasattr(bf_result, "to_numpy"): + bf_array = bf_result.to_numpy() + else: + bf_array = bf_result.to_pandas().to_numpy() + pd_result = pd_index.get_loc("b") + + numpy.testing.assert_array_equal(bf_array, pd_result) + + +def test_get_loc_should_return_mask_for_non_monotonic_numeric_duplicates(): + """Behavior: get_loc on a non-monotonic numeric index returns a boolean array.""" + bf_index = bpd.Index([1, 2, 3, 2]) + pd_index = pd.Index([1, 2, 3, 2]) + + bf_result = bf_index.get_loc(2) + if hasattr(bf_result, "to_numpy"): + bf_array = bf_result.to_numpy() + else: + bf_array = bf_result.to_pandas().to_numpy() + pd_result = pd_index.get_loc(2) + + numpy.testing.assert_array_equal(bf_array, pd_result) + + +def test_get_loc_should_raise_error_for_missing_key(): + """Behavior: get_loc raises KeyError when a string key is not found.""" + bf_index = bpd.Index(["a", "b", "c"]) + + with pytest.raises(KeyError): + bf_index.get_loc("d") + + +def test_get_loc_should_raise_error_for_missing_numeric_key(): + """Behavior: get_loc raises KeyError when a numeric key is not found.""" + bf_index = bpd.Index([1, 2, 3]) + + with pytest.raises(KeyError): + bf_index.get_loc(4) + + +def test_get_loc_should_work_for_single_element_index(): + """Behavior: get_loc on a single-element index returns 0.""" + assert bpd.Index(["a"]).get_loc("a") == pd.Index(["a"]).get_loc("a") + + +def test_get_loc_should_return_slice_when_all_elements_are_duplicates(): + """Behavior: get_loc returns a full slice if all elements match the key.""" + bf_index = bpd.Index(["a", "a", "a"]) + pd_index = pd.Index(["a", "a", "a"]) + + bf_result = bf_index.get_loc("a") + pd_result = pd_index.get_loc("a") + + assert isinstance(bf_result, slice) + assert bf_result == pd_result # Should be slice(0, 3, None) + + def test_index_construct_from_series(): bf_result = bpd.Index( bpd.Series([3, 14, 159], dtype=pd.Float64Dtype(), name="series_name"), diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index 6a6bb96897..0a7d1b4af9 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -741,6 +741,41 @@ def argmin(self) -> int: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def get_loc(self, key): + """ + Get integer location, slice or boolean mask for requested label. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> unique_index = bpd.Index(list('abc')) + >>> unique_index.get_loc('b') + 1 + + >>> monotonic_index = bpd.Index(list('abbc')) + >>> monotonic_index.get_loc('b') + slice(1, 3, None) + + >>> non_monotonic_index = bpd.Index(list('abcb')) + >>> non_monotonic_index.get_loc('b') + array([False, True, False, True]) + + Args: + key: Label to get the location for. + + Returns: + int if unique index, slice if monotonic index with duplicates, else boolean array: + Integer position of the label for unique indexes. + Slice object for monotonic indexes with duplicates. + Boolean array mask for non-monotonic indexes with duplicates. + + Raises: + KeyError: If the key is not found in the index. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def argmax(self) -> int: """ Return int position of the largest value in the Series. From 594aec4c1918897901d4c0ac31b0b73dc9086e37 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 18 Jul 2025 19:37:58 +0000 Subject: [PATCH 2/9] update docstring --- bigframes/core/indexes/base.py | 4 +++- .../pandas/core/indexes/base.py | 15 +++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 5b7c44ed59..2629baa9bc 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -252,7 +252,9 @@ def query_job(self) -> bigquery.QueryJob: self._query_job = query_job return self._query_job - def get_loc(self, key): + def get_loc( + self, key: typing.Any + ) -> typing.Union[int, slice, "bigframes.series.Series"]: """Get integer location, slice or boolean mask for requested label. Args: diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index 0a7d1b4af9..eba47fc1f9 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -4,6 +4,7 @@ from collections.abc import Hashable import typing +import bigframes from bigframes import constants @@ -741,7 +742,9 @@ def argmin(self) -> int: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def get_loc(self, key): + def get_loc( + self, key: typing.Any + ) -> typing.Union[int, slice, bigframes.series.Series]: """ Get integer location, slice or boolean mask for requested label. @@ -760,16 +763,20 @@ def get_loc(self, key): >>> non_monotonic_index = bpd.Index(list('abcb')) >>> non_monotonic_index.get_loc('b') - array([False, True, False, True]) + 0 False + 1 True + 2 False + 3 True + dtype: boolean Args: key: Label to get the location for. Returns: - int if unique index, slice if monotonic index with duplicates, else boolean array: + Union[int, slice, bigframes.pandas.Series]: Integer position of the label for unique indexes. Slice object for monotonic indexes with duplicates. - Boolean array mask for non-monotonic indexes with duplicates. + Boolean Series mask for non-monotonic indexes with duplicates. Raises: KeyError: If the key is not found in the index. From fca04ac5f66df22e9d1535cd2a166646a7754d4a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 22 Jul 2025 21:51:28 +0000 Subject: [PATCH 3/9] code update --- bigframes/core/indexes/base.py | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 2629baa9bc..604797c4a8 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -256,13 +256,10 @@ def get_loc( self, key: typing.Any ) -> typing.Union[int, slice, "bigframes.series.Series"]: """Get integer location, slice or boolean mask for requested label. - Args: key: The label to search for in the index. - Returns: An integer, slice, or boolean mask representing the location(s) of the key. - Raises: NotImplementedError: If the index has more than one level. KeyError: If the key is not found in the index. @@ -274,15 +271,12 @@ def get_loc( # Get the index column from the block index_column = self._block.index_columns[0] - # Apply row numbering to the original data - win_spec = window_spec.unbound() - row_num_agg = ex.NullaryAggregation(agg_ops.RowNumberOp()) + # Apply row numbering to the original data - inline single-use variables row_num_col_id = ids.ColumnId.unique() - window_node = nodes.WindowOpNode( child=self._block._expr.node, - expression=row_num_agg, - window_spec=win_spec, + expression=ex.NullaryAggregation(agg_ops.RowNumberOp()), + window_spec=window_spec.unbound(), output_name=row_num_col_id, never_skip_nulls=True, ) @@ -326,23 +320,29 @@ def get_loc( # Multiple matches - need to determine if monotonic or not is_monotonic = self.is_monotonic_increasing or self.is_monotonic_decreasing if is_monotonic: - # Get min and max positions for slice - min_agg = ex.UnaryAggregation(agg_ops.min_op, ex.deref(row_num_col_id.name)) - max_agg = ex.UnaryAggregation(agg_ops.max_op, ex.deref(row_num_col_id.name)) - min_result = filtered_block._expr.aggregate([(min_agg, "min_pos")]) - max_result = filtered_block._expr.aggregate([(max_agg, "max_pos")]) - min_pos = self._block.session._executor.execute(min_result).to_py_scalar() - max_pos = self._block.session._executor.execute(max_result).to_py_scalar() - - # create slice - start = int(min_pos) - stop = int(max_pos) + 1 # exclusive - return slice(start, stop, None) + return self._get_monotonic_slice(filtered_block, row_num_col_id) else: # Return boolean mask for non-monotonic duplicates mask_block = windowed_block.select_columns([match_col_id]) return bigframes.series.Series(mask_block) + def _get_monotonic_slice(self, filtered_block, row_num_col_id): + """Helper method to get slice for monotonic duplicates with optimized query.""" + # Combine min and max aggregations into single query using to_pandas() + min_agg = ex.UnaryAggregation(agg_ops.min_op, ex.deref(row_num_col_id.name)) + max_agg = ex.UnaryAggregation(agg_ops.max_op, ex.deref(row_num_col_id.name)) + combined_result = filtered_block._expr.aggregate( + [(min_agg, "min_pos"), (max_agg, "max_pos")] + ) + result_df = self._block.session._executor.execute(combined_result).to_pandas() + min_pos = result_df["min_pos"].iloc[0] + max_pos = result_df["max_pos"].iloc[0] + + # Create slice + start = int(min_pos) + stop = int(max_pos) + 1 # exclusive + return slice(start, stop, None) + def __repr__(self) -> str: # Protect against errors with uninitialized Series. See: # https://github.com/googleapis/python-bigquery-dataframes/issues/728 From d4730f3bd0a32346c4514f68debfbe8dc6d38b53 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 22 Jul 2025 22:25:15 +0000 Subject: [PATCH 4/9] final polish of the helper function --- bigframes/core/indexes/base.py | 69 +++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 604797c4a8..4a88a77f29 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -252,32 +252,33 @@ def query_job(self) -> bigquery.QueryJob: self._query_job = query_job return self._query_job - def get_loc( - self, key: typing.Any - ) -> typing.Union[int, slice, "bigframes.series.Series"]: + def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]: """Get integer location, slice or boolean mask for requested label. + Args: - key: The label to search for in the index. + key: + The label to search for in the index. + Returns: An integer, slice, or boolean mask representing the location(s) of the key. + Raises: NotImplementedError: If the index has more than one level. KeyError: If the key is not found in the index. """ - if self.nlevels != 1: raise NotImplementedError("get_loc only supports single-level indexes") # Get the index column from the block index_column = self._block.index_columns[0] - # Apply row numbering to the original data - inline single-use variables - row_num_col_id = ids.ColumnId.unique() + # Apply row numbering to the original data + row_number_column_id = ids.ColumnId.unique() window_node = nodes.WindowOpNode( child=self._block._expr.node, expression=ex.NullaryAggregation(agg_ops.RowNumberOp()), window_spec=window_spec.unbound(), - output_name=row_num_col_id, + output_name=row_number_column_id, never_skip_nulls=True, ) @@ -299,7 +300,9 @@ def get_loc( filtered_block = windowed_block.filter_by_id(match_col_id) # Check if key exists at all by counting on the filtered block - count_agg = ex.UnaryAggregation(agg_ops.count_op, ex.deref(row_num_col_id.name)) + count_agg = ex.UnaryAggregation( + agg_ops.count_op, ex.deref(row_number_column_id.name) + ) count_result = filtered_block._expr.aggregate([(count_agg, "count")]) count_scalar = self._block.session._executor.execute( count_result @@ -310,38 +313,52 @@ def get_loc( # If only one match, return integer position if count_scalar == 1: - min_agg = ex.UnaryAggregation(agg_ops.min_op, ex.deref(row_num_col_id.name)) + min_agg = ex.UnaryAggregation( + agg_ops.min_op, ex.deref(row_number_column_id.name) + ) position_result = filtered_block._expr.aggregate([(min_agg, "position")]) position_scalar = self._block.session._executor.execute( position_result ).to_py_scalar() return int(position_scalar) - # Multiple matches - need to determine if monotonic or not + # Handle multiple matches based on index monotonicity is_monotonic = self.is_monotonic_increasing or self.is_monotonic_decreasing if is_monotonic: - return self._get_monotonic_slice(filtered_block, row_num_col_id) + return self._get_monotonic_slice(filtered_block, row_number_column_id) else: # Return boolean mask for non-monotonic duplicates mask_block = windowed_block.select_columns([match_col_id]) return bigframes.series.Series(mask_block) - def _get_monotonic_slice(self, filtered_block, row_num_col_id): - """Helper method to get slice for monotonic duplicates with optimized query.""" - # Combine min and max aggregations into single query using to_pandas() - min_agg = ex.UnaryAggregation(agg_ops.min_op, ex.deref(row_num_col_id.name)) - max_agg = ex.UnaryAggregation(agg_ops.max_op, ex.deref(row_num_col_id.name)) - combined_result = filtered_block._expr.aggregate( - [(min_agg, "min_pos"), (max_agg, "max_pos")] - ) + def _get_monotonic_slice( + self, filtered_block, row_number_column_id: "ids.ColumnId" + ) -> slice: + """Helper method to get a slice for monotonic duplicates with an optimized query.""" + # Combine min and max aggregations into a single query for efficiency + min_max_aggs = [ + ( + ex.UnaryAggregation( + agg_ops.min_op, ex.deref(row_number_column_id.name) + ), + "min_pos", + ), + ( + ex.UnaryAggregation( + agg_ops.max_op, ex.deref(row_number_column_id.name) + ), + "max_pos", + ), + ] + combined_result = filtered_block._expr.aggregate(min_max_aggs) + + # Execute query and extract positions result_df = self._block.session._executor.execute(combined_result).to_pandas() - min_pos = result_df["min_pos"].iloc[0] - max_pos = result_df["max_pos"].iloc[0] + min_pos = int(result_df["min_pos"].iloc[0]) + max_pos = int(result_df["max_pos"].iloc[0]) - # Create slice - start = int(min_pos) - stop = int(max_pos) + 1 # exclusive - return slice(start, stop, None) + # Create slice (stop is exclusive) + return slice(min_pos, max_pos + 1) def __repr__(self) -> str: # Protect against errors with uninitialized Series. See: From 9e8a46f7700f47741609ddf3de4f1ca64662d974 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 22 Jul 2025 22:55:31 +0000 Subject: [PATCH 5/9] fix mypy --- tests/system/small/test_index.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py index cbe3a9dab1..a82bdf7635 100644 --- a/tests/system/small/test_index.py +++ b/tests/system/small/test_index.py @@ -74,13 +74,15 @@ def test_get_loc_should_return_mask_for_non_monotonic_duplicates(): bf_index = bpd.Index(["a", "b", "c", "b"]) pd_index = pd.Index(["a", "b", "c", "b"]) + pd_result = pd_index.get_loc("b") bf_result = bf_index.get_loc("b") + + assert not isinstance(bf_result, (int, slice)) + if hasattr(bf_result, "to_numpy"): bf_array = bf_result.to_numpy() else: bf_array = bf_result.to_pandas().to_numpy() - pd_result = pd_index.get_loc("b") - numpy.testing.assert_array_equal(bf_array, pd_result) @@ -89,13 +91,15 @@ def test_get_loc_should_return_mask_for_non_monotonic_numeric_duplicates(): bf_index = bpd.Index([1, 2, 3, 2]) pd_index = pd.Index([1, 2, 3, 2]) + pd_result = pd_index.get_loc(2) bf_result = bf_index.get_loc(2) + + assert not isinstance(bf_result, (int, slice)) + if hasattr(bf_result, "to_numpy"): bf_array = bf_result.to_numpy() else: bf_array = bf_result.to_pandas().to_numpy() - pd_result = pd_index.get_loc(2) - numpy.testing.assert_array_equal(bf_array, pd_result) From b8c9e2258f20820ab38b4d6b14c7ba0e2ccd7a8f Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 23 Jul 2025 17:22:58 +0000 Subject: [PATCH 6/9] reset index of result --- bigframes/core/indexes/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 4a88a77f29..f7b33a93c2 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -329,6 +329,8 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]: else: # Return boolean mask for non-monotonic duplicates mask_block = windowed_block.select_columns([match_col_id]) + # Reset the index to use positional integers instead of original index values + mask_block = mask_block.reset_index(drop=True) return bigframes.series.Series(mask_block) def _get_monotonic_slice( From 1d4827fcd61733de1127a33edae72336cc14e269 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 23 Jul 2025 22:05:17 +0000 Subject: [PATCH 7/9] change docstring --- bigframes/core/indexes/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index f7b33a93c2..2ccece7325 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -330,8 +330,9 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]: # Return boolean mask for non-monotonic duplicates mask_block = windowed_block.select_columns([match_col_id]) # Reset the index to use positional integers instead of original index values - mask_block = mask_block.reset_index(drop=True) - return bigframes.series.Series(mask_block) + result_series = bigframes.series.Series(mask_block) + # Ensure correct dtype and name to match pandas behavior + return result_series.astype("boolean") def _get_monotonic_slice( self, filtered_block, row_number_column_id: "ids.ColumnId" From 97cdf5122cf3cf482f978f51231e9bf47979216d Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 23 Jul 2025 23:18:36 +0000 Subject: [PATCH 8/9] fix docstring --- bigframes/core/indexes/base.py | 3 ++- third_party/bigframes_vendored/pandas/core/indexes/base.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 2ccece7325..4c33fe1e5f 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -330,8 +330,9 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]: # Return boolean mask for non-monotonic duplicates mask_block = windowed_block.select_columns([match_col_id]) # Reset the index to use positional integers instead of original index values - result_series = bigframes.series.Series(mask_block) + mask_block = mask_block.reset_index(drop=True) # Ensure correct dtype and name to match pandas behavior + result_series = bigframes.series.Series(mask_block) return result_series.astype("boolean") def _get_monotonic_slice( diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index eba47fc1f9..035eba74fd 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -767,7 +767,7 @@ def get_loc( 1 True 2 False 3 True - dtype: boolean + Name: nan, dtype: boolean Args: key: Label to get the location for. From 9a5688c1a22131697ea0998e6ccda10e4f3ae31b Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 24 Jul 2025 17:35:42 +0000 Subject: [PATCH 9/9] change a function call --- bigframes/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 4c33fe1e5f..2bb58da330 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -283,7 +283,7 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]: ) windowed_array = ArrayValue(window_node) - windowed_block = self._block.__class__( + windowed_block = blocks.Block( windowed_array, index_columns=self._block.index_columns, column_labels=self._block.column_labels.insert(