Skip to content

Commit 96b4eba

Browse files
committed
code update
1 parent c91e381 commit 96b4eba

File tree

1 file changed

+21
-21
lines changed

1 file changed

+21
-21
lines changed

bigframes/core/indexes/base.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -256,13 +256,10 @@ def get_loc(
256256
self, key: typing.Any
257257
) -> typing.Union[int, slice, "bigframes.series.Series"]:
258258
"""Get integer location, slice or boolean mask for requested label.
259-
260259
Args:
261260
key: The label to search for in the index.
262-
263261
Returns:
264262
An integer, slice, or boolean mask representing the location(s) of the key.
265-
266263
Raises:
267264
NotImplementedError: If the index has more than one level.
268265
KeyError: If the key is not found in the index.
@@ -274,15 +271,12 @@ def get_loc(
274271
# Get the index column from the block
275272
index_column = self._block.index_columns[0]
276273

277-
# Apply row numbering to the original data
278-
win_spec = window_spec.unbound()
279-
row_num_agg = ex.NullaryAggregation(agg_ops.RowNumberOp())
274+
# Apply row numbering to the original data - inline single-use variables
280275
row_num_col_id = ids.ColumnId.unique()
281-
282276
window_node = nodes.WindowOpNode(
283277
child=self._block._expr.node,
284-
expression=row_num_agg,
285-
window_spec=win_spec,
278+
expression=ex.NullaryAggregation(agg_ops.RowNumberOp()),
279+
window_spec=window_spec.unbound(),
286280
output_name=row_num_col_id,
287281
never_skip_nulls=True,
288282
)
@@ -326,23 +320,29 @@ def get_loc(
326320
# Multiple matches - need to determine if monotonic or not
327321
is_monotonic = self.is_monotonic_increasing or self.is_monotonic_decreasing
328322
if is_monotonic:
329-
# Get min and max positions for slice
330-
min_agg = ex.UnaryAggregation(agg_ops.min_op, ex.deref(row_num_col_id.name))
331-
max_agg = ex.UnaryAggregation(agg_ops.max_op, ex.deref(row_num_col_id.name))
332-
min_result = filtered_block._expr.aggregate([(min_agg, "min_pos")])
333-
max_result = filtered_block._expr.aggregate([(max_agg, "max_pos")])
334-
min_pos = self._block.session._executor.execute(min_result).to_py_scalar()
335-
max_pos = self._block.session._executor.execute(max_result).to_py_scalar()
336-
337-
# create slice
338-
start = int(min_pos)
339-
stop = int(max_pos) + 1 # exclusive
340-
return slice(start, stop, None)
323+
return self._get_monotonic_slice(filtered_block, row_num_col_id)
341324
else:
342325
# Return boolean mask for non-monotonic duplicates
343326
mask_block = windowed_block.select_columns([match_col_id])
344327
return bigframes.series.Series(mask_block)
345328

329+
def _get_monotonic_slice(self, filtered_block, row_num_col_id):
    """Build the positional slice covering the rows held in ``filtered_block``.

    The smallest and largest values of the row-number column are computed
    in one aggregation, so the executor is invoked a single time.

    Args:
        filtered_block: Block whose expression is aggregated. Presumably it
            contains only the rows matching the requested key -- confirm
            against the caller.
        row_num_col_id: Column id of the row-number column; its ``name`` is
            dereferenced to build both aggregations.

    Returns:
        ``slice(min_pos, max_pos + 1, None)`` where ``min_pos`` and
        ``max_pos`` are the aggregated row numbers.
    """
    # Both bounds go into one aggregate call so only one query is executed.
    bounds_expr = filtered_block._expr.aggregate(
        [
            (
                ex.UnaryAggregation(agg_ops.min_op, ex.deref(row_num_col_id.name)),
                "min_pos",
            ),
            (
                ex.UnaryAggregation(agg_ops.max_op, ex.deref(row_num_col_id.name)),
                "max_pos",
            ),
        ]
    )

    executor = self._block.session._executor
    bounds_df = executor.execute(bounds_expr).to_pandas()

    # The aggregation result is read from its first row.
    lowest = bounds_df["min_pos"].iloc[0]
    highest = bounds_df["max_pos"].iloc[0]

    # Slice stops are exclusive, so step one past the last matching row.
    return slice(int(lowest), int(highest) + 1, None)
345+
346346
def __repr__(self) -> str:
347347
# Protect against errors with uninitialized Series. See:
348348
# https://github.com/googleapis/python-bigquery-dataframes/issues/728

0 commit comments

Comments
 (0)