Skip to content

Commit f055619

Browse files
feat: Add str accessor to index
1 parent 0a44e84 commit f055619

File tree

4 files changed

+55
-13
lines changed

4 files changed

+55
-13
lines changed

bigframes/core/indexes/base.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343

4444
if typing.TYPE_CHECKING:
4545
import bigframes.dataframe
46+
import bigframes.operations.strings
4647
import bigframes.series
4748

4849

@@ -254,6 +255,12 @@ def query_job(self) -> bigquery.QueryJob:
254255
self._query_job = query_job
255256
return self._query_job
256257

258+
@property
def str(self) -> bigframes.operations.strings.StringMethods:
    """Return a ``StringMethods`` accessor wrapping this index."""
    # The strings module is imported at call time rather than at module
    # load; presumably this avoids an import cycle -- TODO confirm.
    import bigframes.operations.strings as string_ops

    accessor = string_ops.StringMethods(self)
    return accessor
263+
257264
def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
258265
"""Get integer location, slice or boolean mask for requested label.
259266
@@ -317,7 +324,9 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
317324
result_series = bigframes.series.Series(mask_block)
318325
return result_series.astype("boolean")
319326

320-
def _get_monotonic_slice(self, filtered_block, offsets_id: str) -> slice:
327+
def _get_monotonic_slice(
328+
self, filtered_block, offsets_id: __builtins__.str
329+
) -> slice:
321330
"""Helper method to get a slice for monotonic duplicates with an optimized query."""
322331
# Combine min and max aggregations into a single query for efficiency
323332
min_max_aggs = [
@@ -343,7 +352,7 @@ def _get_monotonic_slice(self, filtered_block, offsets_id: str) -> slice:
343352
# Create slice (stop is exclusive)
344353
return slice(min_pos, max_pos + 1)
345354

346-
def __repr__(self) -> str:
355+
def __repr__(self) -> __builtins__.str:
347356
# Protect against errors with uninitialized Series. See:
348357
# https://github.com/googleapis/python-bigquery-dataframes/issues/728
349358
if not hasattr(self, "_block"):
@@ -417,7 +426,7 @@ def sort_values(
417426
*,
418427
inplace: bool = False,
419428
ascending: bool = True,
420-
na_position: str = "last",
429+
na_position: __builtins__.str = "last",
421430
) -> Index:
422431
if na_position not in ["first", "last"]:
423432
raise ValueError("Param na_position must be one of 'first' or 'last'")
@@ -604,7 +613,7 @@ def dropna(self, how: typing.Literal["all", "any"] = "any") -> Index:
604613
result = block_ops.dropna(self._block, self._block.index_columns, how=how)
605614
return Index(result)
606615

607-
def drop_duplicates(self, *, keep: str = "first") -> Index:
616+
def drop_duplicates(self, *, keep: __builtins__.str = "first") -> Index:
608617
if keep is not False:
609618
validations.enforce_ordered(self, "drop_duplicates")
610619
block = block_ops.drop_duplicates(self._block, self._block.index_columns, keep)
@@ -656,6 +665,9 @@ def __contains__(self, key) -> bool:
656665
block, match_col = self._block.project_expr(match_expr_final)
657666
return cast(bool, block.get_stat(match_col, agg_ops.AnyOp()))
658667

668+
def _apply_unary_op(self, op: ops.UnaryOp) -> Index:
    """Apply a unary operator to this index.

    The operator is turned into an expression over a free variable named
    "input" and handed to ``_apply_unary_expr``, whose result is returned.
    """
    input_var = ex.free_var("input")
    unary_expr = op.as_expr(input_var)
    return self._apply_unary_expr(unary_expr)
670+
659671
def _apply_unary_expr(
660672
self,
661673
op: ex.Expression,
@@ -762,9 +774,15 @@ def item(self):
762774
return self.to_series().peek(2).item()
763775

764776
def __eq__(self, other) -> Index: # type: ignore
765-
return self._apply_binop(other, ops.eq_op)
777+
return self._apply_binary_op(other, ops.eq_op)
766778

767-
def _apply_binop(self, other, op: ops.BinaryOp) -> Index:
779+
def _apply_binary_op(
780+
self,
781+
other,
782+
op: ops.BinaryOp,
783+
alignment: typing.Literal["outer", "left"] = "outer",
784+
) -> Index:
785+
# Note: the alignment arg exists only for compatibility with accessors; it is ignored because it is irrelevant for implicit joins.
768786
# TODO: Handle local objects, or objects not implicitly alignable? Gets ambiguous with partial ordering though
769787
if isinstance(other, (bigframes.series.Series, Index)):
770788
other = Index(other)
@@ -785,12 +803,13 @@ def _apply_binop(self, other, op: ops.BinaryOp) -> Index:
785803
for lid, rid in zip(lexpr.column_ids, rexpr.column_ids)
786804
]
787805
)
806+
labels = self.names if self.names == other.names else [None] * len(res_ids)
788807
return Index(
789808
blocks.Block(
790809
expr.select_columns(res_ids),
791810
index_columns=res_ids,
792811
column_labels=[],
793-
index_labels=[None] * len(res_ids),
812+
index_labels=labels,
794813
)
795814
)
796815
elif (
@@ -799,7 +818,7 @@ def _apply_binop(self, other, op: ops.BinaryOp) -> Index:
799818
block, id = self._block.project_expr(
800819
op.as_expr(self._block.index_columns[0], ex.const(other))
801820
)
802-
return Index(block.select_column(id))
821+
return Index(block.set_index([id], index_labels=self.names))
803822
elif isinstance(other, tuple) and len(other) == self.nlevels:
804823
block = self._block.project_exprs(
805824
[
@@ -809,7 +828,7 @@ def _apply_binop(self, other, op: ops.BinaryOp) -> Index:
809828
labels=[None] * self.nlevels,
810829
drop=True,
811830
)
812-
return Index(block.set_index(block.value_columns))
831+
return Index(block.set_index(block.value_columns, index_labels=self.names))
813832
else:
814833
return NotImplemented
815834

bigframes/core/indexes/multi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def __eq__(self, other) -> Index: # type: ignore
6060
import bigframes.operations as ops
6161
import bigframes.operations.aggregations as agg_ops
6262

63-
eq_result = self._apply_binop(other, ops.eq_op)._block.expr
63+
eq_result = self._apply_binary_op(other, ops.eq_op)._block.expr
6464

6565
as_array = ops.ToArrayOp().as_expr(
6666
*(

bigframes/operations/strings.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from __future__ import annotations
1616

1717
import re
18-
from typing import Literal, Optional, Union
18+
from typing import Literal, Optional, TYPE_CHECKING, Union
1919

2020
import bigframes_vendored.constants as constants
2121
import bigframes_vendored.pandas.core.strings.accessor as vendorstr
@@ -25,7 +25,10 @@
2525
import bigframes.operations as ops
2626
from bigframes.operations._op_converters import convert_index, convert_slice
2727
import bigframes.operations.aggregations as agg_ops
28-
import bigframes.series as series
28+
29+
if TYPE_CHECKING:
30+
import bigframes.core.indexes.base as indices
31+
import bigframes.series as series
2932

3033
# Maps from python to re2
3134
REGEXP_FLAGS = {
@@ -39,7 +42,7 @@
3942
class StringMethods(vendorstr.StringMethods):
4043
__doc__ = vendorstr.StringMethods.__doc__
4144

42-
def __init__(self, data: series.Series):
45+
def __init__(self, data: Union[series.Series, indices.Index]):
4346
self._data = data
4447

4548
def __getitem__(self, key: Union[int, slice]) -> series.Series:

tests/system/small/test_index.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,3 +685,23 @@ def test_index_eq_aligned_index(scalars_df_index, scalars_pandas_df_index):
685685
scalars_pandas_df_index.int64_col.abs()
686686
)
687687
assert bf_result == pd.Index(pd_result)
688+
689+
690+
def test_index_str_accessor_unary(scalars_df_index, scalars_pandas_df_index):
    """A unary ``Index.str`` method (``pad``) must give the same index as pandas."""
    column = "string_col"
    pad_kwargs = {"side": "both", "fillchar": "~"}

    # Build both indexes from the same string column.
    bigframes_index = scalars_df_index.set_index(column).index
    pandas_index = scalars_pandas_df_index.set_index(column).index

    actual = bigframes_index.str.pad(30, **pad_kwargs).to_pandas()
    expected = pandas_index.str.pad(30, **pad_kwargs)

    pd.testing.assert_index_equal(actual, expected)
698+
699+
700+
def test_index_str_accessor_binary(scalars_df_index, scalars_pandas_df_index):
    """A binary ``Index.str`` method (``cat``) must give the same index as pandas."""
    column = "string_col"

    # Build both indexes from the same string column.
    bigframes_index = scalars_df_index.set_index(column).index
    pandas_index = scalars_pandas_df_index.set_index(column).index

    # Concatenate each value with its own first four characters.
    bigframes_prefix = bigframes_index.str[:4]
    actual = bigframes_index.str.cat(bigframes_prefix).to_pandas()

    pandas_prefix = pandas_index.str[:4]
    expected = pandas_index.str.cat(pandas_prefix)

    pd.testing.assert_index_equal(actual, expected)

0 commit comments

Comments
 (0)