Skip to content

Commit 3667997

Browse files
more tests
1 parent 1431c24 commit 3667997

File tree

5 files changed

+111
-2
lines changed

5 files changed

+111
-2
lines changed

bigframes/core/compile/polars/compiler.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,26 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
328328
assert isinstance(op, string_ops.StrContainsRegexOp)
329329
return input.str.contains(pattern=op.pat, literal=False)
330330

331+
@compile_op.register(string_ops.UpperOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``UpperOp`` by calling ``str.to_uppercase()`` on the input expression."""
    assert isinstance(op, string_ops.UpperOp)
    uppercased = input.str.to_uppercase()
    return uppercased
335+
336+
@compile_op.register(string_ops.LowerOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``LowerOp`` by calling ``str.to_lowercase()`` on the input expression."""
    assert isinstance(op, string_ops.LowerOp)
    lowercased = input.str.to_lowercase()
    return lowercased
340+
341+
@compile_op.register(string_ops.ArrayLenOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``ArrayLenOp`` by calling ``list.len()`` on the input expression."""
    assert isinstance(op, string_ops.ArrayLenOp)
    element_count = input.list.len()
    return element_count
345+
346+
@compile_op.register(string_ops.StrLenOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``StrLenOp`` by calling ``str.len_chars()`` on the input expression."""
    assert isinstance(op, string_ops.StrLenOp)
    char_count = input.str.len_chars()
    return char_count
350+
331351
@compile_op.register(string_ops.StartsWithOp)
332352
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
333353
assert isinstance(op, string_ops.StartsWithOp)

bigframes/core/compile/polars/lowering.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
generic_ops,
2828
json_ops,
2929
numeric_ops,
30+
string_ops,
3031
)
3132
import bigframes.operations as ops
3233

@@ -347,11 +348,28 @@ def lower(self, expr: expression.OpExpression) -> expression.Expression:
347348
return ops.coalesce_op.as_expr(new_isin, expression.const(False))
348349

349350

351+
class LowerLenOp(op_lowering.OpLoweringRule):
    """Lowering rule that swaps the generic ``LenOp`` for a type-specific op.

    String-like arguments are rewritten to ``StrLenOp`` and array-like
    arguments to ``ArrayLenOp``.
    """

    @property
    def op(self) -> type[ops.ScalarOp]:
        """The scalar op type associated with this rule (``LenOp``)."""
        return string_ops.LenOp

    def lower(self, expr: expression.OpExpression) -> expression.Expression:
        """Rewrite a ``LenOp`` expression according to its argument's output type.

        Args:
            expr: Op expression whose op is a ``LenOp``; only its first child
                is used.

        Returns:
            A ``StrLenOp`` or ``ArrayLenOp`` expression over the same argument.

        Raises:
            ValueError: If the argument is neither string-like nor array-like.
        """
        assert isinstance(expr.op, string_ops.LenOp)
        operand = expr.children[0]
        operand_type = operand.output_type

        # Guard-clause dispatch: the first matching type family wins.
        if dtypes.is_string_like(operand_type):
            return string_ops.StrLenOp().as_expr(operand)
        if dtypes.is_array_like(operand_type):
            return string_ops.ArrayLenOp().as_expr(operand)
        raise ValueError(f"Unexpected type: {operand_type}")
366+
367+
350368
def _coerce_comparables(
351369
expr1: expression.Expression,
352370
expr2: expression.Expression,
353371
*,
354-
bools_only: bool = False
372+
bools_only: bool = False,
355373
):
356374
if bools_only:
357375
if (
@@ -446,6 +464,7 @@ def _lower_cast(cast_op: ops.AsTypeOp, arg: expression.Expression):
446464
LowerAsTypeRule(),
447465
LowerInvertOp(),
448466
LowerIsinOp(),
467+
LowerLenOp(),
449468
)
450469

451470

bigframes/operations/python_op_maps.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,13 @@
3232
operator.sub: numeric_ops.sub_op,
3333
operator.mul: numeric_ops.mul_op,
3434
operator.truediv: numeric_ops.div_op,
35+
operator.floordiv: numeric_ops.floordiv_op,
3536
operator.mod: numeric_ops.mod_op,
3637
operator.pow: numeric_ops.pow_op,
3738
operator.pos: numeric_ops.pos_op,
3839
operator.neg: numeric_ops.neg_op,
3940
operator.abs: numeric_ops.abs_op,
40-
operator.eq: comparison_ops.eq_null_match_op,
41+
operator.eq: comparison_ops.eq_op,
4142
operator.ne: comparison_ops.ne_op,
4243
operator.gt: comparison_ops.gt_op,
4344
operator.lt: comparison_ops.lt_op,
@@ -49,6 +50,8 @@
4950
## math
5051
math.log: numeric_ops.ln_op,
5152
math.log10: numeric_ops.log10_op,
53+
math.log1p: numeric_ops.log1p_op,
54+
math.expm1: numeric_ops.expm1_op,
5255
math.sin: numeric_ops.sin_op,
5356
math.cos: numeric_ops.cos_op,
5457
math.tan: numeric_ops.tan_op,
@@ -58,6 +61,8 @@
5861
math.asin: numeric_ops.arcsin_op,
5962
math.acos: numeric_ops.arccos_op,
6063
math.atan: numeric_ops.arctan_op,
64+
math.floor: numeric_ops.floor_op,
65+
math.ceil: numeric_ops.ceil_op,
6166
## str
6267
str.upper: string_ops.upper_op,
6368
str.lower: string_ops.lower_op,

bigframes/operations/string_ops.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,23 @@
3030
)
3131
len_op = LenOp()
3232

33+
## Specialized len ops for compile-time lowering
# "strlen": unary op whose signature pairs the ``is_string_like`` predicate
# with a fixed ``INT_DTYPE`` output.
StrLenOp = base_ops.create_unary_op(
    name="strlen",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_string_like,
        dtypes.INT_DTYPE,
        description="string-like",
    ),
)
str_len_op = StrLenOp()
41+
42+
# "arraylen": unary op whose signature pairs the ``is_array_like`` predicate
# with a fixed ``INT_DTYPE`` output.
ArrayLenOp = base_ops.create_unary_op(
    name="arraylen",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_array_like,
        dtypes.INT_DTYPE,
        description="array-like",
    ),
)
array_len_op = ArrayLenOp()
49+
3350
ReverseOp = base_ops.create_unary_op(
3451
name="reverse", type_signature=op_typing.STRING_TRANSFORM
3552
)

tests/unit/test_series_polars.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import datetime as dt
1616
import json
1717
import math
18+
import operator
1819
import pathlib
1920
import re
2021
import tempfile
@@ -4697,6 +4698,30 @@ def wrapped(x):
46974698
assert_series_equal(bf_result, pd_result, check_dtype=False)
46984699

46994700

4701+
@pytest.mark.parametrize(
    ("ufunc",),
    [pytest.param(fn) for fn in (str.upper, str.lower, len)],
)
def test_series_apply_python_string_fns(scalars_dfs, ufunc):
    """``Series.apply`` with a Python string function should match pandas."""
    bf_df, pd_df = scalars_dfs

    actual = bf_df["string_col"].apply(ufunc).to_pandas()

    def str_only(value):
        # Only call the function on real strings; anything else maps to None.
        if isinstance(value, str):
            return ufunc(value)
        return None

    expected = pd_df["string_col"].apply(str_only)

    assert_series_equal(actual, expected, check_dtype=False)
4723+
4724+
47004725
@pytest.mark.parametrize(
47014726
("ufunc",),
47024727
[
@@ -4720,6 +4745,29 @@ def test_combine_series_ufunc(scalars_dfs, ufunc):
47204745
assert_series_equal(bf_result, pd_result, check_dtype=False)
47214746

47224747

4748+
@pytest.mark.parametrize(
    ("func",),
    [
        pytest.param(operator.add, id="add"),
        pytest.param(operator.truediv, id="divide"),
    ],
)
def test_combine_series_pyfunc(scalars_dfs, func):
    """``Series.combine`` with an ``operator`` function should match pandas."""
    bf_df, pd_df = scalars_dfs

    # Combine the null-free int64 column with itself on the bigframes side.
    bf_values = bf_df["int64_col"].dropna()
    actual = bf_values.combine(bf_values, func).to_pandas()

    # Same operation on the pandas side.
    pd_values = pd_df["int64_col"].dropna()
    expected = pd_values.combine(pd_values, func)

    assert_series_equal(actual, expected, check_dtype=False)
4769+
4770+
47234771
def test_combine_scalar_ufunc(scalars_dfs):
47244772
scalars_df, scalars_pandas_df = scalars_dfs
47254773

0 commit comments

Comments
 (0)