Skip to content

Commit 4ded421

Browse files
authored
Merge branch 'main' into sycai_ai_gen_bool_sqlglot
2 parents 5814727 + 801be1b commit 4ded421

File tree

12 files changed

+170
-40
lines changed

12 files changed

+170
-40
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ repos:
2020
hooks:
2121
- id: trailing-whitespace
2222
- id: end-of-file-fixer
23-
exclude: "^tests/unit/core/compile/sqlglot/snapshots"
23+
exclude: "^tests/unit/core/compile/sqlglot/.*snapshots"
2424
- id: check-yaml
2525
- repo: https://github.com/pycqa/isort
2626
rev: 5.12.0

bigframes/core/blocks.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,10 @@ def from_local(
252252
pass
253253
return block
254254

255+
@property
256+
def has_index(self) -> bool:
257+
return len(self._index_columns) > 0
258+
255259
@property
256260
def index(self) -> BlockIndexProperties:
257261
"""Row identities for values in the Block."""

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import typing
1818

19+
import pandas as pd
1920
import sqlglot.expressions as sge
2021

2122
from bigframes import dtypes
@@ -46,18 +47,22 @@ def _(
4647
return apply_window_if_present(sge.func("COUNT", column.expr), window)
4748

4849

49-
@UNARY_OP_REGISTRATION.register(agg_ops.SumOp)
50+
@UNARY_OP_REGISTRATION.register(agg_ops.MaxOp)
5051
def _(
51-
op: agg_ops.SumOp,
52+
op: agg_ops.MaxOp,
5253
column: typed_expr.TypedExpr,
5354
window: typing.Optional[window_spec.WindowSpec] = None,
5455
) -> sge.Expression:
55-
expr = column.expr
56-
if column.dtype == dtypes.BOOL_DTYPE:
57-
expr = sge.Cast(this=column.expr, to="INT64")
58-
# Will be null if all inputs are null. Pandas defaults to zero sum though.
59-
expr = apply_window_if_present(sge.func("SUM", expr), window)
60-
return sge.func("IFNULL", expr, ir._literal(0, column.dtype))
56+
return apply_window_if_present(sge.func("MAX", column.expr), window)
57+
58+
59+
@UNARY_OP_REGISTRATION.register(agg_ops.MinOp)
60+
def _(
61+
op: agg_ops.MinOp,
62+
column: typed_expr.TypedExpr,
63+
window: typing.Optional[window_spec.WindowSpec] = None,
64+
) -> sge.Expression:
65+
return apply_window_if_present(sge.func("MIN", column.expr), window)
6166

6267

6368
@UNARY_OP_REGISTRATION.register(agg_ops.SizeUnaryOp)
@@ -67,3 +72,20 @@ def _(
6772
window: typing.Optional[window_spec.WindowSpec] = None,
6873
) -> sge.Expression:
6974
return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
75+
76+
77+
@UNARY_OP_REGISTRATION.register(agg_ops.SumOp)
78+
def _(
79+
op: agg_ops.SumOp,
80+
column: typed_expr.TypedExpr,
81+
window: typing.Optional[window_spec.WindowSpec] = None,
82+
) -> sge.Expression:
83+
expr = column.expr
84+
if column.dtype == dtypes.BOOL_DTYPE:
85+
expr = sge.Cast(this=column.expr, to="INT64")
86+
87+
expr = apply_window_if_present(sge.func("SUM", expr), window)
88+
89+
# Will be null if all inputs are null. Pandas defaults to zero sum though.
90+
zero = pd.to_timedelta(0) if column.dtype == dtypes.TIMEDELTA_DTYPE else 0
91+
return sge.func("IFNULL", expr, ir._literal(zero, column.dtype))

bigframes/core/compile/sqlglot/scalar_compiler.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def register_unary_op(
7979
"""
8080
key = typing.cast(str, op_ref.name)
8181

82-
def decorator(impl: typing.Callable[..., TypedExpr]):
82+
def decorator(impl: typing.Callable[..., sge.Expression]):
8383
def normalized_impl(args: typing.Sequence[TypedExpr], op: ops.RowOp):
8484
if pass_op:
8585
return impl(args[0], op)
@@ -108,7 +108,7 @@ def register_binary_op(
108108
"""
109109
key = typing.cast(str, op_ref.name)
110110

111-
def decorator(impl: typing.Callable[..., TypedExpr]):
111+
def decorator(impl: typing.Callable[..., sge.Expression]):
112112
def normalized_impl(args: typing.Sequence[TypedExpr], op: ops.RowOp):
113113
if pass_op:
114114
return impl(args[0], args[1], op)
@@ -132,7 +132,7 @@ def register_ternary_op(
132132
"""
133133
key = typing.cast(str, op_ref.name)
134134

135-
def decorator(impl: typing.Callable[..., TypedExpr]):
135+
def decorator(impl: typing.Callable[..., sge.Expression]):
136136
def normalized_impl(args: typing.Sequence[TypedExpr], op: ops.RowOp):
137137
return impl(args[0], args[1], args[2])
138138

@@ -156,7 +156,7 @@ def register_nary_op(
156156
"""
157157
key = typing.cast(str, op_ref.name)
158158

159-
def decorator(impl: typing.Callable[..., TypedExpr]):
159+
def decorator(impl: typing.Callable[..., sge.Expression]):
160160
def normalized_impl(args: typing.Sequence[TypedExpr], op: ops.RowOp):
161161
if pass_op:
162162
return impl(*args, op=op)

bigframes/dataframe.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,6 @@ def memory_usage(self, index: bool = True):
489489
column_sizes = pandas.concat([index_size, column_sizes])
490490
return column_sizes
491491

492-
@validations.requires_index
493492
def info(
494493
self,
495494
verbose: Optional[bool] = None,
@@ -512,12 +511,17 @@ def info(
512511

513512
obuf.write(f"{type(self)}\n")
514513

515-
index_type = "MultiIndex" if self.index.nlevels > 1 else "Index"
514+
if self._block.has_index:
515+
index_type = "MultiIndex" if self.index.nlevels > 1 else "Index"
516516

517-
# These accessses are kind of expensive, maybe should try to skip?
518-
first_indice = self.index[0]
519-
last_indice = self.index[-1]
520-
obuf.write(f"{index_type}: {n_rows} entries, {first_indice} to {last_indice}\n")
517+
# These accesses are kind of expensive; maybe we should try to skip them?
518+
first_indice = self.index[0]
519+
last_indice = self.index[-1]
520+
obuf.write(
521+
f"{index_type}: {n_rows} entries, {first_indice} to {last_indice}\n"
522+
)
523+
else:
524+
obuf.write("NullIndex\n")
521525

522526
dtype_strings = self.dtypes.astype("string")
523527
if show_all_columns:

tests/system/small/test_null_index.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# limitations under the License.
1414

1515

16+
import io
17+
1618
import pandas as pd
1719
import pytest
1820

@@ -44,6 +46,38 @@ def test_null_index_materialize(scalars_df_null_index, scalars_pandas_df_default
4446
)
4547

4648

49+
def test_null_index_info(scalars_df_null_index):
50+
expected = (
51+
"<class 'bigframes.dataframe.DataFrame'>\n"
52+
"NullIndex\n"
53+
"Data columns (total 14 columns):\n"
54+
" # Column Non-Null Count Dtype\n"
55+
"--- ------------- ---------------- ------------------------------\n"
56+
" 0 bool_col 8 non-null boolean\n"
57+
" 1 bytes_col 6 non-null binary[pyarrow]\n"
58+
" 2 date_col 7 non-null date32[day][pyarrow]\n"
59+
" 3 datetime_col 6 non-null timestamp[us][pyarrow]\n"
60+
" 4 geography_col 4 non-null geometry\n"
61+
" 5 int64_col 8 non-null Int64\n"
62+
" 6 int64_too 9 non-null Int64\n"
63+
" 7 numeric_col 6 non-null decimal128(38, 9)[pyarrow]\n"
64+
" 8 float64_col 7 non-null Float64\n"
65+
" 9 rowindex_2 9 non-null Int64\n"
66+
" 10 string_col 8 non-null string\n"
67+
" 11 time_col 6 non-null time64[us][pyarrow]\n"
68+
" 12 timestamp_col 6 non-null timestamp[us, tz=UTC][pyarrow]\n"
69+
" 13 duration_col 7 non-null duration[us][pyarrow]\n"
70+
"dtypes: Float64(1), Int64(3), binary[pyarrow](1), boolean(1), date32[day][pyarrow](1), decimal128(38, 9)[pyarrow](1), duration[us][pyarrow](1), geometry(1), string(1), time64[us][pyarrow](1), timestamp[us, tz=UTC][pyarrow](1), timestamp[us][pyarrow](1)\n"
71+
"memory usage: 1269 bytes\n"
72+
)
73+
74+
bf_result = io.StringIO()
75+
76+
scalars_df_null_index.drop(columns="rowindex").info(buf=bf_result)
77+
78+
assert expected == bf_result.getvalue()
79+
80+
4781
def test_null_index_series_repr(scalars_df_null_index, scalars_pandas_df_default_index):
4882
bf_result = scalars_df_null_index["int64_too"].head(5).__repr__()
4983
pd_result = (
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
COUNT(`bfcol_0`) AS `bfcol_1`
8+
FROM `bfcte_0`
9+
)
10+
SELECT
11+
`bfcol_1` AS `int64_col`
12+
FROM `bfcte_1`
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
MAX(`bfcol_0`) AS `bfcol_1`
8+
FROM `bfcte_0`
9+
)
10+
SELECT
11+
`bfcol_1` AS `int64_col`
12+
FROM `bfcte_1`
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
MIN(`bfcol_0`) AS `bfcol_1`
8+
FROM `bfcte_0`
9+
)
10+
SELECT
11+
`bfcol_1` AS `int64_col`
12+
FROM `bfcte_1`

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_size/out.sql renamed to tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_size_unary/out.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`string_col` AS `bfcol_0`
3+
`float64_col` AS `bfcol_0`
44
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
55
), `bfcte_1` AS (
66
SELECT
77
COUNT(1) AS `bfcol_1`
88
FROM `bfcte_0`
99
)
1010
SELECT
11-
`bfcol_1` AS `string_col_agg`
11+
`bfcol_1` AS `float64_col`
1212
FROM `bfcte_1`

0 commit comments

Comments
 (0)