Skip to content

Commit 1edee54

Browse files
committed
Enable approximate median on non-numeric columns in the Ibis compiler too
1 parent f3f733f commit 1edee54

File tree

6 files changed: 25 additions (+25) and 46 deletions (-46)

bigframes/core/compile/ibis_compiler/aggregate_compiler.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,15 +175,11 @@ def _(
175175

176176

177177
@compile_unary_agg.register
178-
@numeric_op
179178
def _(
180179
op: agg_ops.MedianOp,
181180
column: ibis_types.NumericColumn,
182181
window=None,
183182
) -> ibis_types.NumericValue:
184-
# TODO(swast): Allow switching between exact and approximate median.
185-
# For now, the best we can do is an approximate median when we're doing
186-
# an aggregation, as PERCENTILE_CONT is only an analytic function.
187183
return cast(ibis_types.NumericValue, column.approx_median())
188184

189185

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,6 @@ def _(
6262
column: typed_expr.TypedExpr,
6363
window: typing.Optional[window_spec.WindowSpec] = None,
6464
) -> sge.Expression:
65-
# TODO(swast): Allow switching between exact and approximate median.
66-
# For now, the best we can do is an approximate median when we're doing
67-
# an aggregation, as PERCENTILE_CONT is only an analytic function.
6865
approx_quantiles = sge.func("APPROX_QUANTILES", column.expr, sge.convert(2))
6966
return sge.Bracket(
7067
this=approx_quantiles, expressions=[sge.func("OFFSET", sge.convert(1))]

tests/system/small/engines/test_aggregation.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,6 @@ def test_sql_engines_median_op_aggregates(
9292
node = apply_agg_to_all_valid(
9393
scalars_array_value,
9494
agg_ops.MedianOp(),
95-
# Exclude columns are not supported by Ibis.
96-
excluded_cols=[
97-
"bytes_col",
98-
"date_col",
99-
"datetime_col",
100-
"time_col",
101-
"timestamp_col",
102-
"string_col",
103-
],
10495
).node
10596
left_engine = direct_gbq_execution.DirectGbqExecutor(bigquery_client)
10697
right_engine = direct_gbq_execution.DirectGbqExecutor(

tests/system/small/test_series.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1919,10 +1919,22 @@ def test_mean(scalars_dfs):
19191919
assert math.isclose(pd_result, bf_result)
19201920

19211921

1922-
def test_median(scalars_dfs):
1922+
@pytest.mark.parametrize(
1923+
("col_name"),
1924+
[
1925+
"int64_col",
1926+
# Non-numeric column
1927+
"bytes_col",
1928+
"date_col",
1929+
"datetime_col",
1930+
"time_col",
1931+
"timestamp_col",
1932+
"string_col",
1933+
],
1934+
)
1935+
def test_median(scalars_dfs, col_name):
19231936
scalars_df, scalars_pandas_df = scalars_dfs
1924-
col_name = "int64_col"
1925-
bf_result = scalars_df[col_name].median()
1937+
bf_result = scalars_df[col_name].median(exact=False)
19261938
pd_max = scalars_pandas_df[col_name].max()
19271939
pd_min = scalars_pandas_df[col_name].min()
19281940
# Median is approximate, so just check for plausibility.
@@ -1932,7 +1944,7 @@ def test_median(scalars_dfs):
19321944
def test_median_exact(scalars_dfs):
19331945
scalars_df, scalars_pandas_df = scalars_dfs
19341946
col_name = "int64_col"
1935-
bf_result = scalars_df[col_name].median(exact=True)
1947+
bf_result = scalars_df[col_name].median()
19361948
pd_result = scalars_pandas_df[col_name].median()
19371949
assert math.isclose(pd_result, bf_result)
19381950

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,18 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`bytes_col` AS `bfcol_0`,
4-
`date_col` AS `bfcol_1`,
5-
`datetime_col` AS `bfcol_2`,
6-
`int64_col` AS `bfcol_3`,
7-
`string_col` AS `bfcol_4`,
8-
`time_col` AS `bfcol_5`,
9-
`timestamp_col` AS `bfcol_6`
3+
`date_col` AS `bfcol_0`,
4+
`int64_col` AS `bfcol_1`,
5+
`string_col` AS `bfcol_2`
106
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
117
), `bfcte_1` AS (
128
SELECT
13-
APPROX_QUANTILES(`bfcol_3`, 2)[OFFSET(1)] AS `bfcol_7`,
14-
APPROX_QUANTILES(`bfcol_0`, 2)[OFFSET(1)] AS `bfcol_8`,
15-
APPROX_QUANTILES(`bfcol_1`, 2)[OFFSET(1)] AS `bfcol_9`,
16-
APPROX_QUANTILES(`bfcol_2`, 2)[OFFSET(1)] AS `bfcol_10`,
17-
APPROX_QUANTILES(`bfcol_5`, 2)[OFFSET(1)] AS `bfcol_11`,
18-
APPROX_QUANTILES(`bfcol_6`, 2)[OFFSET(1)] AS `bfcol_12`,
19-
APPROX_QUANTILES(`bfcol_4`, 2)[OFFSET(1)] AS `bfcol_13`
9+
APPROX_QUANTILES(`bfcol_1`, 2)[OFFSET(1)] AS `bfcol_3`,
10+
APPROX_QUANTILES(`bfcol_0`, 2)[OFFSET(1)] AS `bfcol_4`,
11+
APPROX_QUANTILES(`bfcol_2`, 2)[OFFSET(1)] AS `bfcol_5`
2012
FROM `bfcte_0`
2113
)
2214
SELECT
23-
`bfcol_7` AS `int64_col`,
24-
`bfcol_8` AS `bytes_col`,
25-
`bfcol_9` AS `date_col`,
26-
`bfcol_10` AS `datetime_col`,
27-
`bfcol_11` AS `time_col`,
28-
`bfcol_12` AS `timestamp_col`,
29-
`bfcol_13` AS `string_col`
15+
`bfcol_3` AS `int64_col`,
16+
`bfcol_4` AS `date_col`,
17+
`bfcol_5` AS `string_col`
3018
FROM `bfcte_1`

tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,7 @@ def test_median(scalar_types_df: bpd.DataFrame, snapshot):
6060
bf_df = scalar_types_df
6161
ops_map = {
6262
"int64_col": agg_ops.MedianOp().as_expr("int64_col"),
63-
# Includes columns are not supported by Ibis but supported by BigQuery.
64-
"bytes_col": agg_ops.MedianOp().as_expr("bytes_col"),
6563
"date_col": agg_ops.MedianOp().as_expr("date_col"),
66-
"datetime_col": agg_ops.MedianOp().as_expr("datetime_col"),
67-
"time_col": agg_ops.MedianOp().as_expr("time_col"),
68-
"timestamp_col": agg_ops.MedianOp().as_expr("timestamp_col"),
6964
"string_col": agg_ops.MedianOp().as_expr("string_col"),
7065
}
7166
sql = _apply_unary_agg_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))

0 commit comments

Comments
 (0)