Skip to content

Commit 4f5166c

Browse files
committed
optimize sqls for the null-last statement
1 parent 7018a4e commit 4f5166c

File tree

14 files changed

+26
-45
lines changed

14 files changed

+26
-45
lines changed

bigframes/core/compile/sqlglot/aggregations/windows.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,9 @@ def apply_window_if_present(
4040
# Unbound grouping window.
4141
order_by = None
4242
elif window.is_range_bounded:
43-
# Note that, when the window is range-bounded, we only need one ordering key.
44-
# There are two reasons:
45-
# 1. Manipulating null positions requires more than one ordering key, which
46-
# is forbidden by SQL window syntax for range rolling.
47-
# 2. Pandas does not allow range rolling on timeseries with nulls.
48-
order_by = get_window_order_by((window.ordering[0],), override_null_order=False)
43+
order_by = get_window_order_by((window.ordering[0],))
4944
else:
50-
order_by = get_window_order_by(window.ordering, override_null_order=True)
45+
order_by = get_window_order_by(window.ordering)
5146

5247
order = sge.Order(expressions=order_by) if order_by else None
5348

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,12 @@ def compile_random_sample(
299299
def compile_aggregate(
300300
self, node: nodes.AggregateNode, child: ir.SQLGlotIR
301301
) -> ir.SQLGlotIR:
302+
# TODO: Update the notes
303+
# Note that, when the window is range-bounded, we only need one ordering key.
304+
# There are two reasons:
305+
# 1. Manipulating null positions requires more than one ordering key, which
306+
# is forbidden by SQL window syntax for range rolling.
307+
# 2. Pandas does not allow range rolling on timeseries with nulls.
302308
ordering_cols = windows.get_window_order_by(
303309
node.order_by, override_null_order=True
304310
)

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number_with_window/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
ROW_NUMBER() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
8+
ROW_NUMBER() OVER (ORDER BY `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_date_series_diff/out.sql

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,7 @@ WITH `bfcte_0` AS (
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
CAST(DATE_DIFF(
9-
`bfcol_0`,
10-
LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST),
11-
DAY
12-
) * 86400000000 AS INT64) AS `bfcol_1`
8+
CAST(DATE_DIFF(`bfcol_0`, LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC NULLS LAST), DAY) * 86400000000 AS INT64) AS `bfcol_1`
139
FROM `bfcte_0`
1410
)
1511
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff/diff_int.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
`bfcol_0` - LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
8+
`bfcol_0` - LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first_non_null/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ WITH `bfcte_0` AS (
66
SELECT
77
*,
88
FIRST_VALUE(`bfcol_0` IGNORE NULLS) OVER (
9-
ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST
9+
ORDER BY `bfcol_0` ASC NULLS LAST
1010
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
1111
) AS `bfcol_1`
1212
FROM `bfcte_0`

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last_non_null/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ WITH `bfcte_0` AS (
66
SELECT
77
*,
88
LAST_VALUE(`bfcol_0` IGNORE NULLS) OVER (
9-
ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST
9+
ORDER BY `bfcol_0` ASC NULLS LAST
1010
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
1111
) AS `bfcol_1`
1212
FROM `bfcte_0`

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_rank/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
RANK() OVER (ORDER BY `bfcol_0` IS NULL DESC NULLS FIRST, `bfcol_0` DESC NULLS FIRST) AS `bfcol_1`
8+
RANK() OVER (ORDER BY `bfcol_0` DESC NULLS FIRST) AS `bfcol_1`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_time_series_diff/out.sql

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,7 @@ WITH `bfcte_0` AS (
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
TIMESTAMP_DIFF(
9-
`bfcol_0`,
10-
LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST),
11-
MICROSECOND
12-
) AS `bfcol_1`
8+
TIMESTAMP_DIFF(`bfcol_0`, LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC NULLS LAST), MICROSECOND) AS `bfcol_1`
139
FROM `bfcte_0`
1410
)
1511
SELECT

tests/unit/core/compile/sqlglot/aggregations/test_windows.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def test_apply_window_if_present_all_params(self):
133133
)
134134
self.assertEqual(
135135
result.sql(dialect="bigquery"),
136-
"value OVER (PARTITION BY `col1` ORDER BY `col2` IS NULL ASC NULLS LAST, `col2` ASC NULLS LAST ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)",
136+
"value OVER (PARTITION BY `col1` ORDER BY `col2` ASC NULLS LAST ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)",
137137
)
138138

139139

0 commit comments

Comments
 (0)