From 22e31d67ac15fc3dadf1910bd0572e6e45f87719 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 13 Oct 2025 22:31:53 +0000 Subject: [PATCH 1/4] test: add more window specs to the aggregation tests --- .../test_all/window_out.sql | 17 +++ .../test_all/window_partition_out.sql | 18 +++ .../test_any_value/window_out.sql | 13 ++ .../test_any_value/window_partition_out.sql | 18 +++ .../test_count/window_out.sql | 13 ++ .../test_count/window_partition_out.sql | 14 +++ .../test_max/window_out.sql | 13 ++ .../test_max/window_partition_out.sql | 18 +++ .../test_mean/window_out.sql | 13 ++ .../test_mean/window_partition_out.sql | 18 +++ .../test_min/window_out.sql | 13 ++ .../test_min/window_partition_out.sql | 18 +++ .../test_sum/window_out.sql | 13 ++ .../test_sum/window_partition_out.sql | 18 +++ .../aggregations/test_unary_compiler.py | 111 ++++++++++++++++++ 15 files changed, 328 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_partition_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/window_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/window_partition_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_partition_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_partition_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_partition_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_out.sql create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_partition_out.sql diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql new file mode 100644 index 0000000000..a91381d3be --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql @@ -0,0 +1,17 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE + WHEN `bfcol_0` IS NULL + THEN NULL + ELSE COALESCE(LOGICAL_AND(`bfcol_0`) OVER (), TRUE) + END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `agg_bool` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql new file mode 100644 index 0000000000..1a9a020b3e --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql @@ -0,0 +1,18 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE + WHEN `bfcol_0` IS NULL + THEN NULL + ELSE COALESCE(LOGICAL_AND(`bfcol_0`) OVER (PARTITION BY `bfcol_1`), TRUE) + END AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `agg_bool` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_out.sql new file mode 100644 index 0000000000..b262284b88 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE ANY_VALUE(`bfcol_0`) OVER () END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_partition_out.sql new file mode 100644 index 0000000000..66933626b5 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_value/window_partition_out.sql @@ -0,0 +1,18 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE + WHEN `bfcol_0` IS NULL + THEN NULL + ELSE ANY_VALUE(`bfcol_0`) OVER (PARTITION BY `bfcol_1`) + END AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/window_out.sql new file mode 100644 index 0000000000..beda182992 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/window_out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + COUNT(`bfcol_0`) OVER () AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/window_partition_out.sql new file mode 100644 index 0000000000..d4a12d73f8 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_count/window_partition_out.sql @@ -0,0 +1,14 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + COUNT(`bfcol_0`) OVER (PARTITION BY `bfcol_1`) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_out.sql new file mode 100644 index 0000000000..4c86cb38e0 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE MAX(`bfcol_0`) OVER () END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_partition_out.sql new file mode 100644 index 0000000000..64dc97642b --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_max/window_partition_out.sql @@ -0,0 +1,18 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE + WHEN `bfcol_0` IS NULL + THEN NULL + ELSE MAX(`bfcol_0`) OVER (PARTITION BY `bfcol_1`) + END AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_out.sql new file mode 100644 index 0000000000..bc89091ca9 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE AVG(`bfcol_0`) OVER () END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_partition_out.sql new file mode 100644 index 0000000000..e6c1cf3bb4 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/window_partition_out.sql @@ -0,0 +1,18 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE + WHEN `bfcol_0` IS NULL + THEN NULL + ELSE AVG(`bfcol_0`) OVER (PARTITION BY `bfcol_1`) + END AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_out.sql new file mode 100644 index 0000000000..0e96b56c50 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE MIN(`bfcol_0`) OVER () END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_partition_out.sql new file mode 100644 index 0000000000..4121e80f43 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_min/window_partition_out.sql @@ -0,0 +1,18 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE + WHEN `bfcol_0` IS NULL + THEN NULL + ELSE MIN(`bfcol_0`) OVER (PARTITION BY `bfcol_1`) + END AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_out.sql new file mode 100644 index 0000000000..939b491dd0 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_out.sql @@ -0,0 +1,13 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE COALESCE(SUM(`bfcol_0`) OVER (), 0) END AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_partition_out.sql new file mode 100644 index 0000000000..a23842867e --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_sum/window_partition_out.sql @@ -0,0 +1,18 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_0`, + `string_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CASE + WHEN `bfcol_0` IS NULL + THEN NULL + ELSE COALESCE(SUM(`bfcol_0`) OVER (PARTITION BY `bfcol_1`), 0) + END AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `agg_int64` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py index da388ccad1..965e5d2978 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py @@ -72,6 +72,21 @@ def test_all(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") + # Window tests + window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_bool") + snapshot.assert_match(sql_window, "window_out.sql") + + bf_df_str = scalar_types_df[[col_name, "string_col"]] + window_partition = window_spec.WindowSpec( + grouping_keys=(expression.deref("string_col"),), + ordering=(ordering.ascending_over(col_name),), + ) + sql_window_partition = _apply_unary_window_op( + bf_df_str, agg_expr, window_partition, "agg_bool" + ) + snapshot.assert_match(sql_window_partition, "window_partition_out.sql") + def test_approx_quartiles(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" @@ -105,6 +120,21 @@ def test_any_value(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") + # Window tests + window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_int64") + snapshot.assert_match(sql_window, "window_out.sql") + + bf_df_str = scalar_types_df[[col_name, "string_col"]] + window_partition = window_spec.WindowSpec( + grouping_keys=(expression.deref("string_col"),), + ordering=(ordering.ascending_over(col_name),), + ) + sql_window_partition = _apply_unary_window_op( + bf_df_str, agg_expr, window_partition, "agg_int64" + ) + snapshot.assert_match(sql_window_partition, "window_partition_out.sql") + def test_count(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" @@ -114,6 +144,21 @@ def test_count(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") + # Window tests + window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_int64") + snapshot.assert_match(sql_window, "window_out.sql") + + bf_df_str = scalar_types_df[[col_name, "string_col"]] + window_partition = window_spec.WindowSpec( + grouping_keys=(expression.deref("string_col"),), + ordering=(ordering.ascending_over(col_name),), + ) + sql_window_partition = _apply_unary_window_op( + bf_df_str, agg_expr, window_partition, "agg_int64" + ) + snapshot.assert_match(sql_window_partition, "window_partition_out.sql") + def test_dense_rank(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" @@ -228,6 +273,21 @@ def test_max(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") + # Window tests + window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_int64") + snapshot.assert_match(sql_window, "window_out.sql") + + bf_df_str = scalar_types_df[[col_name, "string_col"]] + window_partition = window_spec.WindowSpec( + grouping_keys=(expression.deref("string_col"),), + ordering=(ordering.ascending_over(col_name),), + ) + sql_window_partition = _apply_unary_window_op( + bf_df_str, agg_expr, window_partition, "agg_int64" + ) + snapshot.assert_match(sql_window_partition, "window_partition_out.sql") + def test_mean(scalar_types_df: bpd.DataFrame, snapshot): col_names = ["int64_col", "bool_col", "duration_col"] @@ -255,6 +315,24 @@ def test_mean(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") + # Window tests + col_name = "int64_col" + bf_df_int = scalar_types_df[[col_name]] + agg_expr = agg_ops.MeanOp().as_expr(col_name) + window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + sql_window = _apply_unary_window_op(bf_df_int, agg_expr, window, "agg_int64") + snapshot.assert_match(sql_window, "window_out.sql") + + bf_df_str = scalar_types_df[[col_name, "string_col"]] + window_partition = window_spec.WindowSpec( + grouping_keys=(expression.deref("string_col"),), + ordering=(ordering.ascending_over(col_name),), + ) + sql_window_partition = _apply_unary_window_op( + bf_df_str, agg_expr, window_partition, "agg_int64" + ) + snapshot.assert_match(sql_window_partition, "window_partition_out.sql") + def test_median(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df @@ -276,6 +354,21 @@ def test_min(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") + # Window tests + window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_int64") + snapshot.assert_match(sql_window, "window_out.sql") + + bf_df_str = scalar_types_df[[col_name, "string_col"]] + window_partition = window_spec.WindowSpec( + grouping_keys=(expression.deref("string_col"),), + ordering=(ordering.ascending_over(col_name),), + ) + sql_window_partition = _apply_unary_window_op( + bf_df_str, agg_expr, window_partition, "agg_int64" + ) + snapshot.assert_match(sql_window_partition, "window_partition_out.sql") + def test_quantile(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" @@ -343,6 +436,24 @@ def test_sum(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") + # Window tests + col_name = "int64_col" + bf_df_int = scalar_types_df[[col_name]] + agg_expr = agg_ops.SumOp().as_expr(col_name) + window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + sql_window = _apply_unary_window_op(bf_df_int, agg_expr, window, "agg_int64") + snapshot.assert_match(sql_window, "window_out.sql") + + bf_df_str = scalar_types_df[[col_name, "string_col"]] + window_partition = window_spec.WindowSpec( + grouping_keys=(expression.deref("string_col"),), + ordering=(ordering.ascending_over(col_name),), + ) + sql_window_partition = _apply_unary_window_op( + bf_df_str, agg_expr, window_partition, "agg_int64" + ) + snapshot.assert_match(sql_window_partition, "window_partition_out.sql") + def test_time_series_diff(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" From 60c58007381af272485986ddb56d018c62a96bfb Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 13 Oct 2025 22:33:55 +0000 Subject: [PATCH 2/4] test: update ordering spec on asc/desc and nulls_last --- .../test_dense_rank/out.sql | 2 +- .../test_diff/diff_bool.sql | 2 +- .../test_unary_compiler/test_first/out.sql | 5 +--- .../test_unary_compiler/test_last/out.sql | 5 +--- .../test_unary_compiler/test_rank/out.sql | 2 +- .../test_unary_compiler/test_shift/lag.sql | 2 +- .../test_unary_compiler/test_shift/lead.sql | 2 +- .../aggregations/test_unary_compiler.py | 30 +++++++++++-------- 8 files changed, 24 insertions(+), 26 deletions(-) diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_dense_rank/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_dense_rank/out.sql index 38b6ed9f5c..0f704dd0cc 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_dense_rank/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_dense_rank/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - DENSE_RANK() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1` + DENSE_RANK() OVER (ORDER BY `bfcol_0` DESC) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff/diff_bool.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff/diff_bool.sql index 6c7d37c037..500056928d 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff/diff_bool.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff/diff_bool.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - `bfcol_0` <> LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1` + `bfcol_0` <> LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` DESC) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first/out.sql index 6c7d39c24a..df76aa1d33 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first/out.sql @@ -8,10 +8,7 @@ WITH `bfcte_0` AS ( CASE WHEN `bfcol_0` IS NULL THEN NULL - ELSE FIRST_VALUE(`bfcol_0`) OVER ( - ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) + ELSE FIRST_VALUE(`bfcol_0`) OVER (ORDER BY `bfcol_0` DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) END AS `bfcol_1` FROM `bfcte_0` ) diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last/out.sql index 788c5ba466..4a5af9af32 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last/out.sql @@ -8,10 +8,7 @@ WITH `bfcte_0` AS ( CASE WHEN `bfcol_0` IS NULL THEN NULL - ELSE LAST_VALUE(`bfcol_0`) OVER ( - ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) + ELSE LAST_VALUE(`bfcol_0`) OVER (ORDER BY `bfcol_0` DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) END AS `bfcol_1` FROM `bfcte_0` ) diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_rank/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_rank/out.sql index 5de2330ef6..b5983c1517 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_rank/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_rank/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - RANK() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1` + RANK() OVER (ORDER BY `bfcol_0` IS NULL DESC NULLS FIRST, `bfcol_0` DESC NULLS FIRST) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_shift/lag.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_shift/lag.sql index 59e2c47edf..32e6eabb9b 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_shift/lag.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_shift/lag.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1` + LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_shift/lead.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_shift/lead.sql index 5c82b5db39..f0797f1e17 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_shift/lead.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_shift/lead.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - LEAD(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1` + LEAD(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py index 965e5d2978..3d7e4287ac 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py @@ -80,7 +80,7 @@ def test_all(scalar_types_df: bpd.DataFrame, snapshot): bf_df_str = scalar_types_df[[col_name, "string_col"]] window_partition = window_spec.WindowSpec( grouping_keys=(expression.deref("string_col"),), - ordering=(ordering.ascending_over(col_name),), + ordering=(ordering.descending_over(col_name),), ) sql_window_partition = _apply_unary_window_op( bf_df_str, agg_expr, window_partition, "agg_bool" @@ -121,7 +121,7 @@ def test_any_value(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") # Window tests - window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + window = window_spec.WindowSpec(ordering=(ordering.descending_over(col_name),)) sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_int64") snapshot.assert_match(sql_window, "window_out.sql") @@ -152,7 +152,7 @@ def test_count(scalar_types_df: bpd.DataFrame, snapshot): bf_df_str = scalar_types_df[[col_name, "string_col"]] window_partition = window_spec.WindowSpec( grouping_keys=(expression.deref("string_col"),), - ordering=(ordering.ascending_over(col_name),), + ordering=(ordering.descending_over(col_name),), ) sql_window_partition = _apply_unary_window_op( bf_df_str, agg_expr, window_partition, "agg_int64" @@ -166,7 +166,7 @@ def test_dense_rank(scalar_types_df: bpd.DataFrame, snapshot): agg_expr = agg_exprs.UnaryAggregation( agg_ops.DenseRankOp(), expression.deref(col_name) ) - window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + window = window_spec.WindowSpec(ordering=(ordering.descending_over(col_name),)) sql = _apply_unary_window_op(bf_df, agg_expr, window, "agg_int64") snapshot.assert_match(sql, "out.sql") @@ -197,7 +197,7 @@ def test_diff(scalar_types_df: bpd.DataFrame, snapshot): # Test boolean bool_col = "bool_col" bf_df_bool = scalar_types_df[[bool_col]] - window = window_spec.WindowSpec(ordering=(ordering.ascending_over(bool_col),)) + window = window_spec.WindowSpec(ordering=(ordering.descending_over(bool_col),)) bool_op = agg_exprs.UnaryAggregation( agg_ops.DiffOp(periods=1), expression.deref(bool_col) ) @@ -213,7 +213,7 @@ def test_first(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df = scalar_types_df[[col_name]] agg_expr = agg_exprs.UnaryAggregation(agg_ops.FirstOp(), expression.deref(col_name)) - window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + window = window_spec.WindowSpec(ordering=(ordering.descending_over(col_name),)) sql = _apply_unary_window_op(bf_df, agg_expr, window, "agg_int64") snapshot.assert_match(sql, "out.sql") @@ -243,7 +243,7 @@ def test_last(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df = scalar_types_df[[col_name]] agg_expr = agg_exprs.UnaryAggregation(agg_ops.LastOp(), expression.deref(col_name)) - window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + window = window_spec.WindowSpec(ordering=(ordering.descending_over(col_name),)) sql = _apply_unary_window_op(bf_df, agg_expr, window, "agg_int64") snapshot.assert_match(sql, "out.sql") @@ -281,7 +281,7 @@ def test_max(scalar_types_df: bpd.DataFrame, snapshot): bf_df_str = scalar_types_df[[col_name, "string_col"]] window_partition = window_spec.WindowSpec( grouping_keys=(expression.deref("string_col"),), - ordering=(ordering.ascending_over(col_name),), + ordering=(ordering.descending_over(col_name),), ) sql_window_partition = _apply_unary_window_op( bf_df_str, agg_expr, window_partition, "agg_int64" @@ -319,7 +319,7 @@ def test_mean(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df_int = scalar_types_df[[col_name]] agg_expr = agg_ops.MeanOp().as_expr(col_name) - window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + window = window_spec.WindowSpec(ordering=(ordering.descending_over(col_name),)) sql_window = _apply_unary_window_op(bf_df_int, agg_expr, window, "agg_int64") snapshot.assert_match(sql_window, "window_out.sql") @@ -362,7 +362,7 @@ def test_min(scalar_types_df: bpd.DataFrame, snapshot): bf_df_str = scalar_types_df[[col_name, "string_col"]] window_partition = window_spec.WindowSpec( grouping_keys=(expression.deref("string_col"),), - ordering=(ordering.ascending_over(col_name),), + ordering=(ordering.descending_over(col_name),), ) sql_window_partition = _apply_unary_window_op( bf_df_str, agg_expr, window_partition, "agg_int64" @@ -391,7 +391,9 @@ def test_rank(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[[col_name]] agg_expr = agg_exprs.UnaryAggregation(agg_ops.RankOp(), expression.deref(col_name)) - window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + window = window_spec.WindowSpec( + ordering=(ordering.descending_over(col_name, nulls_last=False),) + ) sql = _apply_unary_window_op(bf_df, agg_expr, window, "agg_int64") snapshot.assert_match(sql, "out.sql") @@ -400,7 +402,9 @@ def test_rank(scalar_types_df: bpd.DataFrame, snapshot): def test_shift(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df = scalar_types_df[[col_name]] - window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + window = window_spec.WindowSpec( + ordering=(ordering.ascending_over(col_name, nulls_last=False),) + ) # Test lag lag_op = agg_exprs.UnaryAggregation( @@ -440,7 +444,7 @@ def test_sum(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df_int = scalar_types_df[[col_name]] agg_expr = agg_ops.SumOp().as_expr(col_name) - window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + window = window_spec.WindowSpec(ordering=(ordering.descending_over(col_name),)) sql_window = _apply_unary_window_op(bf_df_int, agg_expr, window, "agg_int64") snapshot.assert_match(sql_window, "window_out.sql") From e191cc6726cae31798580fac8787145f50c4986a Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 13 Oct 2025 23:22:08 +0000 Subject: [PATCH 3/4] optimize sqls for the null-last statement --- bigframes/core/compile/sqlglot/aggregations/windows.py | 9 ++------- bigframes/core/compile/sqlglot/compiler.py | 6 ++++++ .../test_row_number_with_window/out.sql | 2 +- .../test_unary_compiler/test_date_series_diff/out.sql | 6 +----- .../test_unary_compiler/test_diff/diff_int.sql | 2 +- .../test_unary_compiler/test_first_non_null/out.sql | 2 +- .../test_unary_compiler/test_last_non_null/out.sql | 2 +- .../snapshots/test_unary_compiler/test_rank/out.sql | 2 +- .../test_unary_compiler/test_time_series_diff/out.sql | 6 +----- .../core/compile/sqlglot/aggregations/test_windows.py | 2 +- .../test_compile_concat_filter_sorted/out.sql | 4 ++-- .../test_compile_window_w_groupby_rolling/out.sql | 8 ++++---- .../test_compile_window_w_skips_nulls_op/out.sql | 10 ++-------- .../test_compile_window_wo_skips_nulls_op/out.sql | 10 ++-------- 14 files changed, 26 insertions(+), 45 deletions(-) diff --git a/bigframes/core/compile/sqlglot/aggregations/windows.py b/bigframes/core/compile/sqlglot/aggregations/windows.py index 41b4c674f9..5499fd86e8 100644 --- a/bigframes/core/compile/sqlglot/aggregations/windows.py +++ b/bigframes/core/compile/sqlglot/aggregations/windows.py @@ -40,14 +40,9 @@ def apply_window_if_present( # Unbound grouping window. order_by = None elif window.is_range_bounded: - # Note that, when the window is range-bounded, we only need one ordering key. - # There are two reasons: - # 1. Manipulating null positions requires more than one ordering key, which - # is forbidden by SQL window syntax for range rolling. - # 2. Pandas does not allow range rolling on timeseries with nulls. - order_by = get_window_order_by((window.ordering[0],), override_null_order=False) + order_by = get_window_order_by((window.ordering[0],)) else: - order_by = get_window_order_by(window.ordering, override_null_order=True) + order_by = get_window_order_by(window.ordering) order = sge.Order(expressions=order_by) if order_by else None diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py index 40795bbb48..9248463664 100644 --- a/bigframes/core/compile/sqlglot/compiler.py +++ b/bigframes/core/compile/sqlglot/compiler.py @@ -299,6 +299,12 @@ def compile_random_sample( def compile_aggregate( self, node: nodes.AggregateNode, child: ir.SQLGlotIR ) -> ir.SQLGlotIR: + # TODO: Update the notes + # Note that, when the window is range-bounded, we only need one ordering key. + # There are two reasons: + # 1. Manipulating null positions requires more than one ordering key, which + # is forbidden by SQL window syntax for range rolling. + # 2. Pandas does not allow range rolling on timeseries with nulls. ordering_cols = windows.get_window_order_by( node.order_by, override_null_order=True ) diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number_with_window/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number_with_window/out.sql index 2cee8a228f..8dc701e1f9 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number_with_window/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number_with_window/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - ROW_NUMBER() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1` + ROW_NUMBER() OVER (ORDER BY `bfcol_0` ASC NULLS LAST) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_date_series_diff/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_date_series_diff/out.sql index 599d8333c9..cd1cf3ff3b 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_date_series_diff/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_date_series_diff/out.sql @@ -5,11 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CAST(DATE_DIFF( - `bfcol_0`, - LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST), - DAY - ) * 86400000000 AS INT64) AS `bfcol_1` + CAST(DATE_DIFF(`bfcol_0`, LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC NULLS LAST), DAY) * 86400000000 AS INT64) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff/diff_int.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff/diff_int.sql index 1ce4953d87..f4fd46ee2d 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff/diff_int.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff/diff_int.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - `bfcol_0` - LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1` + `bfcol_0` - LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC NULLS LAST) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first_non_null/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first_non_null/out.sql index ff90c6fcd9..70eeefda7b 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first_non_null/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_first_non_null/out.sql @@ -6,7 +6,7 @@ WITH `bfcte_0` AS ( SELECT *, FIRST_VALUE(`bfcol_0` IGNORE NULLS) OVER ( - ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST + ORDER BY `bfcol_0` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS `bfcol_1` FROM `bfcte_0` diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last_non_null/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last_non_null/out.sql index 17e7dbd446..a246618ff0 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last_non_null/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_last_non_null/out.sql @@ -6,7 +6,7 @@ WITH `bfcte_0` AS ( SELECT *, LAST_VALUE(`bfcol_0` IGNORE NULLS) OVER ( - ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST + ORDER BY `bfcol_0` ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS `bfcol_1` FROM `bfcte_0` diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_rank/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_rank/out.sql index b5983c1517..ca3b9e8e54 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_rank/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_rank/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - RANK() OVER (ORDER BY `bfcol_0` IS NULL DESC NULLS FIRST, `bfcol_0` DESC NULLS FIRST) AS `bfcol_1` + RANK() OVER (ORDER BY `bfcol_0` DESC NULLS FIRST) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_time_series_diff/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_time_series_diff/out.sql index 8ed95b3c07..692685ee4d 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_time_series_diff/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_time_series_diff/out.sql @@ -5,11 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - TIMESTAMP_DIFF( - `bfcol_0`, - LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST), - MICROSECOND - ) AS `bfcol_1` + TIMESTAMP_DIFF(`bfcol_0`, LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC NULLS LAST), MICROSECOND) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_windows.py b/tests/unit/core/compile/sqlglot/aggregations/test_windows.py index 609d3441a5..f1a3eced9a 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_windows.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_windows.py @@ -133,7 +133,7 @@ def test_apply_window_if_present_all_params(self): ) self.assertEqual( result.sql(dialect="bigquery"), - "value OVER (PARTITION BY `col1` ORDER BY `col2` IS NULL ASC NULLS LAST, `col2` ASC NULLS LAST ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)", + "value OVER (PARTITION BY `col1` ORDER BY `col2` ASC NULLS LAST ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)", ) diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql index 5043435688..90825afd20 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql @@ -6,7 +6,7 @@ WITH `bfcte_3` AS ( ), `bfcte_7` AS ( SELECT *, - ROW_NUMBER() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_4` + ROW_NUMBER() OVER (ORDER BY `bfcol_0` ASC NULLS LAST) AS `bfcol_4` FROM `bfcte_3` ), `bfcte_11` AS ( SELECT @@ -57,7 +57,7 @@ WITH `bfcte_3` AS ( ), `bfcte_5` AS ( SELECT *, - ROW_NUMBER() OVER (ORDER BY `bfcol_21` IS NULL ASC NULLS LAST, `bfcol_21` ASC NULLS LAST) AS `bfcol_25` + ROW_NUMBER() OVER (ORDER BY `bfcol_21` ASC NULLS LAST) AS `bfcol_25` FROM `bfcte_1` ), `bfcte_9` AS ( SELECT diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql index beb3caa073..f280933a74 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql @@ -24,14 +24,14 @@ WITH `bfcte_0` AS ( CASE WHEN SUM(CAST(NOT `bfcol_7` IS NULL AS INT64)) OVER ( PARTITION BY `bfcol_9` - ORDER BY `bfcol_9` IS NULL ASC NULLS LAST, `bfcol_9` ASC NULLS LAST, `bfcol_2` IS NULL ASC NULLS LAST, `bfcol_2` ASC NULLS LAST + ORDER BY `bfcol_9` ASC NULLS LAST, `bfcol_2` ASC NULLS LAST ROWS BETWEEN 3 PRECEDING AND CURRENT ROW ) < 3 THEN NULL ELSE COALESCE( SUM(CAST(`bfcol_7` AS INT64)) OVER ( PARTITION BY `bfcol_9` - ORDER BY `bfcol_9` IS NULL ASC NULLS LAST, `bfcol_9` ASC NULLS LAST, `bfcol_2` IS NULL ASC NULLS LAST, `bfcol_2` ASC NULLS LAST + ORDER BY `bfcol_9` ASC NULLS LAST, `bfcol_2` ASC NULLS LAST ROWS BETWEEN 3 PRECEDING AND CURRENT ROW ), 0 @@ -50,14 +50,14 @@ WITH `bfcte_0` AS ( CASE WHEN SUM(CAST(NOT `bfcol_8` IS NULL AS INT64)) OVER ( PARTITION BY `bfcol_9` - ORDER BY `bfcol_9` IS NULL ASC NULLS LAST, `bfcol_9` ASC NULLS LAST, `bfcol_2` IS NULL ASC NULLS LAST, `bfcol_2` ASC NULLS LAST + ORDER BY `bfcol_9` ASC NULLS LAST, `bfcol_2` ASC NULLS LAST ROWS BETWEEN 3 PRECEDING AND CURRENT ROW ) < 3 THEN NULL ELSE COALESCE( SUM(`bfcol_8`) OVER ( PARTITION BY `bfcol_9` - ORDER BY `bfcol_9` IS NULL ASC NULLS LAST, `bfcol_9` ASC NULLS LAST, `bfcol_2` IS NULL ASC NULLS LAST, `bfcol_2` ASC NULLS LAST + ORDER BY `bfcol_9` ASC NULLS LAST, `bfcol_2` ASC NULLS LAST ROWS BETWEEN 3 PRECEDING AND CURRENT ROW ), 0 diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql index 6d779a40ac..f22ef37b7e 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql @@ -7,16 +7,10 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN SUM(CAST(NOT `bfcol_0` IS NULL AS INT64)) OVER ( - ORDER BY `bfcol_1` IS NULL ASC NULLS LAST, `bfcol_1` ASC NULLS LAST - ROWS BETWEEN 2 PRECEDING AND CURRENT ROW - ) < 3 + WHEN SUM(CAST(NOT `bfcol_0` IS NULL AS INT64)) OVER (ORDER BY `bfcol_1` ASC NULLS LAST ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) < 3 THEN NULL ELSE COALESCE( - SUM(`bfcol_0`) OVER ( - ORDER BY `bfcol_1` IS NULL ASC NULLS LAST, `bfcol_1` ASC NULLS LAST - ROWS BETWEEN 2 PRECEDING AND CURRENT ROW - ), + SUM(`bfcol_0`) OVER (ORDER BY `bfcol_1` ASC NULLS LAST ROWS BETWEEN 2 PRECEDING AND CURRENT ROW), 0 ) END AS `bfcol_4` diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_wo_skips_nulls_op/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_wo_skips_nulls_op/out.sql index 1d5d9a9e45..dcf52f2e82 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_wo_skips_nulls_op/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_wo_skips_nulls_op/out.sql @@ -7,15 +7,9 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN COUNT(CAST(NOT `bfcol_0` IS NULL AS INT64)) OVER ( - ORDER BY `bfcol_1` IS NULL ASC NULLS LAST, `bfcol_1` ASC NULLS LAST - ROWS BETWEEN 4 PRECEDING AND CURRENT ROW - ) < 5 + WHEN COUNT(CAST(NOT `bfcol_0` IS NULL AS INT64)) OVER (ORDER BY `bfcol_1` ASC NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) < 5 THEN NULL - ELSE COUNT(`bfcol_0`) OVER ( - ORDER BY `bfcol_1` IS NULL ASC NULLS LAST, `bfcol_1` ASC NULLS LAST - ROWS BETWEEN 4 PRECEDING AND CURRENT ROW - ) + ELSE COUNT(`bfcol_0`) OVER (ORDER BY `bfcol_1` ASC NULLS LAST ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) END AS `bfcol_4` FROM `bfcte_0` ) From c4edf953a4104a1a9a48b390fc13e61de10f0187 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 15 Oct 2025 17:49:23 +0000 Subject: [PATCH 4/4] update comments --- .../core/compile/sqlglot/aggregations/windows.py | 12 +++++++++--- bigframes/core/compile/sqlglot/compiler.py | 8 ++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/bigframes/core/compile/sqlglot/aggregations/windows.py b/bigframes/core/compile/sqlglot/aggregations/windows.py index 5499fd86e8..099f5832da 100644 --- a/bigframes/core/compile/sqlglot/aggregations/windows.py +++ b/bigframes/core/compile/sqlglot/aggregations/windows.py @@ -97,7 +97,15 @@ def get_window_order_by( ordering: typing.Tuple[ordering_spec.OrderingExpression, ...], override_null_order: bool = False, ) -> typing.Optional[tuple[sge.Ordered, ...]]: - """Returns the SQL order by clause for a window specification.""" + """Returns the SQL order by clause for a window specification. + Args: + ordering (Tuple[ordering_spec.OrderingExpression, ...]): + A tuple of ordering specification objects. + override_null_order (bool): + If True, overrides BigQuery's default null ordering behavior, which + is sometimes incompatible with ordered aggregations. The generated SQL + will include extra expressions to correctly enforce NULL FIRST/LAST. + """ if not ordering: return None @@ -110,8 +118,6 @@ def get_window_order_by( nulls_first = not ordering_spec_item.na_last if override_null_order: - # Bigquery SQL considers NULLS to be "smallest" values, but we need - # to override in these cases. is_null_expr = sge.Is(this=expr, expression=sge.Null()) if nulls_first and desc: order_by.append( diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py index 9248463664..47ad8db21b 100644 --- a/bigframes/core/compile/sqlglot/compiler.py +++ b/bigframes/core/compile/sqlglot/compiler.py @@ -299,12 +299,8 @@ def compile_random_sample( def compile_aggregate( self, node: nodes.AggregateNode, child: ir.SQLGlotIR ) -> ir.SQLGlotIR: - # TODO: Update the notes - # Note that, when the window is range-bounded, we only need one ordering key. - # There are two reasons: - # 1. Manipulating null positions requires more than one ordering key, which - # is forbidden by SQL window syntax for range rolling. - # 2. Pandas does not allow range rolling on timeseries with nulls. + # The BigQuery ordered aggregation cannot support for NULL FIRST/LAST, + # so we need to add extra expressions to enforce the null ordering. ordering_cols = windows.get_window_order_by( node.order_by, override_null_order=True )