Skip to content

Commit c3c292c

Browse files
authored
refactor: enable engine tests for sqlglot concat, filtering and string compilers (#2120)
* refactor: enable engine tests for the sqlglot concat compiler
* enable engine tests for the filtering and string compilers
* fix a compatibility issue where sg.union does not accept more than two expressions in older versions of sqlglot
1 parent 27b422f commit c3c292c

File tree

7 files changed

+228
-61
lines changed

7 files changed

+228
-61
lines changed

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def from_union(
175175
), f"At least two select expressions must be provided, but got {selects}."
176176

177177
existing_ctes: list[sge.CTE] = []
178-
union_selects: list[sge.Select] = []
178+
union_selects: list[sge.Expression] = []
179179
for select in selects:
180180
assert isinstance(
181181
select, sge.Select
@@ -204,10 +204,14 @@ def from_union(
204204
sge.Select().select(*selections).from_(sge.Table(this=new_cte_name))
205205
)
206206

207-
union_expr = sg.union(
208-
*union_selects,
209-
distinct=False,
210-
copy=False,
207+
union_expr = typing.cast(
208+
sge.Select,
209+
functools.reduce(
210+
lambda x, y: sge.Union(
211+
this=x, expression=y, distinct=False, copy=False
212+
),
213+
union_selects,
214+
),
211215
)
212216
final_select_expr = sge.Select().select(sge.Star()).from_(union_expr.subquery())
213217
final_select_expr.set("with", sge.With(expressions=existing_ctes))

tests/system/small/engines/test_concat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
REFERENCE_ENGINE = polars_executor.PolarsExecutor()
2525

2626

27-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
27+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
2828
def test_engines_concat_self(
2929
scalars_array_value: array_value.ArrayValue,
3030
engine,
@@ -34,7 +34,7 @@ def test_engines_concat_self(
3434
assert_equivalence_execution(result.node, REFERENCE_ENGINE, engine)
3535

3636

37-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
37+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
3838
def test_engines_concat_filtered_sorted(
3939
scalars_array_value: array_value.ArrayValue,
4040
engine,

tests/system/small/engines/test_filtering.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
REFERENCE_ENGINE = polars_executor.PolarsExecutor()
2525

2626

27-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
27+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
2828
def test_engines_filter_bool_col(
2929
scalars_array_value: array_value.ArrayValue,
3030
engine,
@@ -35,7 +35,7 @@ def test_engines_filter_bool_col(
3535
assert_equivalence_execution(node, REFERENCE_ENGINE, engine)
3636

3737

38-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
38+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
3939
def test_engines_filter_expr_cond(
4040
scalars_array_value: array_value.ArrayValue,
4141
engine,
@@ -47,7 +47,7 @@ def test_engines_filter_expr_cond(
4747
assert_equivalence_execution(node, REFERENCE_ENGINE, engine)
4848

4949

50-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
50+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
5151
def test_engines_filter_true(
5252
scalars_array_value: array_value.ArrayValue,
5353
engine,
@@ -57,7 +57,7 @@ def test_engines_filter_true(
5757
assert_equivalence_execution(node, REFERENCE_ENGINE, engine)
5858

5959

60-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
60+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
6161
def test_engines_filter_false(
6262
scalars_array_value: array_value.ArrayValue,
6363
engine,

tests/system/small/engines/test_strings.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
REFERENCE_ENGINE = polars_executor.PolarsExecutor()
2626

2727

28-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
28+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
2929
def test_engines_str_contains(scalars_array_value: array_value.ArrayValue, engine):
3030
arr, _ = scalars_array_value.compute_values(
3131
[
@@ -38,7 +38,7 @@ def test_engines_str_contains(scalars_array_value: array_value.ArrayValue, engin
3838
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
3939

4040

41-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
41+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
4242
def test_engines_str_contains_regex(
4343
scalars_array_value: array_value.ArrayValue, engine
4444
):
@@ -53,7 +53,7 @@ def test_engines_str_contains_regex(
5353
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
5454

5555

56-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
56+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
5757
def test_engines_str_startswith(scalars_array_value: array_value.ArrayValue, engine):
5858
arr, _ = scalars_array_value.compute_values(
5959
[
@@ -65,7 +65,7 @@ def test_engines_str_startswith(scalars_array_value: array_value.ArrayValue, eng
6565
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
6666

6767

68-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
68+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
6969
def test_engines_str_endswith(scalars_array_value: array_value.ArrayValue, engine):
7070
arr, _ = scalars_array_value.compute_values(
7171
[
Lines changed: 42 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,82 @@
11
WITH `bfcte_1` AS (
22
SELECT
3-
*
4-
FROM UNNEST(ARRAY<STRUCT<`bfcol_0` INT64, `bfcol_1` INT64, `bfcol_2` INT64, `bfcol_3` STRING, `bfcol_4` INT64>>[STRUCT(0, 123456789, 0, 'Hello, World!', 0), STRUCT(1, -987654321, 1, 'こんにちは', 1), STRUCT(2, 314159, 2, ' ¡Hola Mundo! ', 2), STRUCT(3, CAST(NULL AS INT64), 3, CAST(NULL AS STRING), 3), STRUCT(4, -234892, 4, 'Hello, World!', 4), STRUCT(5, 55555, 5, 'Güten Tag!', 5), STRUCT(6, 101202303, 6, 'capitalize, This ', 6), STRUCT(7, -214748367, 7, ' سلام', 7), STRUCT(8, 2, 8, 'T', 8)])
3+
`int64_col` AS `bfcol_0`,
4+
`rowindex` AS `bfcol_1`,
5+
`string_col` AS `bfcol_2`
6+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
57
), `bfcte_3` AS (
68
SELECT
79
*,
8-
`bfcol_4` AS `bfcol_10`
10+
ROW_NUMBER() OVER () AS `bfcol_7`
911
FROM `bfcte_1`
1012
), `bfcte_5` AS (
1113
SELECT
1214
*,
13-
0 AS `bfcol_16`
15+
0 AS `bfcol_8`
1416
FROM `bfcte_3`
1517
), `bfcte_6` AS (
1618
SELECT
17-
`bfcol_0` AS `bfcol_17`,
18-
`bfcol_2` AS `bfcol_18`,
19-
`bfcol_1` AS `bfcol_19`,
20-
`bfcol_3` AS `bfcol_20`,
21-
`bfcol_16` AS `bfcol_21`,
22-
`bfcol_10` AS `bfcol_22`
19+
`bfcol_1` AS `bfcol_9`,
20+
`bfcol_1` AS `bfcol_10`,
21+
`bfcol_0` AS `bfcol_11`,
22+
`bfcol_2` AS `bfcol_12`,
23+
`bfcol_8` AS `bfcol_13`,
24+
`bfcol_7` AS `bfcol_14`
2325
FROM `bfcte_5`
2426
), `bfcte_0` AS (
2527
SELECT
26-
*
27-
FROM UNNEST(ARRAY<STRUCT<`bfcol_23` INT64, `bfcol_24` INT64, `bfcol_25` INT64, `bfcol_26` STRING, `bfcol_27` INT64>>[STRUCT(0, 123456789, 0, 'Hello, World!', 0), STRUCT(1, -987654321, 1, 'こんにちは', 1), STRUCT(2, 314159, 2, ' ¡Hola Mundo! ', 2), STRUCT(3, CAST(NULL AS INT64), 3, CAST(NULL AS STRING), 3), STRUCT(4, -234892, 4, 'Hello, World!', 4), STRUCT(5, 55555, 5, 'Güten Tag!', 5), STRUCT(6, 101202303, 6, 'capitalize, This ', 6), STRUCT(7, -214748367, 7, ' سلام', 7), STRUCT(8, 2, 8, 'T', 8)])
28+
`int64_col` AS `bfcol_15`,
29+
`rowindex` AS `bfcol_16`,
30+
`string_col` AS `bfcol_17`
31+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
2832
), `bfcte_2` AS (
2933
SELECT
3034
*,
31-
`bfcol_27` AS `bfcol_33`
35+
ROW_NUMBER() OVER () AS `bfcol_22`
3236
FROM `bfcte_0`
3337
), `bfcte_4` AS (
3438
SELECT
3539
*,
36-
1 AS `bfcol_39`
40+
1 AS `bfcol_23`
3741
FROM `bfcte_2`
3842
), `bfcte_7` AS (
3943
SELECT
40-
`bfcol_23` AS `bfcol_40`,
41-
`bfcol_25` AS `bfcol_41`,
42-
`bfcol_24` AS `bfcol_42`,
43-
`bfcol_26` AS `bfcol_43`,
44-
`bfcol_39` AS `bfcol_44`,
45-
`bfcol_33` AS `bfcol_45`
44+
`bfcol_16` AS `bfcol_24`,
45+
`bfcol_16` AS `bfcol_25`,
46+
`bfcol_15` AS `bfcol_26`,
47+
`bfcol_17` AS `bfcol_27`,
48+
`bfcol_23` AS `bfcol_28`,
49+
`bfcol_22` AS `bfcol_29`
4650
FROM `bfcte_4`
4751
), `bfcte_8` AS (
4852
SELECT
4953
*
5054
FROM (
5155
SELECT
52-
`bfcol_17` AS `bfcol_46`,
53-
`bfcol_18` AS `bfcol_47`,
54-
`bfcol_19` AS `bfcol_48`,
55-
`bfcol_20` AS `bfcol_49`,
56-
`bfcol_21` AS `bfcol_50`,
57-
`bfcol_22` AS `bfcol_51`
56+
`bfcol_9` AS `bfcol_30`,
57+
`bfcol_10` AS `bfcol_31`,
58+
`bfcol_11` AS `bfcol_32`,
59+
`bfcol_12` AS `bfcol_33`,
60+
`bfcol_13` AS `bfcol_34`,
61+
`bfcol_14` AS `bfcol_35`
5862
FROM `bfcte_6`
5963
UNION ALL
6064
SELECT
61-
`bfcol_40` AS `bfcol_46`,
62-
`bfcol_41` AS `bfcol_47`,
63-
`bfcol_42` AS `bfcol_48`,
64-
`bfcol_43` AS `bfcol_49`,
65-
`bfcol_44` AS `bfcol_50`,
66-
`bfcol_45` AS `bfcol_51`
65+
`bfcol_24` AS `bfcol_30`,
66+
`bfcol_25` AS `bfcol_31`,
67+
`bfcol_26` AS `bfcol_32`,
68+
`bfcol_27` AS `bfcol_33`,
69+
`bfcol_28` AS `bfcol_34`,
70+
`bfcol_29` AS `bfcol_35`
6771
FROM `bfcte_7`
6872
)
6973
)
7074
SELECT
71-
`bfcol_46` AS `rowindex`,
72-
`bfcol_47` AS `rowindex_1`,
73-
`bfcol_48` AS `int64_col`,
74-
`bfcol_49` AS `string_col`
75+
`bfcol_30` AS `rowindex`,
76+
`bfcol_31` AS `rowindex_1`,
77+
`bfcol_32` AS `int64_col`,
78+
`bfcol_33` AS `string_col`
7579
FROM `bfcte_8`
7680
ORDER BY
77-
`bfcol_50` ASC NULLS LAST,
78-
`bfcol_51` ASC NULLS LAST
81+
`bfcol_34` ASC NULLS LAST,
82+
`bfcol_35` ASC NULLS LAST
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
WITH `bfcte_3` AS (
2+
SELECT
3+
`int64_col` AS `bfcol_0`,
4+
`float64_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
6+
), `bfcte_7` AS (
7+
SELECT
8+
*,
9+
ROW_NUMBER() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_4`
10+
FROM `bfcte_3`
11+
), `bfcte_11` AS (
12+
SELECT
13+
*,
14+
0 AS `bfcol_5`
15+
FROM `bfcte_7`
16+
), `bfcte_14` AS (
17+
SELECT
18+
`bfcol_1` AS `bfcol_6`,
19+
`bfcol_0` AS `bfcol_7`,
20+
`bfcol_5` AS `bfcol_8`,
21+
`bfcol_4` AS `bfcol_9`
22+
FROM `bfcte_11`
23+
), `bfcte_2` AS (
24+
SELECT
25+
`bool_col` AS `bfcol_10`,
26+
`int64_too` AS `bfcol_11`,
27+
`float64_col` AS `bfcol_12`
28+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
29+
), `bfcte_6` AS (
30+
SELECT
31+
*
32+
FROM `bfcte_2`
33+
WHERE
34+
`bfcol_10`
35+
), `bfcte_10` AS (
36+
SELECT
37+
*,
38+
ROW_NUMBER() OVER () AS `bfcol_15`
39+
FROM `bfcte_6`
40+
), `bfcte_13` AS (
41+
SELECT
42+
*,
43+
1 AS `bfcol_16`
44+
FROM `bfcte_10`
45+
), `bfcte_15` AS (
46+
SELECT
47+
`bfcol_12` AS `bfcol_17`,
48+
`bfcol_11` AS `bfcol_18`,
49+
`bfcol_16` AS `bfcol_19`,
50+
`bfcol_15` AS `bfcol_20`
51+
FROM `bfcte_13`
52+
), `bfcte_1` AS (
53+
SELECT
54+
`int64_col` AS `bfcol_21`,
55+
`float64_col` AS `bfcol_22`
56+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
57+
), `bfcte_5` AS (
58+
SELECT
59+
*,
60+
ROW_NUMBER() OVER (ORDER BY `bfcol_21` IS NULL ASC NULLS LAST, `bfcol_21` ASC NULLS LAST) AS `bfcol_25`
61+
FROM `bfcte_1`
62+
), `bfcte_9` AS (
63+
SELECT
64+
*,
65+
2 AS `bfcol_26`
66+
FROM `bfcte_5`
67+
), `bfcte_16` AS (
68+
SELECT
69+
`bfcol_22` AS `bfcol_27`,
70+
`bfcol_21` AS `bfcol_28`,
71+
`bfcol_26` AS `bfcol_29`,
72+
`bfcol_25` AS `bfcol_30`
73+
FROM `bfcte_9`
74+
), `bfcte_0` AS (
75+
SELECT
76+
`bool_col` AS `bfcol_31`,
77+
`int64_too` AS `bfcol_32`,
78+
`float64_col` AS `bfcol_33`
79+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
80+
), `bfcte_4` AS (
81+
SELECT
82+
*
83+
FROM `bfcte_0`
84+
WHERE
85+
`bfcol_31`
86+
), `bfcte_8` AS (
87+
SELECT
88+
*,
89+
ROW_NUMBER() OVER () AS `bfcol_36`
90+
FROM `bfcte_4`
91+
), `bfcte_12` AS (
92+
SELECT
93+
*,
94+
3 AS `bfcol_37`
95+
FROM `bfcte_8`
96+
), `bfcte_17` AS (
97+
SELECT
98+
`bfcol_33` AS `bfcol_38`,
99+
`bfcol_32` AS `bfcol_39`,
100+
`bfcol_37` AS `bfcol_40`,
101+
`bfcol_36` AS `bfcol_41`
102+
FROM `bfcte_12`
103+
), `bfcte_18` AS (
104+
SELECT
105+
*
106+
FROM (
107+
SELECT
108+
`bfcol_6` AS `bfcol_42`,
109+
`bfcol_7` AS `bfcol_43`,
110+
`bfcol_8` AS `bfcol_44`,
111+
`bfcol_9` AS `bfcol_45`
112+
FROM `bfcte_14`
113+
UNION ALL
114+
SELECT
115+
`bfcol_17` AS `bfcol_42`,
116+
`bfcol_18` AS `bfcol_43`,
117+
`bfcol_19` AS `bfcol_44`,
118+
`bfcol_20` AS `bfcol_45`
119+
FROM `bfcte_15`
120+
UNION ALL
121+
SELECT
122+
`bfcol_27` AS `bfcol_42`,
123+
`bfcol_28` AS `bfcol_43`,
124+
`bfcol_29` AS `bfcol_44`,
125+
`bfcol_30` AS `bfcol_45`
126+
FROM `bfcte_16`
127+
UNION ALL
128+
SELECT
129+
`bfcol_38` AS `bfcol_42`,
130+
`bfcol_39` AS `bfcol_43`,
131+
`bfcol_40` AS `bfcol_44`,
132+
`bfcol_41` AS `bfcol_45`
133+
FROM `bfcte_17`
134+
)
135+
)
136+
SELECT
137+
`bfcol_42` AS `float64_col`,
138+
`bfcol_43` AS `int64_col`
139+
FROM `bfcte_18`
140+
ORDER BY
141+
`bfcol_44` ASC NULLS LAST,
142+
`bfcol_45` ASC NULLS LAST

0 commit comments

Comments
 (0)