Skip to content

Commit 6fafc08

Browse files
committed
For isin, use subquery rather than cte for the right selections
1 parent e8344b6 commit 6fafc08

File tree

4 files changed

+37
-28
lines changed

4 files changed

+37
-28
lines changed

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -347,12 +347,10 @@ def isin_join(
347347
left_cte_name = sge.to_identifier(
348348
next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted
349349
)
350-
right_cte_name = sge.to_identifier(
351-
next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted
352-
)
353350

354351
left_select = _select_to_cte(self.expr, left_cte_name)
355-
right_select = _select_to_cte(right.expr, right_cte_name)
352+
# Prefer subquery over CTE for the IN clause's right side to improve SQL readability.
353+
right_select = right.expr
356354

357355
left_ctes = left_select.args.pop("with", [])
358356
right_ctes = right_select.args.pop("with", [])
@@ -362,25 +360,28 @@ def isin_join(
362360
sge.Column(this=conditions[0].expr, table=left_cte_name),
363361
conditions[0].dtype,
364362
)
365-
right_condition = typed_expr.TypedExpr(
366-
sge.Column(this=conditions[1].expr, table=right_cte_name),
367-
conditions[1].dtype,
368-
)
369363

370364
new_column: sge.Expression
371365
if joins_nulls:
366+
right_table_name = sge.to_identifier(
367+
next(self.uid_gen.get_uid_stream("bft_")), quoted=self.quoted
368+
)
369+
right_condition = typed_expr.TypedExpr(
370+
sge.Column(this=conditions[1].expr, table=right_table_name),
371+
conditions[1].dtype,
372+
)
372373
new_column = sge.Exists(
373374
this=sge.Select()
374375
.select(sge.convert(1))
375-
.from_(sge.Table(this=right_cte_name))
376+
.from_(sge.Alias(this=right_select.subquery(), alias=right_table_name))
376377
.where(
377378
_join_condition(left_condition, right_condition, joins_nulls=True)
378379
)
379380
)
380381
else:
381382
new_column = sge.In(
382383
this=left_condition.expr,
383-
expressions=[right_condition.expr],
384+
expressions=[right_select.subquery()],
384385
)
385386

386387
new_column = sge.Alias(

tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin/out.sql

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,25 @@ WITH `bfcte_1` AS (
1313
`int64_too` AS `bfcol_4`
1414
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
1515
), `bfcte_3` AS (
16-
SELECT
17-
`bfcol_4`
18-
FROM `bfcte_0`
19-
GROUP BY
20-
`bfcol_4`
21-
), `bfcte_4` AS (
2216
SELECT
2317
`bfcte_2`.*,
2418
EXISTS(
2519
SELECT
2620
1
27-
FROM `bfcte_3`
21+
FROM (
22+
SELECT
23+
`bfcol_4`
24+
FROM `bfcte_0`
25+
GROUP BY
26+
`bfcol_4`
27+
) AS `bft_0`
2828
WHERE
29-
COALESCE(`bfcte_2`.`bfcol_3`, 0) = COALESCE(`bfcte_3`.`bfcol_4`, 0)
30-
AND COALESCE(`bfcte_2`.`bfcol_3`, 1) = COALESCE(`bfcte_3`.`bfcol_4`, 1)
29+
COALESCE(`bfcte_2`.`bfcol_3`, 0) = COALESCE(`bft_0`.`bfcol_4`, 0)
30+
AND COALESCE(`bfcte_2`.`bfcol_3`, 1) = COALESCE(`bft_0`.`bfcol_4`, 1)
3131
) AS `bfcol_5`
3232
FROM `bfcte_2`
3333
)
3434
SELECT
3535
`bfcol_2` AS `rowindex`,
3636
`bfcol_5` AS `int64_col`
37-
FROM `bfcte_4`
37+
FROM `bfcte_3`

tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin_not_nullable/out.sql

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,18 @@ WITH `bfcte_1` AS (
1313
`rowindex_2` AS `bfcol_4`
1414
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
1515
), `bfcte_3` AS (
16-
SELECT
17-
`bfcol_4`
18-
FROM `bfcte_0`
19-
GROUP BY
20-
`bfcol_4`
21-
), `bfcte_4` AS (
2216
SELECT
2317
`bfcte_2`.*,
24-
`bfcte_2`.`bfcol_3` IN (`bfcte_3`.`bfcol_4`) AS `bfcol_5`
18+
`bfcte_2`.`bfcol_3` IN ((
19+
SELECT
20+
`bfcol_4`
21+
FROM `bfcte_0`
22+
GROUP BY
23+
`bfcol_4`
24+
)) AS `bfcol_5`
2525
FROM `bfcte_2`
2626
)
2727
SELECT
2828
`bfcol_2` AS `rowindex`,
2929
`bfcol_5` AS `rowindex_2`
30-
FROM `bfcte_4`
30+
FROM `bfcte_3`

tests/unit/core/compile/sqlglot/test_compile_isin.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,20 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import sys
16+
1517
import pytest
1618

1719
import bigframes.pandas as bpd
1820

1921
pytest.importorskip("pytest_snapshot")
2022

23+
if sys.version_info < (3, 12):
24+
pytest.skip(
25+
"Skipping test due to inconsistent SQL formatting on Python < 3.12.",
26+
allow_module_level=True,
27+
)
28+
2129

2230
def test_compile_isin(scalar_types_df: bpd.DataFrame, snapshot):
2331
bf_isin = scalar_types_df["int64_col"].isin(scalar_types_df["int64_too"]).to_frame()

0 commit comments

Comments
 (0)