Skip to content

Commit 5d6ec4f

Browse files
committed
refactor: left join with unnest
1 parent a4cf227 commit 5d6ec4f

File tree

3 files changed

+6
-6
lines changed
  • bigframes/core/compile/sqlglot
  • tests/unit/core/compile/sqlglot/snapshots/test_compile_explode
    • test_compile_explode_dataframe
    • test_compile_explode_series

3 files changed

+6
-6
lines changed

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -558,16 +558,15 @@ def _explode_single_column(
558558
)
559559
selection = sge.Star(replace=[unnested_column_alias.as_(column)])
560560

561-
# TODO: "CROSS" if not keep_empty else "LEFT"
562-
# TODO: overlaps_with_parent to replace existing column.
563561
new_expr = _select_to_cte(
564562
self.expr,
565563
sge.to_identifier(
566564
next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted
567565
),
568566
)
567+
# Use LEFT JOIN to preserve rows when unnesting empty arrays.
569568
new_expr = new_expr.select(selection, append=False).join(
570-
unnest_expr, join_type="CROSS"
569+
unnest_expr, join_type="LEFT"
571570
)
572571
return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen)
573572

@@ -621,8 +620,9 @@ def _explode_multiple_columns(
621620
next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted
622621
),
623622
)
623+
# Use LEFT JOIN to preserve rows when unnesting empty arrays.
624624
new_expr = new_expr.select(selection, append=False).join(
625-
unnest_expr, join_type="CROSS"
625+
unnest_expr, join_type="LEFT"
626626
)
627627
return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen)
628628

tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ WITH `bfcte_0` AS (
99
*
1010
REPLACE (`int_list_col`[SAFE_OFFSET(`bfcol_13`)] AS `int_list_col`, `string_list_col`[SAFE_OFFSET(`bfcol_13`)] AS `string_list_col`)
1111
FROM `bfcte_0`
12-
CROSS JOIN UNNEST(GENERATE_ARRAY(0, LEAST(ARRAY_LENGTH(`int_list_col`) - 1, ARRAY_LENGTH(`string_list_col`) - 1))) AS `bfcol_13` WITH OFFSET AS `bfcol_7`
12+
LEFT JOIN UNNEST(GENERATE_ARRAY(0, LEAST(ARRAY_LENGTH(`int_list_col`) - 1, ARRAY_LENGTH(`string_list_col`) - 1))) AS `bfcol_13` WITH OFFSET AS `bfcol_7`
1313
)
1414
SELECT
1515
`rowindex`,

tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ WITH `bfcte_0` AS (
88
*
99
REPLACE (`bfcol_8` AS `int_list_col`)
1010
FROM `bfcte_0`
11-
CROSS JOIN UNNEST(`int_list_col`) AS `bfcol_8` WITH OFFSET AS `bfcol_4`
11+
LEFT JOIN UNNEST(`int_list_col`) AS `bfcol_8` WITH OFFSET AS `bfcol_4`
1212
)
1313
SELECT
1414
`rowindex`,

0 commit comments

Comments
 (0)