3333import bigframes .core .compile .ibis_types
3434import bigframes .core .compile .scalar_op_compiler as compile_scalar
3535import bigframes .core .compile .schema_translator
36- import bigframes .core .expression as ex
37- import bigframes .core .identifiers as ids
3836import bigframes .core .nodes as nodes
3937import bigframes .core .ordering as bf_ordering
4038import bigframes .core .rewrite as rewrites
@@ -52,65 +50,54 @@ class Compiler:
5250 scalar_op_compiler = compile_scalar .ScalarOpCompiler ()
5351
5452 def compile_sql (
55- self , node : nodes .BigFrameNode , ordered : bool , output_ids : typing .Sequence [str ]
53+ self ,
54+ node : nodes .BigFrameNode ,
55+ ordered : bool ,
56+ limit : typing .Optional [int ] = None ,
5657 ) -> str :
57- # TODO: get rid of output_ids arg
58- assert len (output_ids ) == len (list (node .fields ))
59- node = set_output_names (node , output_ids )
60- node = nodes .top_down (node , rewrites .rewrite_timedelta_expressions )
58+ # later steps might add ids, so snapshot before those steps.
59+ output_ids = node .schema .names
6160 if ordered :
62- node , limit = rewrites .pullup_limit_from_slice (node )
63- node = nodes .bottom_up (node , rewrites .rewrite_slice )
64- # TODO: Extract out CTEs
65- node , ordering = rewrites .pull_up_order (
66- node , order_root = True , ordered_joins = self .strict
67- )
68- node = rewrites .column_pruning (node )
69- ir = self .compile_node (node )
70- return ir .to_sql (
71- order_by = ordering .all_ordering_columns ,
72- limit = limit ,
73- selections = output_ids ,
74- )
75- else :
76- node = nodes .bottom_up (node , rewrites .rewrite_slice )
77- node , _ = rewrites .pull_up_order (
78- node , order_root = False , ordered_joins = self .strict
79- )
80- node = rewrites .column_pruning (node )
81- ir = self .compile_node (node )
82- return ir .to_sql (selections = output_ids )
61+ # Need to do this before replacing unsupported ops, as that will rewrite slice ops
62+ node , pulled_up_limit = rewrites .pullup_limit_from_slice (node )
63+ if (pulled_up_limit is not None ) and (
64+ (limit is None ) or limit > pulled_up_limit
65+ ):
66+ limit = pulled_up_limit
8367
84- def compile_peek_sql (self , node : nodes .BigFrameNode , n_rows : int ) -> str :
85- ids = [id .sql for id in node .ids ]
86- node = nodes .bottom_up (node , rewrites .rewrite_slice )
87- node = nodes .top_down (node , rewrites .rewrite_timedelta_expressions )
88- node , _ = rewrites .pull_up_order (
89- node , order_root = False , ordered_joins = self .strict
68+ node = self ._replace_unsupported_ops (node )
69+ # prune before pulling up order to avoid unnecessary row_number() ops
70+ node = rewrites .column_pruning (node )
71+ node , ordering = rewrites .pull_up_order (
72+ node , order_root = ordered , ordered_joins = self .strict
9073 )
74+ # final pruning to clean up any leftover unused values
9175 node = rewrites .column_pruning (node )
92- return self .compile_node (node ).to_sql (limit = n_rows , selections = ids )
76+ return self .compile_node (node ).to_sql (
77+ order_by = ordering .all_ordering_columns if ordered else (),
78+ limit = limit ,
79+ selections = output_ids ,
80+ )
9381
9482 def compile_raw (
9583 self ,
96- node : bigframes . core . nodes .BigFrameNode ,
84+ node : nodes .BigFrameNode ,
9785 ) -> typing .Tuple [
9886 str , typing .Sequence [google .cloud .bigquery .SchemaField ], bf_ordering .RowOrdering
9987 ]:
100- node = nodes .bottom_up (node , rewrites .rewrite_slice )
101- node = nodes .top_down (node , rewrites .rewrite_timedelta_expressions )
102- node , ordering = rewrites .pull_up_order (node , ordered_joins = self .strict )
88+ node = self ._replace_unsupported_ops (node )
89+ node = rewrites .column_pruning (node )
90+ node , ordering = rewrites .pull_up_order (
91+ node , order_root = True , ordered_joins = self .strict
92+ )
10393 node = rewrites .column_pruning (node )
104- ir = self .compile_node (node )
105- sql = ir .to_sql ()
94+ sql = self .compile_node (node ).to_sql ()
10695 return sql , node .schema .to_bigquery (), ordering
10796
108- def _preprocess (self , node : nodes .BigFrameNode ):
97+ def _replace_unsupported_ops (self , node : nodes .BigFrameNode ):
98+ # TODO: Run all replacement rules as single bottom-up pass
10999 node = nodes .bottom_up (node , rewrites .rewrite_slice )
110- node = nodes .top_down (node , rewrites .rewrite_timedelta_expressions )
111- node , _ = rewrites .pull_up_order (
112- node , order_root = False , ordered_joins = self .strict
113- )
100+ node = nodes .bottom_up (node , rewrites .rewrite_timedelta_expressions )
114101 return node
115102
116103 # TODO: Remove cache when schema no longer requires compilation to derive schema (and therefore only compiles for execution)
@@ -305,16 +292,3 @@ def compile_explode(self, node: nodes.ExplodeNode):
305292 @_compile_node .register
306293 def compile_random_sample (self , node : nodes .RandomSampleNode ):
307294 return self .compile_node (node .child )._uniform_sampling (node .fraction )
308-
309-
310- def set_output_names (
311- node : bigframes .core .nodes .BigFrameNode , output_ids : typing .Sequence [str ]
312- ):
313- # TODO: Create specialized output operators that will handle final names
314- return nodes .SelectionNode (
315- node ,
316- tuple (
317- bigframes .core .nodes .AliasedRef (ex .DerefOp (old_id ), ids .ColumnId (out_id ))
318- for old_id , out_id in zip (node .ids , output_ids )
319- ),
320- )
0 commit comments