4545import bigframes .core .identifiers
4646import bigframes .core .nodes as nodes
4747import bigframes .core .ordering as order
48- import bigframes .core .rewrite as rewrites
4948import bigframes .core .schema
5049import bigframes .core .tree_properties as tree_properties
5150import bigframes .features
@@ -128,7 +127,7 @@ def to_sql(
128127 col_id_overrides = dict (col_id_overrides )
129128 col_id_overrides [internal_offset_col ] = offset_column
130129 node = (
131- self ._get_optimized_plan (array_value .node )
130+ self ._sub_cache_subtrees (array_value .node )
132131 if enable_cache
133132 else array_value .node
134133 )
@@ -279,7 +278,7 @@ def peek(
279278 """
280279 A 'peek' efficiently accesses a small number of rows in the dataframe.
281280 """
282- plan = self ._get_optimized_plan (array_value .node )
281+ plan = self ._sub_cache_subtrees (array_value .node )
283282 if not tree_properties .can_fast_peek (plan ):
284283 warnings .warn ("Peeking this value cannot be done efficiently." )
285284
@@ -314,15 +313,15 @@ def head(
314313 # No user-provided ordering, so just get any N rows, it's faster!
315314 return self .peek (array_value , n_rows )
316315
317- plan = self ._get_optimized_plan (array_value .node )
316+ plan = self ._sub_cache_subtrees (array_value .node )
318317 if not tree_properties .can_fast_head (plan ):
319318 # If can't get head fast, we are going to need to execute the whole query
320319 # Will want to do this in a way such that the result is reusable, but the first
321320 # N values can be easily extracted.
322321 # This currently requires clustering on offsets.
323322 self ._cache_with_offsets (array_value )
324323 # Get a new optimized plan after caching
325- plan = self ._get_optimized_plan (array_value .node )
324+ plan = self ._sub_cache_subtrees (array_value .node )
326325 assert tree_properties .can_fast_head (plan )
327326
328327 head_plan = generate_head_plan (plan , n_rows )
@@ -347,7 +346,7 @@ def get_row_count(self, array_value: bigframes.core.ArrayValue) -> int:
347346 if count is not None :
348347 return count
349348 else :
350- row_count_plan = self ._get_optimized_plan (
349+ row_count_plan = self ._sub_cache_subtrees (
351350 generate_row_count_plan (array_value .node )
352351 )
353352 sql = self .compiler .compile_unordered (row_count_plan )
@@ -359,7 +358,7 @@ def _local_get_row_count(
359358 ) -> Optional [int ]:
360359 # optimized plan has cache materializations which will have row count metadata
361360 # that is more likely to be usable than original leaf nodes.
362- plan = self ._get_optimized_plan (array_value .node )
361+ plan = self ._sub_cache_subtrees (array_value .node )
363362 return tree_properties .row_count (plan )
364363
365364 # Helpers
@@ -424,21 +423,14 @@ def _wait_on_job(
424423 self .metrics .count_job_stats (query_job )
425424 return results_iterator
426425
427- def _get_optimized_plan (self , node : nodes .BigFrameNode ) -> nodes .BigFrameNode :
426+ def _sub_cache_subtrees (self , node : nodes .BigFrameNode ) -> nodes .BigFrameNode :
428427 """
429428 Takes the original expression tree and applies optimizations to accelerate execution.
430429
431430 At present, the only optimization is to replace subtrees with cached previous materializations.
432431 """
433432 # Apply any rewrites *after* applying cache, as cache is sensitive to exact tree structure
434- optimized_plan = tree_properties .replace_nodes (
435- node , (dict (self ._cached_executions ))
436- )
437- if ENABLE_PRUNING :
438- used_fields = frozenset (field .id for field in optimized_plan .fields )
439- optimized_plan = optimized_plan .prune (used_fields )
440- optimized_plan = rewrites .replace_slice_ops (optimized_plan )
441- return optimized_plan
433+ return tree_properties .replace_nodes (node , (dict (self ._cached_executions )))
442434
443435 def _is_trivially_executable (self , array_value : bigframes .core .ArrayValue ):
444436 """
@@ -448,7 +440,7 @@ def _is_trivially_executable(self, array_value: bigframes.core.ArrayValue):
448440 # Once rewriting is available, will want to rewrite before
449441 # evaluating execution cost.
450442 return tree_properties .is_trivially_executable (
451- self ._get_optimized_plan (array_value .node )
443+ self ._sub_cache_subtrees (array_value .node )
452444 )
453445
454446 def _cache_with_cluster_cols (
@@ -457,7 +449,7 @@ def _cache_with_cluster_cols(
457449 """Executes the query and uses the resulting table to rewrite future executions."""
458450
459451 sql , schema , ordering_info = self .compiler .compile_raw (
460- self ._get_optimized_plan (array_value .node )
452+ self ._sub_cache_subtrees (array_value .node )
461453 )
462454 tmp_table = self ._sql_as_cached_temp_table (
463455 sql ,
@@ -474,7 +466,7 @@ def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue):
474466 """Executes the query and uses the resulting table to rewrite future executions."""
475467 offset_column = bigframes .core .guid .generate_guid ("bigframes_offsets" )
476468 w_offsets , offset_column = array_value .promote_offsets ()
477- sql = self .compiler .compile_unordered (self ._get_optimized_plan (w_offsets .node ))
469+ sql = self .compiler .compile_unordered (self ._sub_cache_subtrees (w_offsets .node ))
478470
479471 tmp_table = self ._sql_as_cached_temp_table (
480472 sql ,
@@ -510,7 +502,7 @@ def _simplify_with_caching(self, array_value: bigframes.core.ArrayValue):
510502 """Attempts to handle the complexity by caching duplicated subtrees and breaking the query into pieces."""
511503 # Apply existing caching first
512504 for _ in range (MAX_SUBTREE_FACTORINGS ):
513- node_with_cache = self ._get_optimized_plan (array_value .node )
505+ node_with_cache = self ._sub_cache_subtrees (array_value .node )
514506 if node_with_cache .planning_complexity < QUERY_COMPLEXITY_LIMIT :
515507 return
516508
@@ -567,7 +559,7 @@ def _validate_result_schema(
567559 ):
568560 actual_schema = tuple (bq_schema )
569561 ibis_schema = bigframes .core .compile .test_only_ibis_inferred_schema (
570- self ._get_optimized_plan (array_value .node )
562+ self ._sub_cache_subtrees (array_value .node )
571563 )
572564 internal_schema = array_value .schema
573565 if not bigframes .features .PANDAS_VERSIONS .is_arrow_list_dtype_usable :
0 commit comments