3 files changed: +19 -9 lines changed
Original file line number Diff line number Diff line change
@@ -382,9 +382,8 @@ def explode(self, column_ids: typing.Sequence[str]) -> ArrayValue:
382382 for column_id in column_ids :
383383 assert bigframes .dtypes .is_array_like (self .get_column_type (column_id ))
384384
385- return ArrayValue (
386- nodes .ExplodeNode (child = self .node , column_ids = tuple (column_ids ))
387- )
385+ offsets = tuple (self .get_offset_for_name (id ) for id in column_ids )
386+ return ArrayValue (nodes .ExplodeNode (child = self .node , column_ids = offsets ))
388387
389388 def _uniform_sampling (self , fraction : float ) -> ArrayValue :
390389 """Sampling the table on given fraction.
@@ -393,3 +392,6 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue:
393392 The row numbers of result is non-deterministic, avoid to use.
394393 """
395394 return ArrayValue (nodes .RandomSampleNode (self .node , fraction ))
395+
396+ def get_offset_for_name (self , name : str ):
397+ return self .schema .names .index (name )
Original file line number Diff line number Diff line change @@ -401,8 +401,9 @@ def _uniform_sampling(self, fraction: float) -> UnorderedIR:
401401 columns = columns ,
402402 )
403403
404- def explode (self , column_ids : typing .Sequence [str ]) -> UnorderedIR :
404+ def explode (self , offsets : typing .Sequence [int ]) -> UnorderedIR :
405405 table = self ._to_ibis_expr ()
406+ column_ids = tuple (table .columns [offset ] for offset in offsets )
406407
407408 # The offset array ensures null represents empty arrays after unnesting.
408409 offset_array_id = bigframes .core .guid .generate_guid ("offset_array_" )
@@ -712,16 +713,20 @@ def _uniform_sampling(self, fraction: float) -> OrderedIR:
712713 ordering = self ._ordering ,
713714 )
714715
715- def explode (self , column_ids : typing .Sequence [str ]) -> OrderedIR :
716+ def explode (self , offsets : typing .Sequence [int ]) -> OrderedIR :
716717 table = self ._to_ibis_expr (ordering_mode = "unordered" , expose_hidden_cols = True )
718+ column_ids = tuple (table .columns [offset ] for offset in offsets )
717719
718720 offset_array_id = bigframes .core .guid .generate_guid ("offset_array_" )
719721 offset_array = (
720722 vendored_ibis_ops .GenerateArray (
721723 ibis .greatest (
722724 0 ,
723725 ibis .least (
724- * [table [column_id ].length () - 1 for column_id in column_ids ]
726+ * [
727+ table [table .columns [offset ]].length () - 1
728+ for offset in offsets
729+ ]
725730 ),
726731 )
727732 )
Original file line number Diff line number Diff line change 4242OVERHEAD_VARIABLES = 5
4343
4444
45+ COL_OFFSET = int
46+
47+
4548@dataclass (frozen = True )
4649class BigFrameNode :
4750 """
@@ -826,7 +829,7 @@ def variables_introduced(self) -> int:
826829
827830@dataclass (frozen = True )
828831class ExplodeNode (UnaryNode ):
829- column_ids : typing .Tuple [str , ...]
832+ column_ids : typing .Tuple [COL_OFFSET , ...]
830833
831834 @property
832835 def row_preserving (self ) -> bool :
@@ -844,9 +847,9 @@ def schema(self) -> schemata.ArraySchema:
844847 self .child .schema .get_type (name ).pyarrow_dtype .value_type
845848 ),
846849 )
847- if name in self .column_ids
850+ if offset in self .column_ids
848851 else schemata .SchemaItem (name , self .child .schema .get_type (name ))
849- for name in self .child .schema .names
852+ for offset , name in enumerate ( self .child .schema .names )
850853 )
851854 return schemata .ArraySchema (items )
852855
You can’t perform that action at this time.
0 commit comments