@@ -159,16 +159,36 @@ def __init__(
159159 else tuple ([None for _ in index_columns ])
160160 )
161161 self ._expr = self ._normalize_expression (expr , self ._index_columns )
162+
163+ # Calculate value_columns after normalizing expression
164+ all_value_columns = [
165+ column
166+ for column in self ._expr .column_ids
167+ if column not in self .index_columns
168+ ]
169+
162170 # Use pandas index to more easily replicate column indexing, especially for hierarchical column index
163171 self ._column_labels = (
164172 column_labels .copy ()
165173 if isinstance (column_labels , pd .Index )
166174 else pd .Index (column_labels )
167175 )
168- if len (self .value_columns ) != len (self ._column_labels ):
169- raise ValueError (
170- f"'value_columns' (size { len (self .value_columns )} ) and 'column_labels' (size { len (self ._column_labels )} ) must have equal length"
176+
177+ # Adjust column_labels and value_columns to match
178+ if len (all_value_columns ) > len (self ._column_labels ):
179+ # More columns than labels: Drop the extra columns (assumed to be internal/garbage)
180+ self ._value_columns = all_value_columns [: len (self ._column_labels )]
181+ # Prune the expression to remove hidden columns
182+ self ._expr = self ._expr .select_columns (
183+ [* self .index_columns , * self ._value_columns ]
171184 )
185+ elif len (all_value_columns ) < len (self ._column_labels ):
186+ # Fewer columns than labels: Truncate labels
187+ self ._value_columns = all_value_columns
188+ self ._column_labels = self ._column_labels [: len (self ._value_columns )]
189+ else :
190+ self ._value_columns = all_value_columns
191+
172192 # col_id -> [stat_name -> scalar]
173193 # TODO: Preserve cache under safe transforms (eg. drop column, reorder)
174194 self ._stats_cache : dict [str , dict [str , typing .Any ]] = {
@@ -285,11 +305,15 @@ def index_columns(self) -> Sequence[str]:
@property
def value_columns(self) -> Sequence[str]:
    """All value columns, mutually exclusive with index columns.

    Returns the explicitly stored ``_value_columns`` when present (it is
    assigned in ``__init__`` after column labels and expression columns are
    reconciled); otherwise derives the list from the expression.

    Returns:
        Sequence[str]: column ids of the expression that are not index columns.
    """
    try:
        # Fast path: the cached list computed in __init__.  EAFP via
        # try/except instead of getattr(self, "_value_columns", default)
        # because getattr evaluates its default argument eagerly — the
        # fallback list comprehension would otherwise be rebuilt on every
        # property access even when the cached attribute exists.
        return self._value_columns
    except AttributeError:
        # Fallback (instances without the attribute): every expression
        # column that is not an index column.
        return [
            column
            for column in self._expr.column_ids
            if column not in self.index_columns
        ]
293317
294318 @property
295319 def column_labels (self ) -> pd .Index :
@@ -3471,18 +3495,31 @@ def _pd_index_to_array_value(
34713495 Create an ArrayValue from a list of label tuples.
34723496 The last column will be row offsets.
34733497 """
3498+ id_gen = bigframes .core .identifiers .standard_id_strings ()
3499+ index_ids = [next (id_gen ) for _ in range (index .nlevels )]
3500+ offset_id = next (id_gen )
3501+
34743502 rows = []
34753503 labels_as_tuples = utils .index_as_tuples (index )
34763504 for row_offset in range (len (index )):
3477- id_gen = bigframes .core .identifiers .standard_id_strings ()
34783505 row_label = labels_as_tuples [row_offset ]
3479- row_label = ( row_label ,) if not isinstance ( row_label , tuple ) else row_label
3480- row = {}
3481- for label_part , id in zip (row_label , id_gen ):
3482- row [ id ] = label_part if pd . notnull ( label_part ) else None
3483- row [next ( id_gen ) ] = row_offset
3506+ row = {
3507+ id : ( val if pd . notnull ( val ) else None )
3508+ for id , val in zip (index_ids , row_label )
3509+ }
3510+ row [offset_id ] = row_offset
34843511 rows .append (row )
34853512
3513+ if not rows :
3514+ # Create empty table with correct columns
3515+ schema = pa .schema (
3516+ [pa .field (id , pa .null ()) for id in index_ids ]
3517+ + [pa .field (offset_id , pa .int64 ())]
3518+ )
3519+ return core .ArrayValue .from_pyarrow (
3520+ pa .Table .from_batches ([], schema = schema ), session = session
3521+ )
3522+
34863523 return core .ArrayValue .from_pyarrow (pa .Table .from_pylist (rows ), session = session )
34873524
34883525
0 commit comments