@@ -159,20 +159,48 @@ def __init__(
159159 else tuple ([None for _ in index_columns ])
160160 )
161161 self ._expr = self ._normalize_expression (expr , self ._index_columns )
162+
163+ # FIX: Calculate value_columns after normalizing expression
164+ actual_value_columns = [
165+ column
166+ for column in self ._expr .column_ids
167+ if column not in self .index_columns
168+ ]
169+
162170 # Use pandas index to more easily replicate column indexing, especially for hierarchical column index
163171 self ._column_labels = (
164172 column_labels .copy ()
165173 if isinstance (column_labels , pd .Index )
166174 else pd .Index (column_labels )
167175 )
168- if len (self .value_columns ) != len (self ._column_labels ):
176+
177+ # FIX: Adjust column_labels if needed to match actual_value_columns
178+ if len (actual_value_columns ) != len (self ._column_labels ):
179+ # If we have more value columns than labels, extend with None
180+ if len (actual_value_columns ) > len (self ._column_labels ):
181+ additional_labels = [None ] * (
182+ len (actual_value_columns ) - len (self ._column_labels )
183+ )
184+ self ._column_labels = self ._column_labels .append (
185+ pd .Index (additional_labels )
186+ )
187+ # If we have fewer value columns than labels, truncate
188+ else :
189+ self ._column_labels = self ._column_labels [: len (actual_value_columns )]
190+
191+ # Re-validate after adjustment
192+ if len (actual_value_columns ) != len (self ._column_labels ):
169193 raise ValueError (
170- f"'value_columns' (size { len (self . value_columns )} ) and 'column_labels' (size { len (self ._column_labels )} ) must have equal length"
194+ f"'value_columns' (size { len (actual_value_columns )} ) and 'column_labels' (size { len (self ._column_labels )} ) must have equal length"
171195 )
196+
197+ # Update value_columns property to use actual_value_columns
198+ self ._value_columns = actual_value_columns
199+
172200 # col_id -> [stat_name -> scalar]
173201 # TODO: Preserve cache under safe transforms (eg. drop column, reorder)
174202 self ._stats_cache : dict [str , dict [str , typing .Any ]] = {
175- col_id : {} for col_id in self .value_columns
203+ col_id : {} for col_id in self ._value_columns
176204 }
177205 # TODO(kemppeterson) Add a cache for corr to parallel the single-column stats.
178206
@@ -285,11 +313,15 @@ def index_columns(self) -> Sequence[str]:
@property
def value_columns(self) -> Sequence[str]:
    """All value columns, mutually exclusive with index columns.

    Returns:
        A new list of value column ids on every access. If the instance
        already stores ``_value_columns`` (assigned in ``__init__``), a copy
        of that list is returned. Otherwise the ids are derived from
        ``self._expr.column_ids`` by dropping every id that also appears in
        ``self.index_columns``.
    """
    # Look the cached attribute up on its own. Passing the fallback list as
    # the ``getattr`` default would build it on every access, because call
    # arguments are evaluated before the lookup happens.
    try:
        stored = self._value_columns
    except AttributeError:
        stored = None
    if stored is not None:
        # Hand out a copy so callers cannot mutate the stored list in place.
        return list(stored)

    # Read index_columns once rather than once per column.
    index_ids = frozenset(self.index_columns)
    return [
        column for column in self._expr.column_ids if column not in index_ids
    ]
293325
294326 @property
295327 def column_labels (self ) -> pd .Index :
0 commit comments