Skip to content

Commit 14a4272

Browse files
committed
fix: handle value_columns and column_labels mismatch in Block
1 parent ee0ee0f commit 14a4272

File tree

1 file changed

+40
-8
lines changed

1 file changed

+40
-8
lines changed

bigframes/core/blocks.py

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -159,20 +159,48 @@ def __init__(
159159
else tuple([None for _ in index_columns])
160160
)
161161
self._expr = self._normalize_expression(expr, self._index_columns)
162+
163+
# FIX: Calculate value_columns after normalizing expression
164+
actual_value_columns = [
165+
column
166+
for column in self._expr.column_ids
167+
if column not in self.index_columns
168+
]
169+
162170
# Use pandas index to more easily replicate column indexing, especially for hierarchical column index
163171
self._column_labels = (
164172
column_labels.copy()
165173
if isinstance(column_labels, pd.Index)
166174
else pd.Index(column_labels)
167175
)
168-
if len(self.value_columns) != len(self._column_labels):
176+
177+
# FIX: Adjust column_labels if needed to match actual_value_columns
178+
if len(actual_value_columns) != len(self._column_labels):
179+
# If we have more value columns than labels, extend with None
180+
if len(actual_value_columns) > len(self._column_labels):
181+
additional_labels = [None] * (
182+
len(actual_value_columns) - len(self._column_labels)
183+
)
184+
self._column_labels = self._column_labels.append(
185+
pd.Index(additional_labels)
186+
)
187+
# If we have fewer value columns than labels, truncate
188+
else:
189+
self._column_labels = self._column_labels[: len(actual_value_columns)]
190+
191+
# Re-validate after adjustment
192+
if len(actual_value_columns) != len(self._column_labels):
169193
raise ValueError(
170-
f"'value_columns' (size {len(self.value_columns)}) and 'column_labels' (size {len(self._column_labels)}) must have equal length"
194+
f"'value_columns' (size {len(actual_value_columns)}) and 'column_labels' (size {len(self._column_labels)}) must have equal length"
171195
)
196+
197+
# Update value_columns property to use actual_value_columns
198+
self._value_columns = actual_value_columns
199+
172200
# col_id -> [stat_name -> scalar]
173201
# TODO: Preserve cache under safe transforms (eg. drop column, reorder)
174202
self._stats_cache: dict[str, dict[str, typing.Any]] = {
175-
col_id: {} for col_id in self.value_columns
203+
col_id: {} for col_id in self._value_columns
176204
}
177205
# TODO(kemppeterson) Add a cache for corr to parallel the single-column stats.
178206

@@ -285,11 +313,15 @@ def index_columns(self) -> Sequence[str]:
285313
@property
def value_columns(self) -> Sequence[str]:
    """All value columns, mutually exclusive with index columns.

    Returns the column ids stored in ``self._value_columns`` when that
    attribute has been set; otherwise derives them from
    ``self._expr.column_ids`` by dropping every id that appears in
    ``self.index_columns``.

    Returns:
        A new list of value column ids on every call.
    """
    # Look the cache up with a cheap sentinel default. Passing the list
    # comprehension itself as the ``getattr`` default would evaluate it
    # (and touch ``self._expr``) on every access, cached or not.
    cached = getattr(self, "_value_columns", None)
    if cached is not None:
        # Hand out a copy so callers cannot mutate the cached list.
        return list(cached)

    # Read the property once instead of once per column.
    index_columns = self.index_columns
    return [
        column for column in self._expr.column_ids if column not in index_columns
    ]
293325

294326
@property
295327
def column_labels(self) -> pd.Index:

0 commit comments

Comments
 (0)