Skip to content

Commit ac2b717

Browse files
committed
fix: prune extra columns in Block.__init__ to prevent unbound variables
1 parent 2bc8ea1 commit ac2b717

File tree

1 file changed

+15
-26
lines changed

1 file changed

+15
-26
lines changed

bigframes/core/blocks.py

Lines changed: 15 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ def __init__(
161161
self._expr = self._normalize_expression(expr, self._index_columns)
162162

163163
# Calculate value_columns after normalizing expression
164-
actual_value_columns = [
164+
all_value_columns = [
165165
column
166166
for column in self._expr.column_ids
167167
if column not in self.index_columns
@@ -173,31 +173,20 @@ def __init__(
173173
if isinstance(column_labels, pd.Index)
174174
else pd.Index(column_labels)
175175
)
176-
177-
# Adjust column_labels if needed to match actual_value_columns
178-
if len(actual_value_columns) != len(self._column_labels):
179-
# If we have more value columns than labels, extend with None
180-
if len(actual_value_columns) > len(self._column_labels):
181-
additional_labels = [None] * (
182-
len(actual_value_columns) - len(self._column_labels)
183-
)
184-
self._column_labels = self._column_labels.append(
185-
pd.Index(additional_labels)
186-
)
187-
# If we have fewer value columns than labels, truncate
188-
else:
189-
self._column_labels = self._column_labels[: len(actual_value_columns)]
190-
191-
# Re-validate after adjustment
192-
if len(actual_value_columns) != len(self._column_labels):
193-
raise ValueError(
194-
f"'value_columns' (size {len(actual_value_columns)}) and "
195-
f"'column_labels' (size {len(self._column_labels)}) must have equal length"
196-
)
197-
198-
# Update value_columns property to use actual_value_columns
199-
self._value_columns = actual_value_columns
200-
176+
# Adjust column_labels and value_columns to match
177+
if len(all_value_columns) > len(self._column_labels):
178+
# More columns than labels: Drop the extra columns (assumed to be internal/garbage)
179+
self._value_columns = all_value_columns[: len(self._column_labels)]
180+
# Prune the expression to remove hidden columns
181+
self._expr = self._expr.select_columns(
182+
[*self.index_columns, *self._value_columns]
183+
)
184+
elif len(all_value_columns) < len(self._column_labels):
185+
# Fewer columns than labels: Truncate labels
186+
self._value_columns = all_value_columns
187+
self._column_labels = self._column_labels[: len(self._value_columns)]
188+
else:
189+
self._value_columns = all_value_columns
201190
# col_id -> [stat_name -> scalar]
202191
# TODO: Preserve cache under safe transforms (eg. drop column, reorder)
203192
self._stats_cache: dict[str, dict[str, typing.Any]] = {

0 commit comments

Comments
 (0)