Skip to content

Commit 3cbd6e6

Browse files
committed
fix: prune extra columns in Block.__init__ to prevent unbound variables
1 parent 2bc8ea1 commit 3cbd6e6

File tree

2 files changed

+25
-22
lines changed

2 files changed

+25
-22
lines changed

bigframes/core/blocks.py

Lines changed: 21 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ def __init__(
161161
self._expr = self._normalize_expression(expr, self._index_columns)
162162

163163
# Calculate value_columns after normalizing expression
164-
actual_value_columns = [
164+
all_value_columns = [
165165
column
166166
for column in self._expr.column_ids
167167
if column not in self.index_columns
@@ -174,29 +174,28 @@ def __init__(
174174
else pd.Index(column_labels)
175175
)
176176

177-
# Adjust column_labels if needed to match actual_value_columns
178-
if len(actual_value_columns) != len(self._column_labels):
179-
# If we have more value columns than labels, extend with None
180-
if len(actual_value_columns) > len(self._column_labels):
181-
additional_labels = [None] * (
182-
len(actual_value_columns) - len(self._column_labels)
183-
)
184-
self._column_labels = self._column_labels.append(
185-
pd.Index(additional_labels)
186-
)
187-
# If we have fewer value columns than labels, truncate
188-
else:
189-
self._column_labels = self._column_labels[: len(actual_value_columns)]
177+
# DEBUG
178+
print(
179+
f"DEBUG: Block init. all_value_columns={len(all_value_columns)}, column_labels={len(self._column_labels)}"
180+
)
190181

191-
# Re-validate after adjustment
192-
if len(actual_value_columns) != len(self._column_labels):
193-
raise ValueError(
194-
f"'value_columns' (size {len(actual_value_columns)}) and "
195-
f"'column_labels' (size {len(self._column_labels)}) must have equal length"
182+
# Adjust column_labels and value_columns to match
183+
if len(all_value_columns) > len(self._column_labels):
184+
print(
185+
f"DEBUG: Pruning {len(all_value_columns) - len(self._column_labels)} extra columns"
196186
)
197-
198-
# Update value_columns property to use actual_value_columns
199-
self._value_columns = actual_value_columns
187+
# More columns than labels: Drop the extra columns (assumed to be internal/garbage)
188+
self._value_columns = all_value_columns[: len(self._column_labels)]
189+
# Prune the expression to remove hidden columns
190+
self._expr = self._expr.select_columns(
191+
[*self.index_columns, *self._value_columns]
192+
)
193+
elif len(all_value_columns) < len(self._column_labels):
194+
# Fewer columns than labels: Truncate labels
195+
self._value_columns = all_value_columns
196+
self._column_labels = self._column_labels[: len(self._value_columns)]
197+
else:
198+
self._value_columns = all_value_columns
200199

201200
# col_id -> [stat_name -> scalar]
202201
# TODO: Preserve cache under safe transforms (eg. drop column, reorder)

bigframes/core/expression.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -316,6 +316,10 @@ def bind_refs(
316316
if self.id in bindings.keys():
317317
return bindings[self.id]
318318
elif not allow_partial_bindings:
319+
# DEBUG: Print details about the failure
320+
print(f"DEBUG: bind_refs failed for {self.id}")
321+
print(f"DEBUG: available bindings: {list(bindings.keys())}")
322+
# print(f"DEBUG: full expression: {self}")
319323
raise ValueError(f"Variable {self.id} remains unbound")
320324
return self
321325

0 commit comments

Comments
 (0)