Skip to content

Commit 28bbedd

Browse files
committed
fix: handle column mismatches in Block and fix empty index handling
1 parent c467fda commit 28bbedd

File tree

1 file changed

+51
-14
lines changed

1 file changed

+51
-14
lines changed

bigframes/core/blocks.py

Lines changed: 51 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -159,16 +159,36 @@ def __init__(
159159
else tuple([None for _ in index_columns])
160160
)
161161
self._expr = self._normalize_expression(expr, self._index_columns)
162+
163+
# Calculate value_columns after normalizing expression
164+
all_value_columns = [
165+
column
166+
for column in self._expr.column_ids
167+
if column not in self.index_columns
168+
]
169+
162170
# Use pandas index to more easily replicate column indexing, especially for hierarchical column index
163171
self._column_labels = (
164172
column_labels.copy()
165173
if isinstance(column_labels, pd.Index)
166174
else pd.Index(column_labels)
167175
)
168-
if len(self.value_columns) != len(self._column_labels):
169-
raise ValueError(
170-
f"'value_columns' (size {len(self.value_columns)}) and 'column_labels' (size {len(self._column_labels)}) must have equal length"
176+
177+
# Adjust column_labels and value_columns to match
178+
if len(all_value_columns) > len(self._column_labels):
179+
# More columns than labels: Drop the extra columns (assumed to be internal/garbage)
180+
self._value_columns = all_value_columns[: len(self._column_labels)]
181+
# Prune the expression to remove hidden columns
182+
self._expr = self._expr.select_columns(
183+
[*self.index_columns, *self._value_columns]
171184
)
185+
elif len(all_value_columns) < len(self._column_labels):
186+
# Fewer columns than labels: Truncate labels
187+
self._value_columns = all_value_columns
188+
self._column_labels = self._column_labels[: len(self._value_columns)]
189+
else:
190+
self._value_columns = all_value_columns
191+
172192
# col_id -> [stat_name -> scalar]
173193
# TODO: Preserve cache under safe transforms (eg. drop column, reorder)
174194
self._stats_cache: dict[str, dict[str, typing.Any]] = {
@@ -285,11 +305,15 @@ def index_columns(self) -> Sequence[str]:
285305
@property
286306
def value_columns(self) -> Sequence[str]:
287307
"""All value columns, mutually exclusive with index columns."""
288-
return [
289-
column
290-
for column in self._expr.column_ids
291-
if column not in self.index_columns
292-
]
308+
return getattr(
309+
self,
310+
"_value_columns",
311+
[
312+
column
313+
for column in self._expr.column_ids
314+
if column not in self.index_columns
315+
],
316+
)
293317

294318
@property
295319
def column_labels(self) -> pd.Index:
@@ -3471,18 +3495,31 @@ def _pd_index_to_array_value(
34713495
Create an ArrayValue from a list of label tuples.
34723496
The last column will be row offsets.
34733497
"""
3498+
id_gen = bigframes.core.identifiers.standard_id_strings()
3499+
index_ids = [next(id_gen) for _ in range(index.nlevels)]
3500+
offset_id = next(id_gen)
3501+
34743502
rows = []
34753503
labels_as_tuples = utils.index_as_tuples(index)
34763504
for row_offset in range(len(index)):
3477-
id_gen = bigframes.core.identifiers.standard_id_strings()
34783505
row_label = labels_as_tuples[row_offset]
3479-
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
3480-
row = {}
3481-
for label_part, id in zip(row_label, id_gen):
3482-
row[id] = label_part if pd.notnull(label_part) else None
3483-
row[next(id_gen)] = row_offset
3506+
row = {
3507+
id: (val if pd.notnull(val) else None)
3508+
for id, val in zip(index_ids, row_label)
3509+
}
3510+
row[offset_id] = row_offset
34843511
rows.append(row)
34853512

3513+
if not rows:
3514+
# Create empty table with correct columns
3515+
schema = pa.schema(
3516+
[pa.field(id, pa.null()) for id in index_ids]
3517+
+ [pa.field(offset_id, pa.int64())]
3518+
)
3519+
return core.ArrayValue.from_pyarrow(
3520+
pa.Table.from_batches([], schema=schema), session=session
3521+
)
3522+
34863523
return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
34873524

34883525

0 commit comments

Comments
 (0)