Skip to content

Commit 991a363

Browse files
committed
fix: fix the length mismatch
1 parent: 210cb2f · commit: 991a363

File tree

3 files changed

+54
-40
lines changed

3 files changed

+54
-40
lines changed

bigframes/core/blocks.py

Lines changed: 22 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3432,18 +3432,6 @@ def unpivot(
34323432
array_value, type="cross"
34333433
)
34343434
new_passthrough_cols = [column_mapping[col] for col in passthrough_columns]
3435-
# Last column is offsets
3436-
if not labels_array.column_ids:
3437-
# Handle empty column_ids case for multimodal DataFrames
3438-
# When no index columns exist, return original array_value with identity mappings
3439-
value_cols = [
3440-
col for col in array_value.column_ids if col not in passthrough_columns
3441-
]
3442-
return array_value, (
3443-
tuple(),
3444-
tuple(value_cols),
3445-
tuple(passthrough_columns),
3446-
)
34473435
index_col_ids = [labels_mapping[col] for col in labels_array.column_ids[:-1]]
34483436
explode_offsets_id = labels_mapping[labels_array.column_ids[-1]]
34493437

@@ -3453,6 +3441,10 @@ def unpivot(
34533441
for input_ids in unpivot_columns:
34543442
# row explode offset used to choose the input column
34553443
# we use offset instead of label as labels are not necessarily unique
3444+
if not input_ids:
3445+
unpivot_exprs.append(ex.const(None))
3446+
continue
3447+
34563448
cases = itertools.chain(
34573449
*(
34583450
(
@@ -3482,19 +3474,31 @@ def _pd_index_to_array_value(
34823474
Create an ArrayValue from a list of label tuples.
34833475
The last column will be row offsets.
34843476
"""
3477+
id_gen = bigframes.core.identifiers.standard_id_strings()
3478+
index_ids = [next(id_gen) for _ in range(index.nlevels)]
3479+
offset_id = next(id_gen)
34853480

34863481
rows = []
34873482
labels_as_tuples = utils.index_as_tuples(index)
34883483
for row_offset in range(len(index)):
3489-
id_gen = bigframes.core.identifiers.standard_id_strings()
34903484
row_label = labels_as_tuples[row_offset]
3491-
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
3492-
row = {}
3493-
for label_part, id in zip(row_label, id_gen):
3494-
row[id] = label_part if pd.notnull(label_part) else None
3495-
row[next(id_gen)] = row_offset
3485+
row = {
3486+
id: (val if pd.notnull(val) else None)
3487+
for id, val in zip(index_ids, row_label)
3488+
}
3489+
row[offset_id] = row_offset
34963490
rows.append(row)
34973491

3492+
if not rows:
3493+
# Create empty table with correct columns
3494+
schema = pa.schema(
3495+
[pa.field(id, pa.null()) for id in index_ids]
3496+
+ [pa.field(offset_id, pa.int64())]
3497+
)
3498+
return core.ArrayValue.from_pyarrow(
3499+
pa.Table.from_batches([], schema=schema), session=session
3500+
)
3501+
34983502
return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
34993503

35003504

bigframes/pandas/core/methods/describe.py

Lines changed: 21 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
import typing
1818

1919
import pandas as pd
20+
import pyarrow as pa
2021

2122
from bigframes import dataframe, dtypes, series
2223
from bigframes.core import agg_expressions, blocks
@@ -86,9 +87,13 @@ def _describe(
8687
if include != "all" and dtype not in _DEFAULT_DTYPES:
8788
continue
8889
agg_ops = _get_aggs_for_dtype(dtype)
89-
stats.extend(op.as_expr(col_id) for op in agg_ops)
90-
label_tuple = (label,) if block.column_labels.nlevels == 1 else label
91-
column_labels.extend((*label_tuple, op.name) for op in agg_ops) # type: ignore
90+
91+
label_tuple = (
92+
(label,) if block.column_labels.nlevels == 1 else typing.cast(tuple, label)
93+
)
94+
for op in agg_ops:
95+
stats.append(op.as_expr(col_id))
96+
column_labels.append((*label_tuple, op.name))
9297

9398
agg_block = block.aggregate(
9499
by_column_ids=by_col_ids,
@@ -100,7 +105,7 @@ def _describe(
100105

101106

102107
def _get_aggs_for_dtype(dtype) -> list[aggregations.UnaryAggregateOp]:
103-
if dtype in dtypes.NUMERIC_BIGFRAMES_TYPES_RESTRICTIVE:
108+
if dtypes.is_numeric(dtype, include_bool=False):
104109
return [
105110
aggregations.count_op,
106111
aggregations.mean_op,
@@ -111,14 +116,18 @@ def _get_aggs_for_dtype(dtype) -> list[aggregations.UnaryAggregateOp]:
111116
aggregations.ApproxQuartilesOp(3),
112117
aggregations.max_op,
113118
]
114-
elif dtype in dtypes.TEMPORAL_NUMERIC_BIGFRAMES_TYPES:
119+
elif dtypes.is_datetime_like(dtype) or dtypes.is_date_like(dtype):
115120
return [aggregations.count_op]
116-
elif dtype in [
117-
dtypes.STRING_DTYPE,
118-
dtypes.BOOL_DTYPE,
119-
dtypes.BYTES_DTYPE,
120-
dtypes.TIME_DTYPE,
121-
]:
121+
elif (
122+
dtypes.is_string_like(dtype)
123+
or dtypes.is_binary_like(dtype)
124+
or dtypes.is_time_like(dtype)
125+
or (
126+
isinstance(dtype, pd.ArrowDtype)
127+
and pa.types.is_struct(dtype.pyarrow_dtype)
128+
and dtype != dtypes.OBJ_REF_DTYPE
129+
)
130+
):
122131
return [aggregations.count_op, aggregations.nunique_op]
123132
else:
124-
return []
133+
return [aggregations.count_op]

tests/unit/core/test_blocks_unpivot.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -30,30 +30,32 @@ def mock_session():
3030
def test_pd_index_to_array_value_with_empty_index_creates_no_columns(mock_session):
3131
"""
3232
Tests that `_pd_index_to_array_value` with an empty pandas Index creates
33-
an ArrayValue with no columns.
33+
an ArrayValue with the expected number of columns (index level + offset).
3434
"""
3535
empty_index = pd.Index([], name="test")
3636

3737
array_val = blocks._pd_index_to_array_value(mock_session, empty_index)
3838

39-
assert len(array_val.column_ids) == 0
39+
# 1 index level + 1 offset column
40+
assert len(array_val.column_ids) == 2
4041

4142

4243
def test_pd_index_to_array_value_with_empty_multiindex_creates_no_columns(mock_session):
4344
"""
4445
Tests that `_pd_index_to_array_value` with an empty pandas MultiIndex creates
45-
an ArrayValue with no columns.
46+
an ArrayValue with the expected number of columns (index levels + offset).
4647
"""
4748
empty_index = pd.MultiIndex.from_arrays([[], []], names=["a", "b"])
4849

4950
array_val = blocks._pd_index_to_array_value(mock_session, empty_index)
5051

51-
assert len(array_val.column_ids) == 0
52+
# 2 index levels + 1 offset column
53+
assert len(array_val.column_ids) == 3
5254

5355

5456
def test_unpivot_with_empty_row_labels(mock_session):
5557
"""
56-
Tests that `unpivot` handles an empty `row_labels` index correctly.
58+
Tests that `unpivot` handles an empty `row_labels` index correctly by producing 0 rows.
5759
"""
5860
import pyarrow as pa
5961

@@ -70,9 +72,8 @@ def test_unpivot_with_empty_row_labels(mock_session):
7072
passthrough_columns=["b"],
7173
)
7274

73-
# The expected behavior is that the unpivot operation does nothing and returns
74-
# the original array_value and identity mappings.
75-
assert unpivot_result is array_value
76-
assert index_cols == tuple()
77-
assert value_cols == ("a",)
75+
# The expected behavior is that the unpivot operation produces 0 rows.
76+
assert unpivot_result is not array_value
77+
assert index_cols == ("col_0",)
78+
assert len(value_cols) == 1
7879
assert passthrough_cols == ("b",)

0 commit comments

Comments
 (0)