fix: fix the length mismatch when unpivoting with an empty label index

shuoweil · shuoweil · commit 991a36374941 · 2026-01-12T21:36:34.000Z
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
@@ -3432,18 +3432,6 @@ def unpivot(
             array_value, type="cross"
         )
     new_passthrough_cols = [column_mapping[col] for col in passthrough_columns]
-    # Last column is offsets
-    if not labels_array.column_ids:
-        # Handle empty column_ids case for multimodal DataFrames
-        # When no index columns exist, return original array_value with identity mappings
-        value_cols = [
-            col for col in array_value.column_ids if col not in passthrough_columns
-        ]
-        return array_value, (
-            tuple(),
-            tuple(value_cols),
-            tuple(passthrough_columns),
-        )
     index_col_ids = [labels_mapping[col] for col in labels_array.column_ids[:-1]]
     explode_offsets_id = labels_mapping[labels_array.column_ids[-1]]
 
@@ -3453,6 +3441,10 @@ def unpivot(
     for input_ids in unpivot_columns:
         # row explode offset used to choose the input column
         # we use offset instead of label as labels are not necessarily unique
+        if not input_ids:
+            unpivot_exprs.append(ex.const(None))
+            continue
+
         cases = itertools.chain(
             *(
                 (
@@ -3482,19 +3474,31 @@ def _pd_index_to_array_value(
     Create an ArrayValue from a list of label tuples.
     The last column will be row offsets.
     """
+    id_gen = bigframes.core.identifiers.standard_id_strings()
+    index_ids = [next(id_gen) for _ in range(index.nlevels)]
+    offset_id = next(id_gen)
 
     rows = []
     labels_as_tuples = utils.index_as_tuples(index)
     for row_offset in range(len(index)):
-        id_gen = bigframes.core.identifiers.standard_id_strings()
         row_label = labels_as_tuples[row_offset]
-        row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
-        row = {}
-        for label_part, id in zip(row_label, id_gen):
-            row[id] = label_part if pd.notnull(label_part) else None
-        row[next(id_gen)] = row_offset
+        row = {
+            id: (val if pd.notnull(val) else None)
+            for id, val in zip(index_ids, row_label)
+        }
+        row[offset_id] = row_offset
         rows.append(row)
 
+    if not rows:
+        # from_pylist([]) yields no columns; build the schema explicitly
+        schema = pa.schema(
+            [pa.field(id, pa.null()) for id in index_ids]
+            + [pa.field(offset_id, pa.int64())]
+        )
+        return core.ArrayValue.from_pyarrow(
+            pa.Table.from_batches([], schema=schema), session=session
+        )
+
     return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
 
 
diff --git a/bigframes/pandas/core/methods/describe.py b/bigframes/pandas/core/methods/describe.py
@@ -17,6 +17,7 @@
 import typing
 
 import pandas as pd
+import pyarrow as pa
 
 from bigframes import dataframe, dtypes, series
 from bigframes.core import agg_expressions, blocks
@@ -86,9 +87,13 @@ def _describe(
         if include != "all" and dtype not in _DEFAULT_DTYPES:
             continue
         agg_ops = _get_aggs_for_dtype(dtype)
-        stats.extend(op.as_expr(col_id) for op in agg_ops)
-        label_tuple = (label,) if block.column_labels.nlevels == 1 else label
-        column_labels.extend((*label_tuple, op.name) for op in agg_ops)  # type: ignore
+
+        label_tuple = (
+            (label,) if block.column_labels.nlevels == 1 else typing.cast(tuple, label)
+        )
+        for op in agg_ops:
+            stats.append(op.as_expr(col_id))
+            column_labels.append((*label_tuple, op.name))
 
     agg_block = block.aggregate(
         by_column_ids=by_col_ids,
@@ -100,7 +105,7 @@ def _describe(
 
 
 def _get_aggs_for_dtype(dtype) -> list[aggregations.UnaryAggregateOp]:
-    if dtype in dtypes.NUMERIC_BIGFRAMES_TYPES_RESTRICTIVE:
+    if dtypes.is_numeric(dtype, include_bool=False):
         return [
             aggregations.count_op,
             aggregations.mean_op,
@@ -111,14 +116,18 @@ def _get_aggs_for_dtype(dtype) -> list[aggregations.UnaryAggregateOp]:
             aggregations.ApproxQuartilesOp(3),
             aggregations.max_op,
         ]
-    elif dtype in dtypes.TEMPORAL_NUMERIC_BIGFRAMES_TYPES:
+    elif dtypes.is_datetime_like(dtype) or dtypes.is_date_like(dtype):
         return [aggregations.count_op]
-    elif dtype in [
-        dtypes.STRING_DTYPE,
-        dtypes.BOOL_DTYPE,
-        dtypes.BYTES_DTYPE,
-        dtypes.TIME_DTYPE,
-    ]:
+    elif (
+        dtypes.is_string_like(dtype)
+        or dtypes.is_binary_like(dtype)
+        or dtypes.is_time_like(dtype)
+        or (
+            isinstance(dtype, pd.ArrowDtype)
+            and pa.types.is_struct(dtype.pyarrow_dtype)
+            and dtype != dtypes.OBJ_REF_DTYPE
+        )
+    ):
         return [aggregations.count_op, aggregations.nunique_op]
     else:
-        return []
+        return [aggregations.count_op]
diff --git a/tests/unit/core/test_blocks_unpivot.py b/tests/unit/core/test_blocks_unpivot.py
@@ -30,30 +30,32 @@ def mock_session():
 def test_pd_index_to_array_value_with_empty_index_creates_no_columns(mock_session):
     """
     Tests that `_pd_index_to_array_value` with an empty pandas Index creates
-    an ArrayValue with no columns.
+    an ArrayValue with the expected number of columns (index level + offset).
     """
     empty_index = pd.Index([], name="test")
 
     array_val = blocks._pd_index_to_array_value(mock_session, empty_index)
 
-    assert len(array_val.column_ids) == 0
+    # 1 index level + 1 offset column
+    assert len(array_val.column_ids) == 2
 
 
 def test_pd_index_to_array_value_with_empty_multiindex_creates_no_columns(mock_session):
     """
     Tests that `_pd_index_to_array_value` with an empty pandas MultiIndex creates
-    an ArrayValue with no columns.
+    an ArrayValue with the expected number of columns (index levels + offset).
     """
     empty_index = pd.MultiIndex.from_arrays([[], []], names=["a", "b"])
 
     array_val = blocks._pd_index_to_array_value(mock_session, empty_index)
 
-    assert len(array_val.column_ids) == 0
+    # 2 index levels + 1 offset column
+    assert len(array_val.column_ids) == 3
 
 
 def test_unpivot_with_empty_row_labels(mock_session):
     """
-    Tests that `unpivot` handles an empty `row_labels` index correctly.
+    Tests that `unpivot` handles an empty `row_labels` index correctly by producing 0 rows.
     """
     import pyarrow as pa
 
@@ -70,9 +72,8 @@ def test_unpivot_with_empty_row_labels(mock_session):
         passthrough_columns=["b"],
     )
 
-    # The expected behavior is that the unpivot operation does nothing and returns
-    # the original array_value and identity mappings.
-    assert unpivot_result is array_value
-    assert index_cols == tuple()
-    assert value_cols == ("a",)
+    # The expected behavior is that the unpivot operation produces 0 rows.
+    assert unpivot_result is not array_value
+    assert index_cols == ("col_0",)
+    assert len(value_cols) == 1
     assert passthrough_cols == ("b",)