Skip to content

Commit 68b7fbb

Browse files
committed
fix(display): fix explode on Arrow list columns in flatten_nested_data
1 parent 159d6a5 commit 68b7fbb

File tree

1 file changed

+18
-1
lines changed

1 file changed

+18
-1
lines changed

bigframes/display/_flatten.py

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,24 @@ def _explode_array_columns(
220220
exploded_dfs = []
221221
for col in array_columns:
222222
# Explode each array column individually
223-
exploded = work_df[non_array_columns + [col]].explode(col)
223+
col_series = work_df[col]
224+
target_dtype = None
225+
if isinstance(col_series.dtype, pd.ArrowDtype):
226+
pa_type = col_series.dtype.pyarrow_dtype
227+
if pa.types.is_list(pa_type):
228+
target_dtype = pd.ArrowDtype(pa_type.value_type)
229+
# Use to_list() to avoid pandas attempting to create a 2D numpy
230+
# array if the list elements have the same length.
231+
col_series = pd.Series(
232+
col_series.to_list(), index=col_series.index, dtype=object
233+
)
234+
235+
exploded = work_df[non_array_columns].assign(**{col: col_series}).explode(col)
236+
237+
if target_dtype is not None:
238+
# Re-cast to arrow dtype if possible
239+
exploded[col] = exploded[col].astype(target_dtype)
240+
224241
exploded["_row_num"] = exploded.groupby(non_array_columns).cumcount()
225242
exploded_dfs.append(exploded)
226243

0 commit comments

Comments
 (0)