File tree Expand file tree Collapse file tree 1 file changed +18
-1
lines changed
Expand file tree Collapse file tree 1 file changed +18
-1
lines changed Original file line number Diff line number Diff line change @@ -220,7 +220,24 @@ def _explode_array_columns(
220220 exploded_dfs = []
221221 for col in array_columns :
222222 # Explode each array column individually
223- exploded = work_df [non_array_columns + [col ]].explode (col )
223+ col_series = work_df [col ]
224+ target_dtype = None
225+ if isinstance (col_series .dtype , pd .ArrowDtype ):
226+ pa_type = col_series .dtype .pyarrow_dtype
227+ if pa .types .is_list (pa_type ):
228+ target_dtype = pd .ArrowDtype (pa_type .value_type )
229+ # Use to_list() to avoid pandas attempting to create a 2D numpy
230+ # array if the list elements have the same length.
231+ col_series = pd .Series (
232+ col_series .to_list (), index = col_series .index , dtype = object
233+ )
234+
235+ exploded = work_df [non_array_columns ].assign (** {col : col_series }).explode (col )
236+
237+ if target_dtype is not None :
238+ # Restore the Arrow element dtype that was dropped by the object-dtype conversion above
239+ exploded [col ] = exploded [col ].astype (target_dtype )
240+
224241 exploded ["_row_num" ] = exploded .groupby (non_array_columns ).cumcount ()
225242 exploded_dfs .append (exploded )
226243
You can’t perform that action at this time.
0 commit comments