@@ -89,13 +89,6 @@ def flatten_nested_data(
8989) -> FlattenResult :
9090 """Flatten nested STRUCT and ARRAY columns for display.
9191
92- This function coordinates the flattening process:
93- 1. Classifies columns into STRUCT, ARRAY, ARRAY-of-STRUCT, and standard types.
94- 2. Flattens ARRAY-of-STRUCT columns into multiple ARRAY columns (one per struct field).
95- This simplifies the subsequent explosion step.
96- 3. Flattens top-level STRUCT columns into separate columns.
97- 4. Explodes all ARRAY columns (original and those from step 2) into multiple rows.
98-
9992 Args:
10093 dataframe: The input DataFrame containing potential nested structures.
10194
@@ -111,6 +104,12 @@ def flatten_nested_data(
111104 nested_columns = set (),
112105 )
113106
107+ # This function coordinates the flattening process:
108+ # 1. Classifies columns into STRUCT, ARRAY, ARRAY-of-STRUCT, and standard types.
109+ # 2. Flattens ARRAY-of-STRUCT columns into multiple ARRAY columns (one per struct field).
110+ # This simplifies the subsequent explosion step.
111+ # 3. Flattens top-level STRUCT columns into separate columns.
112+ # 4. Explodes all ARRAY columns (original and those from step 2) into multiple rows.
114113 result_df = dataframe .copy ()
115114
116115 classification = _classify_columns (result_df )
@@ -156,15 +155,14 @@ def _classify_columns(
156155) -> ColumnClassification :
157156 """Identify all STRUCT and ARRAY columns in the DataFrame.
158157
159- It inspects the PyArrow dtype of each column to determine if it is a
160- STRUCT, LIST (Array), or LIST of STRUCTs.
161-
162158 Args:
163159 dataframe: The DataFrame to inspect.
164160
165161 Returns:
166162 A ColumnClassification object containing lists of column names for each category.
167163 """
164+ # Inspect the PyArrow dtype of each column to determine whether it is a
165+ # STRUCT, a LIST (array), or a LIST of STRUCTs.
168166 initial_columns = list (dataframe .columns )
169167 struct_columns : list [str ] = []
170168 array_columns : list [str ] = []
@@ -283,13 +281,6 @@ def _explode_array_columns(
283281 It handles multiple array columns by ensuring they are exploded in sync
284282 relative to the other columns.
285283
286- Design details:
287- - We group by all non-array columns to maintain context.
288- - `_row_num` is used to track the index within the exploded array, effectively
289- synchronizing multiple arrays if they belong to the same row.
290- - Continuation rows (index > 0 in the explosion) are tracked so we can clear
291- repeated values in the display.
292-
293284 Args:
294285 dataframe: The DataFrame to explode.
295286 array_columns: List of array columns to explode.
@@ -300,6 +291,12 @@ def _explode_array_columns(
300291 if not array_columns :
301292 return ExplodeResult (dataframe , [], set ())
302293
294+ # Implementation details:
295+ # - We group by all non-array columns to maintain context.
296+ # - `_row_num` tracks the index within the exploded array, keeping multiple
297+ # arrays synchronized when they belong to the same row.
298+ # - Continuation rows (index > 0 in the explosion) are tracked so we can clear
299+ # repeated values in the display.
303300 original_cols = dataframe .columns .tolist ()
304301 work_df = dataframe
305302
0 commit comments