3030
3131def _flatten_nested_data (
3232 dataframe : pd .DataFrame ,
33- ) -> Tuple [pd .DataFrame , Dict [str , List [int ]]]:
33+ ) -> Tuple [pd .DataFrame , Dict [str , List [int ]], List [ str ] ]:
3434 """Flatten nested STRUCT and ARRAY columns for display."""
3535 if dataframe .empty :
36- return dataframe .copy (), {}
36+ return dataframe .copy (), {}, []
3737
3838 result_df = dataframe .copy ()
39+ initial_columns = list (result_df .columns )
3940
4041 # Attempt to parse JSON-like strings into structs or arrays
4142 for col_name in result_df .columns :
@@ -60,22 +61,25 @@ def _flatten_nested_data(
6061 struct_columns : List [str ] = []
6162 array_columns : List [str ] = []
6263 array_of_struct_columns : List [str ] = []
64+ clear_on_continuation_cols : List [str ] = []
6365
6466 for col_name_raw , col_data in result_df .items ():
6567 col_name = str (col_name_raw )
66- # Fix: Use isinstance for proper type narrowing
6768 dtype = col_data .dtype
6869 if isinstance (dtype , pd .ArrowDtype ):
6970 pa_type = dtype .pyarrow_dtype
7071 if pa .types .is_struct (pa_type ):
7172 struct_columns .append (col_name )
7273 elif pa .types .is_list (pa_type ):
7374 array_columns .append (col_name )
74- # Check if it's an ARRAY of STRUCT
7575 if hasattr (pa_type , "value_type" ) and pa .types .is_struct (
7676 pa_type .value_type
7777 ):
7878 array_of_struct_columns .append (col_name )
79+ else :
80+ clear_on_continuation_cols .append (col_name )
81+ elif col_name in initial_columns :
82+ clear_on_continuation_cols .append (col_name )
7983
8084 # Handle ARRAY of STRUCT columns first
8185 for col_name in array_of_struct_columns :
@@ -123,6 +127,7 @@ def _flatten_nested_data(
123127 for field_idx in range (pa_type .num_fields ):
124128 field = pa_type .field (field_idx )
125129 new_col_name = f"{ col_name } .{ field .name } "
130+ clear_on_continuation_cols .append (new_col_name )
126131
127132 regular_field_values : List [Any ] = []
128133 for val in col_data :
@@ -137,36 +142,39 @@ def _flatten_nested_data(
137142
138143 # Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
139144 if not array_columns :
140- return result_df , array_row_groups
145+ return result_df , array_row_groups , clear_on_continuation_cols
141146
142- # Find the maximum length of all array columns
143- max_array_length = 0
144- for col_name in array_columns :
145- col_data = result_df [col_name ]
146- for val in col_data :
147- if val is not None and not (
148- isinstance (val , list ) and len (val ) == 1 and pd .isna (val [0 ])
149- ):
150- max_array_length = max (max_array_length , len (val ))
151-
152- # Create exploded rows
147+ # Find the maximum length of array columns within each row
153148 exploded_rows = []
154149 for orig_idx , row in result_df .iterrows ():
155- # Get array values for this row
156150 array_values = {}
151+ max_len_in_row = 0
152+ non_na_array_found = False
153+
157154 for col_name in array_columns :
158155 val = row [col_name ]
159- if val is None or (
156+ if val is not None and not (
160157 isinstance (val , list ) and len (val ) == 1 and pd .isna (val [0 ])
161158 ):
162- array_values [col_name ] = [pd .NA ] * max_array_length
159+ array_values [col_name ] = list (val )
160+ max_len_in_row = max (max_len_in_row , len (val ))
161+ non_na_array_found = True
163162 else :
164- array_len = len (val )
165- padded_val = list (val ) + [pd .NA ] * (max_array_length - array_len )
166- array_values [col_name ] = padded_val
163+ array_values [col_name ] = []
164+
165+ if not non_na_array_found :
166+ new_row = row .copy ()
167+ for col_name in array_columns :
168+ new_row [f"{ col_name } " ] = pd .NA
169+ exploded_rows .append (new_row )
170+ orig_key = str (orig_idx )
171+ if orig_key not in array_row_groups :
172+ array_row_groups [orig_key ] = []
173+ array_row_groups [orig_key ].append (len (exploded_rows ) - 1 )
174+ continue
167175
168- # Create one row per array element
169- for array_idx in range (max_array_length ):
176+ # Create one row per array element, up to max_len_in_row
177+ for array_idx in range (max_len_in_row ):
170178 new_row = row .copy ()
171179
172180 # Remove array columns from the row copy
@@ -175,7 +183,10 @@ def _flatten_nested_data(
175183
176184 # Add the specific array element for this index
177185 for col_name in array_columns :
178- new_row [f"{ col_name } " ] = array_values [col_name ][array_idx ]
186+ if array_idx < len (array_values .get (col_name , [])):
187+ new_row [f"{ col_name } " ] = array_values [col_name ][array_idx ]
188+ else :
189+ new_row [f"{ col_name } " ] = pd .NA
179190
180191 exploded_rows .append (new_row )
181192
@@ -187,9 +198,9 @@ def _flatten_nested_data(
187198
188199 if exploded_rows :
189200 exploded_df = pd .DataFrame (exploded_rows )
190- return exploded_df , array_row_groups
201+ return exploded_df , array_row_groups , clear_on_continuation_cols
191202 else :
192- return result_df , array_row_groups
203+ return result_df , array_row_groups , clear_on_continuation_cols
193204
194205
195206def _is_dtype_numeric (dtype ) -> bool :
@@ -204,7 +215,11 @@ def render_html(
204215) -> str :
205216 """Render a pandas DataFrame to HTML with specific styling and nested data support."""
206217 # Flatten nested data first
207- flattened_df , array_row_groups = _flatten_nested_data (dataframe )
218+ (
219+ flattened_df ,
220+ array_row_groups ,
221+ clear_on_continuation ,
222+ ) = _flatten_nested_data (dataframe )
208223
209224 classes = "dataframe table table-striped table-hover"
210225 table_html = [f'<table border="1" class="{ classes } " id="{ table_id } ">' ]
@@ -226,10 +241,12 @@ def render_html(
226241 # Determine if this is an array continuation row
227242 row_class = ""
228243 orig_row_idx = None
244+ is_continuation = False
229245 for orig_key , row_indices in array_row_groups .items ():
230246 if i in row_indices and row_indices [0 ] != i :
231247 row_class = "array-continuation"
232248 orig_row_idx = orig_key
249+ is_continuation = True
233250 break
234251
235252 if row_class :
@@ -241,14 +258,18 @@ def render_html(
241258
242259 row = flattened_df .iloc [i ]
243260 for col_name , value in row .items ():
261+ col_name_str = str (col_name )
262+ if is_continuation and col_name_str in clear_on_continuation :
263+ table_html .append (' <td style="padding: 0.5em;"></td>' )
264+ continue
244265 dtype = flattened_df .dtypes .loc [col_name ] # type: ignore
245266 align = "right" if _is_dtype_numeric (dtype ) else "left"
246267 table_html .append (
247268 ' <td style="text-align: {}; padding: 0.5em;">' .format (align )
248269 )
249270
250271 if pandas .api .types .is_scalar (value ) and pd .isna (value ):
251- table_html .append (' <em style="color: gray;"><NA></em>' )
272+ table_html .append (" " )
252273 else :
253274 if isinstance (value , float ):
254275 formatted_value = f"{ value :.{precision }f} "
0 commit comments