Skip to content

Commit 98c7b88

Browse files
committed
feat(display): improve display of nested data types
1 parent 700f96c commit 98c7b88

File tree

4 files changed

+356
-306
lines changed

4 files changed

+356
-306
lines changed

bigframes/display/html.py

Lines changed: 50 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,13 @@
3030

3131
def _flatten_nested_data(
3232
dataframe: pd.DataFrame,
33-
) -> Tuple[pd.DataFrame, Dict[str, List[int]]]:
33+
) -> Tuple[pd.DataFrame, Dict[str, List[int]], List[str]]:
3434
"""Flatten nested STRUCT and ARRAY columns for display."""
3535
if dataframe.empty:
36-
return dataframe.copy(), {}
36+
return dataframe.copy(), {}, []
3737

3838
result_df = dataframe.copy()
39+
initial_columns = list(result_df.columns)
3940

4041
# Attempt to parse JSON-like strings into structs or arrays
4142
for col_name in result_df.columns:
@@ -60,22 +61,25 @@ def _flatten_nested_data(
6061
struct_columns: List[str] = []
6162
array_columns: List[str] = []
6263
array_of_struct_columns: List[str] = []
64+
clear_on_continuation_cols: List[str] = []
6365

6466
for col_name_raw, col_data in result_df.items():
6567
col_name = str(col_name_raw)
66-
# Fix: Use isinstance for proper type narrowing
6768
dtype = col_data.dtype
6869
if isinstance(dtype, pd.ArrowDtype):
6970
pa_type = dtype.pyarrow_dtype
7071
if pa.types.is_struct(pa_type):
7172
struct_columns.append(col_name)
7273
elif pa.types.is_list(pa_type):
7374
array_columns.append(col_name)
74-
# Check if it's an ARRAY of STRUCT
7575
if hasattr(pa_type, "value_type") and pa.types.is_struct(
7676
pa_type.value_type
7777
):
7878
array_of_struct_columns.append(col_name)
79+
else:
80+
clear_on_continuation_cols.append(col_name)
81+
elif col_name in initial_columns:
82+
clear_on_continuation_cols.append(col_name)
7983

8084
# Handle ARRAY of STRUCT columns first
8185
for col_name in array_of_struct_columns:
@@ -123,6 +127,7 @@ def _flatten_nested_data(
123127
for field_idx in range(pa_type.num_fields):
124128
field = pa_type.field(field_idx)
125129
new_col_name = f"{col_name}.{field.name}"
130+
clear_on_continuation_cols.append(new_col_name)
126131

127132
regular_field_values: List[Any] = []
128133
for val in col_data:
@@ -137,36 +142,39 @@ def _flatten_nested_data(
137142

138143
# Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
139144
if not array_columns:
140-
return result_df, array_row_groups
145+
return result_df, array_row_groups, clear_on_continuation_cols
141146

142-
# Find the maximum length of all array columns
143-
max_array_length = 0
144-
for col_name in array_columns:
145-
col_data = result_df[col_name]
146-
for val in col_data:
147-
if val is not None and not (
148-
isinstance(val, list) and len(val) == 1 and pd.isna(val[0])
149-
):
150-
max_array_length = max(max_array_length, len(val))
151-
152-
# Create exploded rows
147+
# Find the maximum length of array columns within each row
153148
exploded_rows = []
154149
for orig_idx, row in result_df.iterrows():
155-
# Get array values for this row
156150
array_values = {}
151+
max_len_in_row = 0
152+
non_na_array_found = False
153+
157154
for col_name in array_columns:
158155
val = row[col_name]
159-
if val is None or (
156+
if val is not None and not (
160157
isinstance(val, list) and len(val) == 1 and pd.isna(val[0])
161158
):
162-
array_values[col_name] = [pd.NA] * max_array_length
159+
array_values[col_name] = list(val)
160+
max_len_in_row = max(max_len_in_row, len(val))
161+
non_na_array_found = True
163162
else:
164-
array_len = len(val)
165-
padded_val = list(val) + [pd.NA] * (max_array_length - array_len)
166-
array_values[col_name] = padded_val
163+
array_values[col_name] = []
164+
165+
if not non_na_array_found:
166+
new_row = row.copy()
167+
for col_name in array_columns:
168+
new_row[f"{col_name}"] = pd.NA
169+
exploded_rows.append(new_row)
170+
orig_key = str(orig_idx)
171+
if orig_key not in array_row_groups:
172+
array_row_groups[orig_key] = []
173+
array_row_groups[orig_key].append(len(exploded_rows) - 1)
174+
continue
167175

168-
# Create one row per array element
169-
for array_idx in range(max_array_length):
176+
# Create one row per array element, up to max_len_in_row
177+
for array_idx in range(max_len_in_row):
170178
new_row = row.copy()
171179

172180
# Remove array columns from the row copy
@@ -175,7 +183,10 @@ def _flatten_nested_data(
175183

176184
# Add the specific array element for this index
177185
for col_name in array_columns:
178-
new_row[f"{col_name}"] = array_values[col_name][array_idx]
186+
if array_idx < len(array_values.get(col_name, [])):
187+
new_row[f"{col_name}"] = array_values[col_name][array_idx]
188+
else:
189+
new_row[f"{col_name}"] = pd.NA
179190

180191
exploded_rows.append(new_row)
181192

@@ -187,9 +198,9 @@ def _flatten_nested_data(
187198

188199
if exploded_rows:
189200
exploded_df = pd.DataFrame(exploded_rows)
190-
return exploded_df, array_row_groups
201+
return exploded_df, array_row_groups, clear_on_continuation_cols
191202
else:
192-
return result_df, array_row_groups
203+
return result_df, array_row_groups, clear_on_continuation_cols
193204

194205

195206
def _is_dtype_numeric(dtype) -> bool:
@@ -204,7 +215,11 @@ def render_html(
204215
) -> str:
205216
"""Render a pandas DataFrame to HTML with specific styling and nested data support."""
206217
# Flatten nested data first
207-
flattened_df, array_row_groups = _flatten_nested_data(dataframe)
218+
(
219+
flattened_df,
220+
array_row_groups,
221+
clear_on_continuation,
222+
) = _flatten_nested_data(dataframe)
208223

209224
classes = "dataframe table table-striped table-hover"
210225
table_html = [f'<table border="1" class="{classes}" id="{table_id}">']
@@ -226,10 +241,12 @@ def render_html(
226241
# Determine if this is an array continuation row
227242
row_class = ""
228243
orig_row_idx = None
244+
is_continuation = False
229245
for orig_key, row_indices in array_row_groups.items():
230246
if i in row_indices and row_indices[0] != i:
231247
row_class = "array-continuation"
232248
orig_row_idx = orig_key
249+
is_continuation = True
233250
break
234251

235252
if row_class:
@@ -241,14 +258,18 @@ def render_html(
241258

242259
row = flattened_df.iloc[i]
243260
for col_name, value in row.items():
261+
col_name_str = str(col_name)
262+
if is_continuation and col_name_str in clear_on_continuation:
263+
table_html.append(' <td style="padding: 0.5em;"></td>')
264+
continue
244265
dtype = flattened_df.dtypes.loc[col_name] # type: ignore
245266
align = "right" if _is_dtype_numeric(dtype) else "left"
246267
table_html.append(
247268
' <td style="text-align: {}; padding: 0.5em;">'.format(align)
248269
)
249270

250271
if pandas.api.types.is_scalar(value) and pd.isna(value):
251-
table_html.append(' <em style="color: gray;">&lt;NA&gt;</em>')
272+
table_html.append(" ")
252273
else:
253274
if isinstance(value, float):
254275
formatted_value = f"{value:.{precision}f}"

0 commit comments

Comments
 (0)