Skip to content

Commit bd56992

Browse files
committed
refactor: move display logic to display/plaintext.py and display/html.py
1 parent a474606 commit bd56992

File tree

4 files changed

+122
-133
lines changed

4 files changed

+122
-133
lines changed

bigframes/dataframe.py

Lines changed: 3 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import datetime
2020
import inspect
2121
import itertools
22-
import json
2322
import re
2423
import sys
2524
import textwrap
@@ -54,7 +53,6 @@
5453
import pyarrow
5554
import tabulate
5655

57-
import bigframes._config.display_options as display_options
5856
import bigframes.constants
5957
import bigframes.core
6058
from bigframes.core import agg_expressions, log_adapter
@@ -790,6 +788,9 @@ def __repr__(self) -> str:
790788
if opts.repr_mode == "deferred":
791789
return formatter.repr_query_job(self._compute_dry_run())
792790

791+
# TODO(swast): pass max_columns and get the true column count back. Maybe
792+
# get 1 more column than we have requested so that pandas can add the
793+
# ... for us?
793794
max_results = opts.max_rows
794795
pandas_df, row_count, query_job = self._block.retrieve_repr_request_results(
795796
max_results
@@ -826,96 +827,6 @@ def _repr_mimebundle_(self, include=None, exclude=None):
826827

827828
return html.repr_mimebundle(self, include=include, exclude=exclude)
828829

829-
def _create_text_representation(
830-
self,
831-
pandas_df: pandas.DataFrame,
832-
total_rows: typing.Optional[int],
833-
) -> str:
834-
"""Create a text representation of the DataFrame."""
835-
opts = bigframes.options.display
836-
with display_options.pandas_repr(opts):
837-
import pandas.io.formats
838-
839-
to_string_kwargs = (
840-
pandas.io.formats.format.get_dataframe_repr_params() # type: ignore
841-
)
842-
if not self._has_index:
843-
to_string_kwargs.update({"index": False})
844-
to_string_kwargs.update({"show_dimensions": False})
845-
repr_string = pandas_df.to_string(**to_string_kwargs)
846-
847-
lines = repr_string.split("\n")
848-
is_truncated = total_rows is not None and total_rows > len(pandas_df)
849-
850-
if is_truncated:
851-
lines.append("...")
852-
lines.append("") # Add empty line for spacing only if truncated
853-
column_count = len(self.columns)
854-
lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
855-
else:
856-
# For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False
857-
column_count = len(self.columns)
858-
lines.append("")
859-
lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
860-
861-
return "\n".join(lines)
862-
863-
def _create_html_representation(
864-
self,
865-
pandas_df: pandas.DataFrame,
866-
row_count: int,
867-
column_count: int,
868-
blob_cols: list[str],
869-
) -> str:
870-
"""Create an HTML representation of the DataFrame."""
871-
opts = bigframes.options.display
872-
with display_options.pandas_repr(opts):
873-
# TODO(shuowei, b/464053870): Escaping HTML would be useful, but
874-
# `escape=False` is needed to show images. We may need to implement
875-
# a full-fledged repr module to better support types not in pandas.
876-
if bigframes.options.display.blob_display and blob_cols:
877-
878-
def obj_ref_rt_to_html(obj_ref_rt) -> str:
879-
obj_ref_rt_json = json.loads(obj_ref_rt)
880-
obj_ref_details = obj_ref_rt_json["objectref"]["details"]
881-
if "gcs_metadata" in obj_ref_details:
882-
gcs_metadata = obj_ref_details["gcs_metadata"]
883-
content_type = typing.cast(
884-
str, gcs_metadata.get("content_type", "")
885-
)
886-
if content_type.startswith("image"):
887-
size_str = ""
888-
if bigframes.options.display.blob_display_width:
889-
size_str = f' width="{bigframes.options.display.blob_display_width}"'
890-
if bigframes.options.display.blob_display_height:
891-
size_str = (
892-
size_str
893-
+ f' height="{bigframes.options.display.blob_display_height}"'
894-
)
895-
url = obj_ref_rt_json["access_urls"]["read_url"]
896-
return f'<img src="{url}"{size_str}>'
897-
898-
return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}'
899-
900-
formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols}
901-
902-
# set max_colwidth so not to truncate the image url
903-
with pandas.option_context("display.max_colwidth", None):
904-
html_string = pandas_df.to_html(
905-
escape=False,
906-
notebook=True,
907-
max_rows=pandas.get_option("display.max_rows"),
908-
max_cols=pandas.get_option("display.max_columns"),
909-
show_dimensions=pandas.get_option("display.show_dimensions"),
910-
formatters=formatters, # type: ignore
911-
)
912-
else:
913-
# _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
914-
html_string = pandas_df._repr_html_() # type:ignore
915-
916-
html_string += f"[{row_count} rows x {column_count} columns in total]"
917-
return html_string
918-
919830
def __delitem__(self, key: str):
920831
df = self.drop(columns=[key])
921832
self._set_block(df._get_block())

bigframes/display/html.py

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from __future__ import annotations
1818

1919
import html
20+
import json
2021
import traceback
2122
import typing
2223
from typing import Any, Union
@@ -26,7 +27,7 @@
2627
import pandas.api.types
2728

2829
import bigframes
29-
from bigframes._config import options
30+
from bigframes._config import display_options, options
3031
from bigframes.display import plaintext
3132

3233
if typing.TYPE_CHECKING:
@@ -110,12 +111,6 @@ def create_html_representation(
110111
blob_cols: list[str],
111112
) -> str:
112113
"""Create an HTML representation of the DataFrame or Series."""
113-
# Note: We need to import Series here to avoid circular imports, but only if we use isinstance.
114-
# To check if it is a Series without importing, we can check if it has the _repr_html_ method
115-
# or rely on duck typing. However, the original code used isinstance.
116-
# Let's import inside the function if needed, or rely on attribute checks.
117-
# But wait, type checking imports are not available at runtime.
118-
# We can check __class__.__name__ or similar, or just import locally.
119114
from bigframes.series import Series
120115

121116
if isinstance(obj, Series):
@@ -124,12 +119,63 @@ def create_html_representation(
124119
html_string = pd_series._repr_html_()
125120
except AttributeError:
126121
html_string = f"<pre>{pd_series.to_string()}</pre>"
122+
123+
# The Series branch returns the HTML as-is: unlike the DataFrame branch
124+
# below, no "[N rows x M columns in total]" footer is appended here.
125+
# This matches the original code, which just returned _repr_html_() or
126+
# the <pre>-wrapped to_string() output for a Series.
127+
return html_string
127128
else:
128129
# It's a DataFrame
129-
html_string = obj._create_html_representation(
130-
pandas_df, total_rows, total_columns, blob_cols
131-
)
132-
return html_string
130+
opts = options.display
131+
with display_options.pandas_repr(opts):
132+
# TODO(shuowei, b/464053870): Escaping HTML would be useful, but
133+
# `escape=False` is needed to show images. We may need to implement
134+
# a full-fledged repr module to better support types not in pandas.
135+
if options.display.blob_display and blob_cols:
136+
137+
def obj_ref_rt_to_html(obj_ref_rt) -> str:
138+
obj_ref_rt_json = json.loads(obj_ref_rt)
139+
obj_ref_details = obj_ref_rt_json["objectref"]["details"]
140+
if "gcs_metadata" in obj_ref_details:
141+
gcs_metadata = obj_ref_details["gcs_metadata"]
142+
content_type = typing.cast(
143+
str, gcs_metadata.get("content_type", "")
144+
)
145+
if content_type.startswith("image"):
146+
size_str = ""
147+
if options.display.blob_display_width:
148+
size_str = (
149+
f' width="{options.display.blob_display_width}"'
150+
)
151+
if options.display.blob_display_height:
152+
size_str = (
153+
size_str
154+
+ f' height="{options.display.blob_display_height}"'
155+
)
156+
url = obj_ref_rt_json["access_urls"]["read_url"]
157+
return f'<img src="{url}"{size_str}>'
158+
159+
return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}'
160+
161+
formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols}
162+
163+
# Set max_colwidth to None so that the image URL is not truncated.
164+
with pandas.option_context("display.max_colwidth", None):
165+
html_string = pandas_df.to_html(
166+
escape=False,
167+
notebook=True,
168+
max_rows=pandas.get_option("display.max_rows"),
169+
max_cols=pandas.get_option("display.max_columns"),
170+
show_dimensions=pandas.get_option("display.show_dimensions"),
171+
formatters=formatters, # type: ignore
172+
)
173+
else:
174+
# _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
175+
html_string = pandas_df._repr_html_() # type:ignore
176+
177+
html_string += f"[{total_rows} rows x {total_columns} columns in total]"
178+
return html_string
133179

134180

135181
def get_anywidget_bundle(
@@ -189,7 +235,7 @@ def repr_mimebundle(
189235
"""
190236
from bigframes.series import Series
191237

192-
opts = bigframes.options.display
238+
opts = options.display
193239
if opts.repr_mode == "anywidget":
194240
try:
195241
return get_anywidget_bundle(obj, include=include, exclude=exclude)

bigframes/display/plaintext.py

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
import typing
2020
from typing import Union
2121

22+
import pandas
23+
import pandas.io.formats
24+
25+
from bigframes._config import display_options, options
26+
2227
if typing.TYPE_CHECKING:
2328
import pandas as pd
2429

@@ -32,6 +37,55 @@ def create_text_representation(
3237
total_rows: typing.Optional[int],
3338
) -> str:
3439
"""Create a text representation of the DataFrame or Series."""
35-
# TODO(swast): This module should probably just be removed and combined
36-
# with the html module.
37-
return obj._create_text_representation(pandas_df, total_rows)
40+
from bigframes.series import Series
41+
42+
opts = options.display
43+
44+
if isinstance(obj, Series):
45+
with display_options.pandas_repr(opts):
46+
pd_series = pandas_df.iloc[:, 0]
47+
if len(obj._block.index_columns) == 0:
48+
repr_string = pd_series.to_string(
49+
length=False, index=False, name=True, dtype=True
50+
)
51+
else:
52+
repr_string = pd_series.to_string(length=False, name=True, dtype=True)
53+
54+
lines = repr_string.split("\n")
55+
is_truncated = total_rows is not None and total_rows > len(pandas_df)
56+
57+
if is_truncated:
58+
lines.append("...")
59+
lines.append("") # Add empty line for spacing only if truncated
60+
lines.append(f"[{total_rows} rows]")
61+
62+
return "\n".join(lines)
63+
64+
else:
65+
# DataFrame
66+
with display_options.pandas_repr(opts):
67+
# Safe to mutate: this dict is owned by this code and does not affect the global config.
68+
to_string_kwargs = (
69+
pandas.io.formats.format.get_dataframe_repr_params() # type: ignore
70+
)
71+
if not obj._has_index:
72+
to_string_kwargs.update({"index": False})
73+
74+
# We append our own dimensions string below, so tell pandas not to add one.
75+
to_string_kwargs.update({"show_dimensions": False})
76+
repr_string = pandas_df.to_string(**to_string_kwargs)
77+
78+
lines = repr_string.split("\n")
79+
is_truncated = total_rows is not None and total_rows > len(pandas_df)
80+
81+
if is_truncated:
82+
lines.append("...")
83+
lines.append("") # Add empty line for spacing only if truncated
84+
column_count = len(obj.columns)
85+
lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
86+
else:
87+
# Non-truncated DataFrames still need the dimensions footer, because show_dimensions is always disabled above.
88+
column_count = len(obj.columns)
89+
lines.append("")
90+
lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
91+
return "\n".join(lines)

bigframes/series.py

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -568,32 +568,6 @@ def reset_index(
568568
block = block.assign_label(self._value_column, name)
569569
return bigframes.dataframe.DataFrame(block)
570570

571-
def _create_text_representation(
572-
self,
573-
pandas_df: pandas.DataFrame,
574-
total_rows: typing.Optional[int],
575-
) -> str:
576-
"""Create a text representation of the Series."""
577-
opts = bigframes.options.display
578-
with bigframes._config.display_options.pandas_repr(opts):
579-
pd_series = pandas_df.iloc[:, 0]
580-
if len(self._block.index_columns) == 0:
581-
repr_string = pd_series.to_string(
582-
length=False, index=False, name=True, dtype=True
583-
)
584-
else:
585-
repr_string = pd_series.to_string(length=False, name=True, dtype=True)
586-
587-
lines = repr_string.split("\n")
588-
is_truncated = total_rows is not None and total_rows > len(pandas_df)
589-
590-
if is_truncated:
591-
lines.append("...")
592-
lines.append("") # Add empty line for spacing only if truncated
593-
lines.append(f"[{total_rows} rows]")
594-
595-
return "\n".join(lines)
596-
597571
def _repr_mimebundle_(self, include=None, exclude=None):
598572
"""
599573
Custom display method for IPython/Jupyter environments.
@@ -609,6 +583,10 @@ def __repr__(self) -> str:
609583
if not hasattr(self, "_block"):
610584
return object.__repr__(self)
611585

586+
# TODO(swast): Add a timeout here? If the query is taking a long time,
587+
# maybe we just print the job metadata that we have so far?
588+
# TODO(swast): Avoid downloading the whole series by using job
589+
# metadata, like we do with DataFrame.
612590
opts = bigframes.options.display
613591
if opts.repr_mode == "deferred":
614592
return formatter.repr_query_job(self._compute_dry_run())

0 commit comments

Comments
 (0)