Skip to content

Commit d53eea3

Browse files
committed
code refactor
1 parent 86e0b6a commit d53eea3

File tree

2 files changed

+149
-227
lines changed

2 files changed

+149
-227
lines changed

bigframes/dataframe.py

Lines changed: 58 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -839,78 +839,35 @@ def _repr_html_fallback(self) -> str:
839839
return formatter.repr_query_job(self._compute_dry_run())
840840

841841
# Process blob columns first for non-deferred modes
842-
self._cached()
843-
df = self.copy()
844-
if bigframes.options.display.blob_display:
845-
blob_cols = [
846-
series_name
847-
for series_name, series in df.items()
848-
if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE
849-
]
850-
for col in blob_cols:
851-
# TODO(garrettwu): It is not necessary to get access URLs for all the rows. Update once there is a way to get URLs from local data.
852-
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
853-
else:
854-
blob_cols = []
842+
df, blob_cols = self._process_blob_columns()
855843

856-
# Continue with regular HTML rendering for non-anywidget modes
857-
# TODO(swast): pass max_columns and get the true column count back. Maybe
858-
# get 1 more column than we have requested so that pandas can add the
859-
# ... for us?
860844
pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
861845
max_results
862846
)
863847

864848
self._set_internal_query_job(query_job)
865849
column_count = len(pandas_df.columns)
866850

867-
with display_options.pandas_repr(opts):
868-
# Allows previewing images in the DataFrame. The implementation changes the string repr as well, so that it doesn't truncate strings or escape HTML characters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas.
869-
if bigframes.options.display.blob_display and blob_cols:
870-
871-
def obj_ref_rt_to_html(obj_ref_rt) -> str:
872-
obj_ref_rt_json = json.loads(obj_ref_rt)
873-
obj_ref_details = obj_ref_rt_json["objectref"]["details"]
874-
if "gcs_metadata" in obj_ref_details:
875-
gcs_metadata = obj_ref_details["gcs_metadata"]
876-
content_type = typing.cast(
877-
str, gcs_metadata.get("content_type", "")
878-
)
879-
if content_type.startswith("image"):
880-
size_str = ""
881-
if bigframes.options.display.blob_display_width:
882-
size_str = f' width="{bigframes.options.display.blob_display_width}"'
883-
if bigframes.options.display.blob_display_height:
884-
size_str = (
885-
size_str
886-
+ f' height="{bigframes.options.display.blob_display_height}"'
887-
)
888-
url = obj_ref_rt_json["access_urls"]["read_url"]
889-
return f'<img src="{url}"{size_str}>'
890-
891-
return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}'
892-
893-
formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols}
894-
895-
# Set max_colwidth to None so that the image URL is not truncated.
896-
with pandas.option_context("display.max_colwidth", None):
897-
max_rows = pandas.get_option("display.max_rows")
898-
max_cols = pandas.get_option("display.max_columns")
899-
show_dimensions = pandas.get_option("display.show_dimensions")
900-
html_string = pandas_df.to_html(
901-
escape=False,
902-
notebook=True,
903-
max_rows=max_rows,
904-
max_cols=max_cols,
905-
show_dimensions=show_dimensions,
906-
formatters=formatters, # type: ignore
907-
)
908-
else:
909-
# _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
910-
html_string = pandas_df._repr_html_() # type:ignore
851+
return self._create_html_representation(
852+
pandas_df, row_count, column_count, blob_cols
853+
)
911854

912-
html_string += f"[{row_count} rows x {column_count} columns in total]"
913-
return html_string
855+
def _process_blob_columns(self) -> tuple[DataFrame, list[str]]:
856+
"""Process blob columns for display."""
857+
self._cached()
858+
df = self
859+
blob_cols = []
860+
if bigframes.options.display.blob_display:
861+
blob_cols = [
862+
series_name
863+
for series_name, series in self.items()
864+
if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE
865+
]
866+
if blob_cols:
867+
df = self.copy()
868+
for col in blob_cols:
869+
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
870+
return df, blob_cols
914871

915872
def _get_anywidget_bundle(self, include=None, exclude=None):
916873
"""
@@ -919,17 +876,7 @@ def _get_anywidget_bundle(self, include=None, exclude=None):
919876
"""
920877
from bigframes import display
921878

922-
# Process blob columns if needed
923-
self._cached()
924-
df = self.copy()
925-
if bigframes.options.display.blob_display:
926-
blob_cols = [
927-
series_name
928-
for series_name, series in df.items()
929-
if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE
930-
]
931-
for col in blob_cols:
932-
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
879+
df, _ = self._process_blob_columns()
933880

934881
# Create and display the widget
935882
widget = display.TableWidget(df)
@@ -946,7 +893,16 @@ def _get_anywidget_bundle(self, include=None, exclude=None):
946893
# the HTML and plain text versions.
947894
widget_repr["text/html"] = widget.table_html
948895

949-
# Re-create the text representation from what we know.
896+
widget_repr["text/plain"] = self._create_text_representation(
897+
widget._cached_data, widget.row_count
898+
)
899+
900+
return widget_repr
901+
902+
def _create_text_representation(
903+
self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int]
904+
) -> str:
905+
"""Create a text representation of the DataFrame."""
950906
opts = bigframes.options.display
951907
with display_options.pandas_repr(opts):
952908
import pandas.io.formats
@@ -957,19 +913,20 @@ def _get_anywidget_bundle(self, include=None, exclude=None):
957913
)
958914
if not self._has_index:
959915
to_string_kwargs.update({"index": False})
960-
repr_string = widget._cached_data.to_string(**to_string_kwargs)
916+
917+
# We append our own dimensions string, so we don't want pandas to add one.
918+
to_string_kwargs.update({"show_dimensions": False})
919+
repr_string = pandas_df.to_string(**to_string_kwargs)
961920

962921
lines = repr_string.split("\n")
963-
row_count = widget.row_count
964-
if row_count is not None and row_count > len(widget._cached_data):
922+
923+
if total_rows is not None and total_rows > len(pandas_df):
965924
lines.append("...")
966925

967926
lines.append("")
968927
column_count = len(self.columns)
969-
lines.append(f"[{row_count or '?'} rows x {column_count} columns]")
970-
widget_repr["text/plain"] = "\n".join(lines)
971-
972-
return widget_repr
928+
lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
929+
return "\n".join(lines)
973930

974931
def _repr_mimebundle_(self, include=None, exclude=None):
975932
"""
@@ -997,27 +954,31 @@ def _repr_mimebundle_(self, include=None, exclude=None):
997954
opts = bigframes.options.display
998955
max_results = opts.max_rows
999956

1000-
# Process blob columns first, logic from _repr_html_fallback
1001-
self._cached()
1002-
df = self.copy()
1003-
if bigframes.options.display.blob_display:
1004-
blob_cols = [
1005-
series_name
1006-
for series_name, series in df.items()
1007-
if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE
1008-
]
1009-
for col in blob_cols:
1010-
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
1011-
else:
1012-
blob_cols = []
957+
df, blob_cols = self._process_blob_columns()
1013958

1014959
pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
1015960
max_results
1016961
)
1017962
self._set_internal_query_job(query_job)
1018963
column_count = len(pandas_df.columns)
1019964

1020-
# Generate HTML representation
965+
html_string = self._create_html_representation(
966+
pandas_df, row_count, column_count, blob_cols
967+
)
968+
969+
text_representation = self._create_text_representation(pandas_df, row_count)
970+
971+
return {"text/html": html_string, "text/plain": text_representation}
972+
973+
def _create_html_representation(
974+
self,
975+
pandas_df: pandas.DataFrame,
976+
row_count: int,
977+
column_count: int,
978+
blob_cols: list[str],
979+
) -> str:
980+
"""Create an HTML representation of the DataFrame."""
981+
opts = bigframes.options.display
1021982
with display_options.pandas_repr(opts):
1022983
if bigframes.options.display.blob_display and blob_cols:
1023984

@@ -1057,31 +1018,7 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str:
10571018
html_string = pandas_df._repr_html_() # type:ignore
10581019

10591020
html_string += f"[{row_count} rows x {column_count} columns in total]"
1060-
1061-
# Generate text representation
1062-
with display_options.pandas_repr(opts):
1063-
import pandas.io.formats
1064-
1065-
to_string_kwargs = (
1066-
pandas.io.formats.format.get_dataframe_repr_params() # type: ignore
1067-
)
1068-
if not self._has_index:
1069-
to_string_kwargs.update({"index": False})
1070-
repr_string = pandas_df.to_string(**to_string_kwargs)
1071-
1072-
lines = repr_string.split("\n")
1073-
pattern = re.compile("\\[[0-9]+ rows x [0-9]+ columns\\]")
1074-
if pattern.match(lines[-1]):
1075-
lines = lines[:-2]
1076-
1077-
if row_count > len(lines) - 1:
1078-
lines.append("...")
1079-
1080-
lines.append("")
1081-
lines.append(f"[{row_count} rows x {column_count} columns]")
1082-
text_representation = "\n".join(lines)
1083-
1084-
return {"text/html": html_string, "text/plain": text_representation}
1021+
return html_string
10851022

10861023
def __delitem__(self, key: str):
10871024
df = self.drop(columns=[key])

0 commit comments

Comments
 (0)