refactor: move display logic to display/plaintext.py and display/html.py

shuoweil · shuoweil · commit bd5699279ad9 · 2025-12-23T02:30:17.000Z
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
@@ -19,7 +19,6 @@
 import datetime
 import inspect
 import itertools
-import json
 import re
 import sys
 import textwrap
@@ -54,7 +53,6 @@
 import pyarrow
 import tabulate
 
-import bigframes._config.display_options as display_options
 import bigframes.constants
 import bigframes.core
 from bigframes.core import agg_expressions, log_adapter
@@ -790,6 +788,9 @@ def __repr__(self) -> str:
         if opts.repr_mode == "deferred":
             return formatter.repr_query_job(self._compute_dry_run())
 
+        # TODO(swast): pass max_columns and get the true column count back. Maybe
+        # get 1 more column than we have requested so that pandas can add the
+        # ... for us?
         max_results = opts.max_rows
         pandas_df, row_count, query_job = self._block.retrieve_repr_request_results(
             max_results
@@ -826,96 +827,6 @@ def _repr_mimebundle_(self, include=None, exclude=None):
 
         return html.repr_mimebundle(self, include=include, exclude=exclude)
 
-    def _create_text_representation(
-        self,
-        pandas_df: pandas.DataFrame,
-        total_rows: typing.Optional[int],
-    ) -> str:
-        """Create a text representation of the DataFrame."""
-        opts = bigframes.options.display
-        with display_options.pandas_repr(opts):
-            import pandas.io.formats
-
-            to_string_kwargs = (
-                pandas.io.formats.format.get_dataframe_repr_params()  # type: ignore
-            )
-            if not self._has_index:
-                to_string_kwargs.update({"index": False})
-            to_string_kwargs.update({"show_dimensions": False})
-            repr_string = pandas_df.to_string(**to_string_kwargs)
-
-        lines = repr_string.split("\n")
-        is_truncated = total_rows is not None and total_rows > len(pandas_df)
-
-        if is_truncated:
-            lines.append("...")
-            lines.append("")  # Add empty line for spacing only if truncated
-            column_count = len(self.columns)
-            lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
-        else:
-            # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False
-            column_count = len(self.columns)
-            lines.append("")
-            lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
-
-        return "\n".join(lines)
-
-    def _create_html_representation(
-        self,
-        pandas_df: pandas.DataFrame,
-        row_count: int,
-        column_count: int,
-        blob_cols: list[str],
-    ) -> str:
-        """Create an HTML representation of the DataFrame."""
-        opts = bigframes.options.display
-        with display_options.pandas_repr(opts):
-            # TODO(shuowei, b/464053870): Escaping HTML would be useful, but
-            # `escape=False` is needed to show images. We may need to implement
-            # a full-fledged repr module to better support types not in pandas.
-            if bigframes.options.display.blob_display and blob_cols:
-
-                def obj_ref_rt_to_html(obj_ref_rt) -> str:
-                    obj_ref_rt_json = json.loads(obj_ref_rt)
-                    obj_ref_details = obj_ref_rt_json["objectref"]["details"]
-                    if "gcs_metadata" in obj_ref_details:
-                        gcs_metadata = obj_ref_details["gcs_metadata"]
-                        content_type = typing.cast(
-                            str, gcs_metadata.get("content_type", "")
-                        )
-                        if content_type.startswith("image"):
-                            size_str = ""
-                            if bigframes.options.display.blob_display_width:
-                                size_str = f' width="{bigframes.options.display.blob_display_width}"'
-                            if bigframes.options.display.blob_display_height:
-                                size_str = (
-                                    size_str
-                                    + f' height="{bigframes.options.display.blob_display_height}"'
-                                )
-                            url = obj_ref_rt_json["access_urls"]["read_url"]
-                            return f'<img src="{url}"{size_str}>'
-
-                    return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}'
-
-                formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols}
-
-                # set max_colwidth so not to truncate the image url
-                with pandas.option_context("display.max_colwidth", None):
-                    html_string = pandas_df.to_html(
-                        escape=False,
-                        notebook=True,
-                        max_rows=pandas.get_option("display.max_rows"),
-                        max_cols=pandas.get_option("display.max_columns"),
-                        show_dimensions=pandas.get_option("display.show_dimensions"),
-                        formatters=formatters,  # type: ignore
-                    )
-            else:
-                # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
-                html_string = pandas_df._repr_html_()  # type:ignore
-
-        html_string += f"[{row_count} rows x {column_count} columns in total]"
-        return html_string
-
     def __delitem__(self, key: str):
         df = self.drop(columns=[key])
         self._set_block(df._get_block())
diff --git a/bigframes/display/html.py b/bigframes/display/html.py
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 import html
+import json
 import traceback
 import typing
 from typing import Any, Union
@@ -26,7 +27,7 @@
 import pandas.api.types
 
 import bigframes
-from bigframes._config import options
+from bigframes._config import display_options, options
 from bigframes.display import plaintext
 
 if typing.TYPE_CHECKING:
@@ -110,12 +111,6 @@ def create_html_representation(
     blob_cols: list[str],
 ) -> str:
     """Create an HTML representation of the DataFrame or Series."""
-    # Note: We need to import Series here to avoid circular imports, but only if we use isinstance.
-    # To check if it is a Series without importing, we can check if it has the _repr_html_ method
-    # or rely on duck typing. However, the original code used isinstance.
-    # Let's import inside the function if needed, or rely on attribute checks.
-    # But wait, type checking imports are not available at runtime.
-    # We can check __class__.__name__ or similar, or just import locally.
     from bigframes.series import Series
 
     if isinstance(obj, Series):
@@ -124,12 +119,63 @@ def create_html_representation(
             html_string = pd_series._repr_html_()
         except AttributeError:
             html_string = f"<pre>{pd_series.to_string()}</pre>"
+
+        # Series output is returned as-is: either pandas' own _repr_html_ or,
+        # when that is unavailable, the to_string() text wrapped in <pre>.
+        # Unlike the DataFrame branch below, no "[N rows x M columns in total]"
+        # footer is appended here.
+        return html_string
     else:
         # It's a DataFrame
-        html_string = obj._create_html_representation(
-            pandas_df, total_rows, total_columns, blob_cols
-        )
-    return html_string
+        opts = options.display
+        with display_options.pandas_repr(opts):
+            # TODO(shuowei, b/464053870): Escaping HTML would be useful, but
+            # `escape=False` is needed to show images. We may need to implement
+            # a full-fledged repr module to better support types not in pandas.
+            if options.display.blob_display and blob_cols:
+
+                def obj_ref_rt_to_html(obj_ref_rt) -> str:
+                    obj_ref_rt_json = json.loads(obj_ref_rt)
+                    obj_ref_details = obj_ref_rt_json["objectref"]["details"]
+                    if "gcs_metadata" in obj_ref_details:
+                        gcs_metadata = obj_ref_details["gcs_metadata"]
+                        content_type = typing.cast(
+                            str, gcs_metadata.get("content_type", "")
+                        )
+                        if content_type.startswith("image"):
+                            size_str = ""
+                            if options.display.blob_display_width:
+                                size_str = (
+                                    f' width="{options.display.blob_display_width}"'
+                                )
+                            if options.display.blob_display_height:
+                                size_str = (
+                                    size_str
+                                    + f' height="{options.display.blob_display_height}"'
+                                )
+                            url = obj_ref_rt_json["access_urls"]["read_url"]
+                            return f'<img src="{url}"{size_str}>'
+
+                    return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}'
+
+                formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols}
+
+                # Set max_colwidth to None so the image URL is not truncated.
+                with pandas.option_context("display.max_colwidth", None):
+                    html_string = pandas_df.to_html(
+                        escape=False,
+                        notebook=True,
+                        max_rows=pandas.get_option("display.max_rows"),
+                        max_cols=pandas.get_option("display.max_columns"),
+                        show_dimensions=pandas.get_option("display.show_dimensions"),
+                        formatters=formatters,  # type: ignore
+                    )
+            else:
+                # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
+                html_string = pandas_df._repr_html_()  # type:ignore
+
+        html_string += f"[{total_rows} rows x {total_columns} columns in total]"
+        return html_string
 
 
 def get_anywidget_bundle(
@@ -189,7 +235,7 @@ def repr_mimebundle(
     """
     from bigframes.series import Series
 
-    opts = bigframes.options.display
+    opts = options.display
     if opts.repr_mode == "anywidget":
         try:
             return get_anywidget_bundle(obj, include=include, exclude=exclude)
diff --git a/bigframes/display/plaintext.py b/bigframes/display/plaintext.py
@@ -19,6 +19,11 @@
 import typing
 from typing import Union
 
+import pandas
+import pandas.io.formats
+
+from bigframes._config import display_options, options
+
 if typing.TYPE_CHECKING:
     import pandas as pd
 
@@ -32,6 +37,55 @@ def create_text_representation(
     total_rows: typing.Optional[int],
 ) -> str:
     """Create a text representation of the DataFrame or Series."""
-    # TODO(swast): This module should probably just be removed and combined
-    # with the html module.
-    return obj._create_text_representation(pandas_df, total_rows)
+    from bigframes.series import Series
+
+    opts = options.display
+
+    if isinstance(obj, Series):
+        with display_options.pandas_repr(opts):
+            pd_series = pandas_df.iloc[:, 0]
+            if len(obj._block.index_columns) == 0:
+                repr_string = pd_series.to_string(
+                    length=False, index=False, name=True, dtype=True
+                )
+            else:
+                repr_string = pd_series.to_string(length=False, name=True, dtype=True)
+
+        lines = repr_string.split("\n")
+        is_truncated = total_rows is not None and total_rows > len(pandas_df)
+
+        if is_truncated:
+            lines.append("...")
+            lines.append("")  # Add empty line for spacing only if truncated
+            lines.append(f"[{total_rows} rows]")
+
+        return "\n".join(lines)
+
+    else:
+        # DataFrame
+        with display_options.pandas_repr(opts):
+            # Safe to mutate: this dict is owned by this code and does not affect global config.
+            to_string_kwargs = (
+                pandas.io.formats.format.get_dataframe_repr_params()  # type: ignore
+            )
+            if not obj._has_index:
+                to_string_kwargs.update({"index": False})
+
+            # We add our own dimensions string, so don't want pandas to.
+            to_string_kwargs.update({"show_dimensions": False})
+            repr_string = pandas_df.to_string(**to_string_kwargs)
+
+        lines = repr_string.split("\n")
+        is_truncated = total_rows is not None and total_rows > len(pandas_df)
+
+        if is_truncated:
+            lines.append("...")
+            lines.append("")  # Add empty line for spacing only if truncated
+            column_count = len(obj.columns)
+            lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
+        else:
+            # Not truncated: pandas was told show_dimensions=False, so add the footer ourselves.
+            column_count = len(obj.columns)
+            lines.append("")
+            lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
+        return "\n".join(lines)
diff --git a/bigframes/series.py b/bigframes/series.py
@@ -568,32 +568,6 @@ def reset_index(
                 block = block.assign_label(self._value_column, name)
             return bigframes.dataframe.DataFrame(block)
 
-    def _create_text_representation(
-        self,
-        pandas_df: pandas.DataFrame,
-        total_rows: typing.Optional[int],
-    ) -> str:
-        """Create a text representation of the Series."""
-        opts = bigframes.options.display
-        with bigframes._config.display_options.pandas_repr(opts):
-            pd_series = pandas_df.iloc[:, 0]
-            if len(self._block.index_columns) == 0:
-                repr_string = pd_series.to_string(
-                    length=False, index=False, name=True, dtype=True
-                )
-            else:
-                repr_string = pd_series.to_string(length=False, name=True, dtype=True)
-
-        lines = repr_string.split("\n")
-        is_truncated = total_rows is not None and total_rows > len(pandas_df)
-
-        if is_truncated:
-            lines.append("...")
-            lines.append("")  # Add empty line for spacing only if truncated
-            lines.append(f"[{total_rows} rows]")
-
-        return "\n".join(lines)
-
     def _repr_mimebundle_(self, include=None, exclude=None):
         """
         Custom display method for IPython/Jupyter environments.
@@ -609,6 +583,10 @@ def __repr__(self) -> str:
         if not hasattr(self, "_block"):
             return object.__repr__(self)
 
+        # TODO(swast): Add a timeout here? If the query is taking a long time,
+        # maybe we just print the job metadata that we have so far?
+        # TODO(swast): Avoid downloading the whole series by using job
+        # metadata, like we do with DataFrame.
         opts = bigframes.options.display
         if opts.repr_mode == "deferred":
             return formatter.repr_query_job(self._compute_dry_run())