@@ -839,78 +839,35 @@ def _repr_html_fallback(self) -> str:
839839 return formatter .repr_query_job (self ._compute_dry_run ())
840840
841841 # Process blob columns first for non-deferred modes
842- self ._cached ()
843- df = self .copy ()
844- if bigframes .options .display .blob_display :
845- blob_cols = [
846- series_name
847- for series_name , series in df .items ()
848- if series .dtype == bigframes .dtypes .OBJ_REF_DTYPE
849- ]
850- for col in blob_cols :
851- # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data.
852- df [col ] = df [col ].blob ._get_runtime (mode = "R" , with_metadata = True )
853- else :
854- blob_cols = []
842+ df , blob_cols = self ._process_blob_columns ()
855843
856- # Continue with regular HTML rendering for non-anywidget modes
857- # TODO(swast): pass max_columns and get the true column count back. Maybe
858- # get 1 more column than we have requested so that pandas can add the
859- # ... for us?
860844 pandas_df , row_count , query_job = df ._block .retrieve_repr_request_results (
861845 max_results
862846 )
863847
864848 self ._set_internal_query_job (query_job )
865849 column_count = len (pandas_df .columns )
866850
867- with display_options .pandas_repr (opts ):
868- # Allows to preview images in the DataFrame. The implementation changes the string repr as well, that it doesn't truncate strings or escape html charaters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas.
869- if bigframes .options .display .blob_display and blob_cols :
870-
871- def obj_ref_rt_to_html (obj_ref_rt ) -> str :
872- obj_ref_rt_json = json .loads (obj_ref_rt )
873- obj_ref_details = obj_ref_rt_json ["objectref" ]["details" ]
874- if "gcs_metadata" in obj_ref_details :
875- gcs_metadata = obj_ref_details ["gcs_metadata" ]
876- content_type = typing .cast (
877- str , gcs_metadata .get ("content_type" , "" )
878- )
879- if content_type .startswith ("image" ):
880- size_str = ""
881- if bigframes .options .display .blob_display_width :
882- size_str = f' width="{ bigframes .options .display .blob_display_width } "'
883- if bigframes .options .display .blob_display_height :
884- size_str = (
885- size_str
886- + f' height="{ bigframes .options .display .blob_display_height } "'
887- )
888- url = obj_ref_rt_json ["access_urls" ]["read_url" ]
889- return f'<img src="{ url } "{ size_str } >'
890-
891- return f'uri: { obj_ref_rt_json ["objectref" ]["uri" ]} , authorizer: { obj_ref_rt_json ["objectref" ]["authorizer" ]} '
892-
893- formatters = {blob_col : obj_ref_rt_to_html for blob_col in blob_cols }
894-
895- # set max_colwidth so not to truncate the image url
896- with pandas .option_context ("display.max_colwidth" , None ):
897- max_rows = pandas .get_option ("display.max_rows" )
898- max_cols = pandas .get_option ("display.max_columns" )
899- show_dimensions = pandas .get_option ("display.show_dimensions" )
900- html_string = pandas_df .to_html (
901- escape = False ,
902- notebook = True ,
903- max_rows = max_rows ,
904- max_cols = max_cols ,
905- show_dimensions = show_dimensions ,
906- formatters = formatters , # type: ignore
907- )
908- else :
909- # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
910- html_string = pandas_df ._repr_html_ () # type:ignore
851+ return self ._create_html_representation (
852+ pandas_df , row_count , column_count , blob_cols
853+ )
911854
912- html_string += f"[{ row_count } rows x { column_count } columns in total]"
913- return html_string
def _process_blob_columns(self) -> tuple[DataFrame, list[str]]:
    """Prepare the frame used for display, resolving blob columns if enabled.

    ``self._cached()`` is always called first. When
    ``bigframes.options.display.blob_display`` is set and at least one
    column has ``bigframes.dtypes.OBJ_REF_DTYPE``, a copy of this frame is
    made and each such column is replaced by the result of
    ``.blob._get_runtime(mode="R", with_metadata=True)``. In every other
    case the frame itself is returned and no copy is made.

    Returns:
        A ``(df, blob_cols)`` tuple: the frame to render (either ``self``
        or the modified copy) and the names of the columns whose dtype is
        ``OBJ_REF_DTYPE`` (empty when blob display is disabled or no such
        column exists).
    """
    self._cached()

    # Blob display switched off: nothing to look up, nothing to copy.
    if not bigframes.options.display.blob_display:
        return self, []

    obj_ref_columns = [
        label
        for label, column in self.items()
        if column.dtype == bigframes.dtypes.OBJ_REF_DTYPE
    ]

    # No object-reference columns: skip the copy and hand back self.
    if not obj_ref_columns:
        return self, obj_ref_columns

    # Work on a copy so the caller's frame keeps its original columns.
    display_df = self.copy()
    for label in obj_ref_columns:
        display_df[label] = display_df[label].blob._get_runtime(
            mode="R", with_metadata=True
        )
    return display_df, obj_ref_columns
914871
915872 def _get_anywidget_bundle (self , include = None , exclude = None ):
916873 """
@@ -919,17 +876,7 @@ def _get_anywidget_bundle(self, include=None, exclude=None):
919876 """
920877 from bigframes import display
921878
922- # Process blob columns if needed
923- self ._cached ()
924- df = self .copy ()
925- if bigframes .options .display .blob_display :
926- blob_cols = [
927- series_name
928- for series_name , series in df .items ()
929- if series .dtype == bigframes .dtypes .OBJ_REF_DTYPE
930- ]
931- for col in blob_cols :
932- df [col ] = df [col ].blob ._get_runtime (mode = "R" , with_metadata = True )
879+ df , _ = self ._process_blob_columns ()
933880
934881 # Create and display the widget
935882 widget = display .TableWidget (df )
@@ -946,7 +893,16 @@ def _get_anywidget_bundle(self, include=None, exclude=None):
946893 # the HTML and plain text versions.
947894 widget_repr ["text/html" ] = widget .table_html
948895
949- # Re-create the text representation from what we know.
896+ widget_repr ["text/plain" ] = self ._create_text_representation (
897+ widget ._cached_data , widget .row_count
898+ )
899+
900+ return widget_repr
901+
902+ def _create_text_representation (
903+ self , pandas_df : pandas .DataFrame , total_rows : typing .Optional [int ]
904+ ) -> str :
905+ """Create a text representation of the DataFrame."""
950906 opts = bigframes .options .display
951907 with display_options .pandas_repr (opts ):
952908 import pandas .io .formats
@@ -957,19 +913,20 @@ def _get_anywidget_bundle(self, include=None, exclude=None):
957913 )
958914 if not self ._has_index :
959915 to_string_kwargs .update ({"index" : False })
960- repr_string = widget ._cached_data .to_string (** to_string_kwargs )
916+
917+ # We add our own dimensions string, so don't want pandas to.
918+ to_string_kwargs .update ({"show_dimensions" : False })
919+ repr_string = pandas_df .to_string (** to_string_kwargs )
961920
962921 lines = repr_string .split ("\n " )
963- row_count = widget . row_count
964- if row_count is not None and row_count > len (widget . _cached_data ):
922+
923+ if total_rows is not None and total_rows > len (pandas_df ):
965924 lines .append ("..." )
966925
967926 lines .append ("" )
968927 column_count = len (self .columns )
969- lines .append (f"[{ row_count or '?' } rows x { column_count } columns]" )
970- widget_repr ["text/plain" ] = "\n " .join (lines )
971-
972- return widget_repr
928+ lines .append (f"[{ total_rows or '?' } rows x { column_count } columns]" )
929+ return "\n " .join (lines )
973930
974931 def _repr_mimebundle_ (self , include = None , exclude = None ):
975932 """
@@ -997,27 +954,31 @@ def _repr_mimebundle_(self, include=None, exclude=None):
997954 opts = bigframes .options .display
998955 max_results = opts .max_rows
999956
1000- # Process blob columns first, logic from _repr_html_fallback
1001- self ._cached ()
1002- df = self .copy ()
1003- if bigframes .options .display .blob_display :
1004- blob_cols = [
1005- series_name
1006- for series_name , series in df .items ()
1007- if series .dtype == bigframes .dtypes .OBJ_REF_DTYPE
1008- ]
1009- for col in blob_cols :
1010- df [col ] = df [col ].blob ._get_runtime (mode = "R" , with_metadata = True )
1011- else :
1012- blob_cols = []
957+ df , blob_cols = self ._process_blob_columns ()
1013958
1014959 pandas_df , row_count , query_job = df ._block .retrieve_repr_request_results (
1015960 max_results
1016961 )
1017962 self ._set_internal_query_job (query_job )
1018963 column_count = len (pandas_df .columns )
1019964
1020- # Generate HTML representation
965+ html_string = self ._create_html_representation (
966+ pandas_df , row_count , column_count , blob_cols
967+ )
968+
969+ text_representation = self ._create_text_representation (pandas_df , row_count )
970+
971+ return {"text/html" : html_string , "text/plain" : text_representation }
972+
973+ def _create_html_representation (
974+ self ,
975+ pandas_df : pandas .DataFrame ,
976+ row_count : int ,
977+ column_count : int ,
978+ blob_cols : list [str ],
979+ ) -> str :
980+ """Create an HTML representation of the DataFrame."""
981+ opts = bigframes .options .display
1021982 with display_options .pandas_repr (opts ):
1022983 if bigframes .options .display .blob_display and blob_cols :
1023984
@@ -1057,31 +1018,7 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str:
10571018 html_string = pandas_df ._repr_html_ () # type:ignore
10581019
10591020 html_string += f"[{ row_count } rows x { column_count } columns in total]"
1060-
1061- # Generate text representation
1062- with display_options .pandas_repr (opts ):
1063- import pandas .io .formats
1064-
1065- to_string_kwargs = (
1066- pandas .io .formats .format .get_dataframe_repr_params () # type: ignore
1067- )
1068- if not self ._has_index :
1069- to_string_kwargs .update ({"index" : False })
1070- repr_string = pandas_df .to_string (** to_string_kwargs )
1071-
1072- lines = repr_string .split ("\n " )
1073- pattern = re .compile ("\\ [[0-9]+ rows x [0-9]+ columns\\ ]" )
1074- if pattern .match (lines [- 1 ]):
1075- lines = lines [:- 2 ]
1076-
1077- if row_count > len (lines ) - 1 :
1078- lines .append ("..." )
1079-
1080- lines .append ("" )
1081- lines .append (f"[{ row_count } rows x { column_count } columns]" )
1082- text_representation = "\n " .join (lines )
1083-
1084- return {"text/html" : html_string , "text/plain" : text_representation }
1021+ return html_string
10851022
10861023 def __delitem__ (self , key : str ):
10871024 df = self .drop (columns = [key ])
0 commit comments