diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 41b32d99e4..9576ca8e18 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -376,9 +376,7 @@ def __repr__(self) -> __builtins__.str: # metadata, like we do with DataFrame. opts = bigframes.options.display max_results = opts.max_rows - # anywdiget mode uses the same display logic as the "deferred" mode - # for faster execution - if opts.repr_mode in ("deferred", "anywidget"): + if opts.repr_mode == "deferred": _, dry_run_query_job = self._block._compute_dry_run() return formatter.repr_query_job(dry_run_query_job) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 173aa48db8..739548f791 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -789,9 +789,7 @@ def __repr__(self) -> str: opts = bigframes.options.display max_results = opts.max_rows - # anywdiget mode uses the same display logic as the "deferred" mode - # for faster execution - if opts.repr_mode in ("deferred", "anywidget"): + if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) # TODO(swast): pass max_columns and get the true column count back. Maybe @@ -829,68 +827,138 @@ def __repr__(self) -> str: lines.append(f"[{row_count} rows x {column_count} columns]") return "\n".join(lines) - def _repr_html_(self) -> str: - """ - Returns an html string primarily for use by notebooks for displaying - a representation of the DataFrame. Displays 20 rows by default since - many notebooks are not configured for large tables. - """ - opts = bigframes.options.display - max_results = opts.max_rows - if opts.repr_mode == "deferred": - return formatter.repr_query_job(self._compute_dry_run()) - - # Process blob columns first, regardless of display mode - self._cached() - df = self.copy() + def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: + """Process blob columns for display.""" + df = self + blob_cols = [] if bigframes.options.display.blob_display: blob_cols = [ series_name - for series_name, series in df.items() + for series_name, series in self.items() if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE ] - for col in blob_cols: - # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. - df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + if blob_cols: + df = self.copy() + for col in blob_cols: + # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. + df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + return df, blob_cols + + def _get_anywidget_bundle(self, include=None, exclude=None): + """ + Helper method to create and return the anywidget mimebundle. + This function encapsulates the logic for anywidget display. + """ + from bigframes import display + + # TODO(shuowei): Keep blob_cols and pass them to TableWidget so that they can render properly. + df, _ = self._get_display_df_and_blob_cols() + + # Create and display the widget + widget = display.TableWidget(df) + widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) + + # Handle both tuple (data, metadata) and dict returns + if isinstance(widget_repr_result, tuple): + widget_repr = dict(widget_repr_result[0]) # Extract data dict from tuple else: - blob_cols = [] + widget_repr = dict(widget_repr_result) - if opts.repr_mode == "anywidget": - try: - from IPython.display import display as ipython_display + # At this point, we have already executed the query as part of the + # widget construction. Let's use the information available to render + # the HTML and plain text versions. + widget_repr["text/html"] = widget.table_html + + widget_repr["text/plain"] = self._create_text_representation( + widget._cached_data, widget.row_count + ) + + return widget_repr + + def _create_text_representation( + self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int] + ) -> str: + """Create a text representation of the DataFrame.""" + opts = bigframes.options.display + with display_options.pandas_repr(opts): + import pandas.io.formats + + # safe to mutate this, this dict is owned by this code, and does not affect global config + to_string_kwargs = ( + pandas.io.formats.format.get_dataframe_repr_params() # type: ignore + ) + if not self._has_index: + to_string_kwargs.update({"index": False}) + + # We add our own dimensions string, so don't want pandas to. + to_string_kwargs.update({"show_dimensions": False}) + repr_string = pandas_df.to_string(**to_string_kwargs) - from bigframes import display + lines = repr_string.split("\n") - # Always create a new widget instance for each display call - # This ensures that each cell gets its own widget and prevents - # unintended sharing between cells - widget = display.TableWidget(df.copy()) + if total_rows is not None and total_rows > len(pandas_df): + lines.append("...") - ipython_display(widget) - return "" # Return empty string since we used display() + lines.append("") + column_count = len(self.columns) + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + return "\n".join(lines) - except (AttributeError, ValueError, ImportError): - # Fallback if anywidget is not available + def _repr_mimebundle_(self, include=None, exclude=None): + """ + Custom display method for IPython/Jupyter environments. + This is called by IPython's display system when the object is displayed. + """ + opts = bigframes.options.display + # Only handle widget display in anywidget mode + if opts.repr_mode == "anywidget": + try: + return self._get_anywidget_bundle(include=include, exclude=exclude) + + except ImportError: + # Anywidget is an optional dependency, so warn rather than fail. + # TODO(shuowei): When Anywidget becomes the default for all repr modes, + # remove this warning. warnings.warn( "Anywidget mode is not available. " "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " - f"Falling back to deferred mode. Error: {traceback.format_exc()}" + f"Falling back to static HTML. Error: {traceback.format_exc()}" ) - return formatter.repr_query_job(self._compute_dry_run()) - # Continue with regular HTML rendering for non-anywidget modes - # TODO(swast): pass max_columns and get the true column count back. Maybe - # get 1 more column than we have requested so that pandas can add the - # ... for us? + # In non-anywidget mode, fetch data once and use it for both HTML + # and plain text representations to avoid multiple queries. + opts = bigframes.options.display + max_results = opts.max_rows + + df, blob_cols = self._get_display_df_and_blob_cols() + pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( max_results ) - self._set_internal_query_job(query_job) column_count = len(pandas_df.columns) + html_string = self._create_html_representation( + pandas_df, row_count, column_count, blob_cols + ) + + text_representation = self._create_text_representation(pandas_df, row_count) + + return {"text/html": html_string, "text/plain": text_representation} + + def _create_html_representation( + self, + pandas_df: pandas.DataFrame, + row_count: int, + column_count: int, + blob_cols: list[str], + ) -> str: + """Create an HTML representation of the DataFrame.""" + opts = bigframes.options.display with display_options.pandas_repr(opts): - # Allows to preview images in the DataFrame. The implementation changes the string repr as well, that it doesn't truncate strings or escape html charaters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas. + # TODO(shuowei, b/464053870): Escaping HTML would be useful, but + # `escape=False` is needed to show images. We may need to implement + # a full-fledged repr module to better support types not in pandas. if bigframes.options.display.blob_display and blob_cols: def obj_ref_rt_to_html(obj_ref_rt) -> str: @@ -919,15 +987,12 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: # set max_colwidth so not to truncate the image url with pandas.option_context("display.max_colwidth", None): - max_rows = pandas.get_option("display.max_rows") - max_cols = pandas.get_option("display.max_columns") - show_dimensions = pandas.get_option("display.show_dimensions") html_string = pandas_df.to_html( escape=False, notebook=True, - max_rows=max_rows, - max_cols=max_cols, - show_dimensions=show_dimensions, + max_rows=pandas.get_option("display.max_rows"), + max_cols=pandas.get_option("display.max_columns"), + show_dimensions=pandas.get_option("display.show_dimensions"), formatters=formatters, # type: ignore ) else: diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py index 7dc9e964bc..3e030a4aa2 100644 --- a/bigframes/streaming/dataframe.py +++ b/bigframes/streaming/dataframe.py @@ -291,13 +291,13 @@ def __repr__(self, *args, **kwargs): __repr__.__doc__ = _curate_df_doc(inspect.getdoc(dataframe.DataFrame.__repr__)) - def _repr_html_(self, *args, **kwargs): - return _return_type_wrapper(self._df._repr_html_, StreamingDataFrame)( + def _repr_mimebundle_(self, *args, **kwargs): + return _return_type_wrapper(self._df._repr_mimebundle_, StreamingDataFrame)( *args, **kwargs ) - _repr_html_.__doc__ = _curate_df_doc( - inspect.getdoc(dataframe.DataFrame._repr_html_) + _repr_mimebundle_.__doc__ = _curate_df_doc( + inspect.getdoc(dataframe.DataFrame._repr_mimebundle_) ) @property diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index fa324c246a..427a1e5371 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -76,11 +76,50 @@ "id": "f289d250", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 0 Bytes in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "Computation deferred. Computation will process 171.4 MB\n" + "state gender year name number\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", + "...\n", + "\n", + "[5552452 rows x 5 columns]\n" ] } ], @@ -157,22 +196,210 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2935e3f8f4f34c558d588f09a9c42131", + "model_id": "2aad385a8a2f411c822dafe7b07fbad8", "version_major": 2, "version_minor": 1 }, + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state
gender
year
name
number
\n", + " AL\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Cora\n", + " \n", + " 61\n", + "
\n", + " AL\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Anna\n", + " \n", + " 74\n", + "
\n", + " AR\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Willie\n", + " \n", + " 132\n", + "
\n", + " CO\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Anna\n", + " \n", + " 42\n", + "
\n", + " FL\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Louise\n", + " \n", + " 70\n", + "
\n", + " GA\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Catherine\n", + " \n", + " 57\n", + "
\n", + " IL\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Jessie\n", + " \n", + " 43\n", + "
\n", + " IN\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Anna\n", + " \n", + " 100\n", + "
\n", + " IN\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Pauline\n", + " \n", + " 77\n", + "
\n", + " IN\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Beulah\n", + " \n", + " 39\n", + "
" + ], "text/plain": [ - "TableWidget(orderable_columns=['state', 'gender', 'year', 'name', 'number'], page_size=10, row_count=5552452, …" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [], - "text/plain": [ - "Computation deferred. Computation will process 171.4 MB" + "state gender year name number\n", + " AL F 1910 Cora 61\n", + " AL F 1910 Anna 74\n", + " AR F 1910 Willie 132\n", + " CO F 1910 Anna 42\n", + " FL F 1910 Louise 70\n", + " GA F 1910 Catherine 57\n", + " IL F 1910 Jessie 43\n", + " IN F 1910 Anna 100\n", + " IN F 1910 Pauline 77\n", + " IN F 1910 Beulah 39\n", + "...\n", + "\n", + "[5552452 rows x 5 columns]" ] }, "execution_count": 6, @@ -255,7 +482,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fa03d998dfee47638a32b6c21ace0b5c", + "model_id": "0c0b83e7e3c048ff8abb525e1bfd6c5f", "version_major": 2, "version_minor": 1 }, @@ -369,7 +596,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6d6886bd2bb74be996d54ce240cbe6c9", + "model_id": "6a60e5dd37c64e76a8e3804dd3531f70", "version_major": 2, "version_minor": 1 }, @@ -401,7 +628,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "added-cell-1", "metadata": {}, "outputs": [ @@ -409,7 +636,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 85.9 kB in 24 seconds of slot time.\n", + " Query processed 85.9 kB in 14 seconds of slot time.\n", " " ], "text/plain": [ @@ -453,28 +680,330 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "df774329fd2f47918b986362863d7155", + "model_id": "893065f8a0164648b241f2cc3d1a9271", "version_major": 2, "version_minor": 1 }, + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
result
gcs_path
issuer
language
publication_date
class_international
class_us
application_number
filing_date
priority_date_eu
representative_line_1_eu
applicant_line_1
inventor_line_1
title_line_1
number
\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de73.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 03.10.2018\n", + " \n", + " H05B 6/12\n", + " \n", + " <NA>\n", + " \n", + " 18165514.3\n", + " \n", + " 03.04.2018\n", + " \n", + " 30.03.2017\n", + " \n", + " <NA>\n", + " \n", + " BSH Hausger√§te GmbH\n", + " \n", + " Acero Acero, Jesus\n", + " \n", + " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", + " \n", + " EP 3 383 141 A2\n", + "
\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de2.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 29.08.018\n", + " \n", + " E04H 6/12\n", + " \n", + " <NA>\n", + " \n", + " 18157874.1\n", + " \n", + " 21.02.2018\n", + " \n", + " 22.02.2017\n", + " \n", + " Liedtke & Partner Patentanwälte\n", + " \n", + " SHB Hebezeugbau GmbH\n", + " \n", + " VOLGER, Alexander\n", + " \n", + " STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER\n", + " \n", + " EP 3 366 869 A1\n", + "
\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de70.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 03.10.2018\n", + " \n", + " H01L 21/20\n", + " \n", + " <NA>\n", + " \n", + " 18166536.5\n", + " \n", + " 16.02.2016\n", + " \n", + " <NA>\n", + " \n", + " Scheider, Sascha et al\n", + " \n", + " EV Group E. Thallner GmbH\n", + " \n", + " Kurz, Florian\n", + " \n", + " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN\n", + " \n", + " EP 3 382 744 A1\n", + "
\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de5.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 03.10.2018\n", + " \n", + " G06F 11/30\n", + " \n", + " <NA>\n", + " \n", + " 18157347.8\n", + " \n", + " 19.02.2018\n", + " \n", + " 31.03.2017\n", + " \n", + " Hoffmann Eitle\n", + " \n", + " FUJITSU LIMITED\n", + " \n", + " Kukihara, Kensuke\n", + " \n", + " METHOD EXECUTED BY A COMPUTER, INFORMATION PROCESSING APPARATUS AND\n", + " \n", + " EP 3 382 553 A1\n", + "
\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de56.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 03.10.2018\n", + " \n", + " A01K 31/00\n", + " \n", + " <NA>\n", + " \n", + " 18171005.4\n", + " \n", + " 05.02.2015\n", + " \n", + " 05.02.2014\n", + " \n", + " Stork Bamberger Patentanwälte\n", + " \n", + " Linco Food Systems A/S\n", + " \n", + " Thrane, Uffe\n", + " \n", + " MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER EINHEIT UND EINER ANORDNUNG\n", + " \n", + " EP 3 381 276 A1\n", + "
" + ], "text/plain": [ - "TableWidget(orderable_columns=['gcs_path', 'issuer', 'language', 'publication_date', 'class_international', 'c…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [], - "text/plain": [ - "Computation deferred. Computation will process 0 Bytes" + " result \\\n", + "0 {'application_number': None, 'class_internatio... \n", + "1 {'application_number': None, 'class_internatio... \n", + "2 {'application_number': None, 'class_internatio... \n", + "3 {'application_number': None, 'class_internatio... \n", + "4 {'application_number': None, 'class_internatio... \n", + "\n", + " gcs_path issuer language \\\n", + "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "\n", + " publication_date class_international class_us application_number \\\n", + "0 03.10.2018 H05B 6/12 18165514.3 \n", + "1 29.08.018 E04H 6/12 18157874.1 \n", + "2 03.10.2018 H01L 21/20 18166536.5 \n", + "3 03.10.2018 G06F 11/30 18157347.8 \n", + "4 03.10.2018 A01K 31/00 18171005.4 \n", + "\n", + " filing_date priority_date_eu representative_line_1_eu \\\n", + "0 03.04.2018 30.03.2017 \n", + "1 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "2 16.02.2016 Scheider, Sascha et al \n", + "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "4 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", + "\n", + " applicant_line_1 inventor_line_1 \\\n", + "0 BSH Hausger√§te GmbH Acero Acero, Jesus \n", + "1 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "2 EV Group E. Thallner GmbH Kurz, Florian \n", + "3 FUJITSU LIMITED Kukihara, Kensuke \n", + "4 Linco Food Systems A/S Thrane, Uffe \n", + "\n", + " title_line_1 number \n", + "0 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", + "1 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", + "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "4 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "\n", + "[5 rows x 15 columns]" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index 49d5ff6c92..c8740ed220 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -37,12 +37,12 @@ def paginated_pandas_df() -> pd.DataFrame: { "id": [5, 4, 3, 2, 1, 0], "page_indicator": [ - "row_5", - "row_4", - "row_3", - "row_2", - "row_1", - "row_0", + "page_3_row_2", + "page_3_row_1", + "page_2_row_2", + "page_2_row_1", + "page_1_row_2", + "page_1_row_1", ], "value": [5, 4, 3, 2, 1, 0], } @@ -205,11 +205,12 @@ def test_widget_initialization_should_calculate_total_row_count( assert widget.row_count == EXPECTED_ROW_COUNT -def test_widget_initialization_should_set_default_pagination( +def test_widget_initialization_should_default_to_page_zero( table_widget, ): """ - A TableWidget should initialize with page 0 and the correct page size. + Given a new TableWidget, when it is initialized, + then its page number should default to 0. """ # The `table_widget` fixture already creates the widget. # Assert its state. @@ -259,8 +260,8 @@ def test_widget_navigation_should_display_correct_page( _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_widget_navigation_should_raise_error_for_negative_input( - table_widget, paginated_pandas_df: pd.DataFrame +def test_setting_negative_page_should_raise_error( + table_widget, ): """ Given a widget, when a negative page number is set, @@ -270,19 +271,20 @@ def test_widget_navigation_should_raise_error_for_negative_input( table_widget.page = -1 -def test_widget_navigation_should_clamp_to_last_page_for_out_of_bounds_input( +def test_setting_page_beyond_max_should_clamp_to_last_page( table_widget, paginated_pandas_df: pd.DataFrame ): """ - Given a widget, when a page number greater than the max is set, + Given a widget, + when a page number greater than the max is set, then the page number should be clamped to the last valid page. """ - expected_slice = paginated_pandas_df.iloc[4:6] + expected_slice = paginated_pandas_df.iloc[4:6] # Last page data - table_widget.page = 100 + table_widget.page = 100 # Set page far beyond the total of 3 pages html = table_widget.table_html - assert table_widget.page == 2 + assert table_widget.page == 2 # Page is clamped to the last valid page (0-indexed) _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) @@ -332,11 +334,11 @@ def test_widget_with_few_rows_should_display_all_rows(small_widget, small_pandas _assert_html_matches_pandas_slice(html, small_pandas_df, small_pandas_df) -def test_widget_with_few_rows_should_have_only_one_page(small_widget): +def test_navigation_beyond_last_page_should_be_clamped(small_widget): """ - Given a DataFrame with a small number of rows, the widget should - report the correct total row count and prevent navigation beyond - the first page, ensuring the frontend correctly displays "Page 1 of 1". + Given a DataFrame smaller than the page size, + when navigating beyond the last page, + then the page should be clamped to the last valid page (page 0). """ # For a DataFrame with 2 rows and page_size 5 (from small_widget fixture), # the frontend should calculate 1 total page. @@ -351,43 +353,65 @@ def test_widget_with_few_rows_should_have_only_one_page(small_widget): assert small_widget.page == 0 -def test_widget_page_size_should_be_immutable_after_creation( +def test_global_options_change_should_not_affect_existing_widget_page_size( paginated_bf_df: bf.dataframe.DataFrame, ): """ - A widget's page size should be fixed on creation and not be affected - by subsequent changes to global options. + Given an existing widget, + when global display options are changed, + then the widget's page size should remain unchanged. """ with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): from bigframes.display import TableWidget widget = TableWidget(paginated_bf_df) assert widget.page_size == 2 - - # Navigate to second page to ensure widget is in a non-default state - widget.page = 1 + widget.page = 1 # a non-default state assert widget.page == 1 - # Change global max_rows - widget should not be affected - bf.options.display.max_rows = 10 + bf.options.display.max_rows = 10 # Change global setting - assert widget.page_size == 2 # Should remain unchanged - assert widget.page == 1 # Should remain on same page + assert widget.page_size == 2 # Should remain unchanged + assert widget.page == 1 # Page should not be reset -def test_empty_widget_should_have_zero_row_count(empty_bf_df: bf.dataframe.DataFrame): - """Given an empty DataFrame, the widget's row count should be 0.""" +def test_widget_with_empty_dataframe_should_have_zero_row_count( + empty_bf_df: bf.dataframe.DataFrame, +): + """ + Given an empty DataFrame, + when a widget is created from it, + then its row_count should be 0. + """ + with bf.option_context("display.repr_mode", "anywidget"): from bigframes.display import TableWidget widget = TableWidget(empty_bf_df) - assert widget.row_count == 0 + assert widget.row_count == 0 + + +def test_widget_with_empty_dataframe_should_render_table_headers( + empty_bf_df: bf.dataframe.DataFrame, +): + """ + + + Given an empty DataFrame, + + + when a widget is created from it, + + + then its HTML representation should still render the table headers. + + + """ -def test_empty_widget_should_render_table_headers(empty_bf_df: bf.dataframe.DataFrame): - """Given an empty DataFrame, the widget should still render table headers.""" with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display import TableWidget widget = TableWidget(empty_bf_df) @@ -395,7 +419,8 @@ def test_empty_widget_should_render_table_headers(empty_bf_df: bf.dataframe.Data html = widget.table_html assert "= 1 - assert test_df._block.retrieve_repr_request_results.cache_info().hits >= 1 + assert test_df._block.retrieve_repr_request_results.cache_info().hits == 0 diff --git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py index 0c9c4070f4..d726bfde2c 100644 --- a/tests/system/small/test_progress_bar.py +++ b/tests/system/small/test_progress_bar.py @@ -153,7 +153,9 @@ def test_repr_anywidget_dataframe(penguins_df_default_index: bf.dataframe.DataFr pytest.importorskip("anywidget") with bf.option_context("display.repr_mode", "anywidget"): actual_repr = repr(penguins_df_default_index) - assert EXPECTED_DRY_RUN_MESSAGE in actual_repr + assert "species" in actual_repr + assert "island" in actual_repr + assert "[344 rows x 7 columns]" in actual_repr def test_repr_anywidget_index(penguins_df_default_index: bf.dataframe.DataFrame): @@ -161,4 +163,7 @@ def test_repr_anywidget_index(penguins_df_default_index: bf.dataframe.DataFrame) with bf.option_context("display.repr_mode", "anywidget"): index = penguins_df_default_index.index actual_repr = repr(index) - assert EXPECTED_DRY_RUN_MESSAGE in actual_repr + # In non-interactive environments, should still get a useful summary. + assert "Index" in actual_repr + assert "0, 1, 2, 3, 4" in actual_repr + assert "dtype='Int64'" in actual_repr diff --git a/tests/unit/test_dataframe_polars.py b/tests/unit/test_dataframe_polars.py index b83380d789..39dbacd087 100644 --- a/tests/unit/test_dataframe_polars.py +++ b/tests/unit/test_dataframe_polars.py @@ -737,7 +737,7 @@ def test_join_repr(scalars_dfs): assert actual == expected -def test_repr_html_w_all_rows(scalars_dfs, session): +def test_mimebundle_html_repr_w_all_rows(scalars_dfs, session): scalars_df, _ = scalars_dfs # get a pandas df of the expected format df, _ = scalars_df._block.to_pandas() @@ -745,7 +745,8 @@ def test_repr_html_w_all_rows(scalars_dfs, session): pandas_df.index.name = scalars_df.index.name # When there are 10 or fewer rows, the outputs should be identical except for the extra note. - actual = scalars_df.head(10)._repr_html_() + bundle = scalars_df.head(10)._repr_mimebundle_() + actual = bundle["text/html"] with display_options.pandas_repr(bigframes.options.display): pandas_repr = pandas_df.head(10)._repr_html_()