
Commit dba9051: "add more testcase" (1 parent: 6c3567b)

File tree: 5 files changed, +255 -5 lines


bigframes/display/anywidget.py

Lines changed: 2 additions & 0 deletions

@@ -231,6 +231,8 @@ def _set_table_html(self) -> None:
                 cached_data = self._cached_data
             else:
                 break
+
+        # Get the data for the current page
         page_data = cached_data.iloc[start:end]
 
         # Generate HTML table
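
The two added lines are only a blank line and a clarifying comment ahead of the existing page slice. For orientation, the start:end bounds follow ordinary zero-based pagination arithmetic; the sketch below is illustrative only, and the names page and page_size are assumptions, not necessarily the widget's actual attributes.

import pandas as pd

# Hypothetical stand-in for the widget's cached pandas data.
cached_data = pd.DataFrame({"value": range(25)})

page, page_size = 2, 10  # illustrative names, not the widget's attributes
start = page * page_size
end = start + page_size

# Same slicing pattern as in the diff above: rows 20..24 land on the last page.
page_data = cached_data.iloc[start:end]
assert len(page_data) == 5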

notebooks/dataframes/anywidget_mode.ipynb

Lines changed: 13 additions & 5 deletions

@@ -142,7 +142,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-      "model_id": "1d718cdbafcb42898120637cdb3fa267",
+      "model_id": "93dd10072d564a02a0278817d14855a9",
       "version_major": 2,
       "version_minor": 0
      },
@@ -205,7 +205,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-      "model_id": "519297c3ad19403aa844cbeabcd5eb44",
+      "model_id": "6e2538d446e344ac8505e4706730243e",
       "version_major": 2,
       "version_minor": 0
      },
@@ -304,7 +304,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-      "model_id": "37ba207603aa40a38c9786a210e712fd",
+      "model_id": "d6faf367ea5d44ad9d275506d870557a",
       "version_major": 2,
       "version_minor": 0
      },
@@ -333,6 +333,14 @@
    "The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly."
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "fdadcad6",
+  "metadata": {},
+  "outputs": [],
+  "source": []
+ },
  {
   "cell_type": "code",
   "execution_count": 10,
@@ -343,7 +351,7 @@
    "data": {
     "text/html": [
      "✅ Completed. \n",
-     " Query processed 85.9 kB in 23 seconds of slot time.\n",
+     " Query processed 85.9 kB in 24 seconds of slot time.\n",
      " "
     ],
     "text/plain": [
@@ -382,7 +390,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-      "model_id": "379998ea9a744e7b8afd9c1bcb36548d",
+      "model_id": "b6d6f3bacc2c43fc9a335e6039db12a5",
       "version_major": 2,
       "version_minor": 0
      },
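
The notebook changes above are regenerated widget model IDs, a refreshed slot-time figure, and one new empty code cell; the markdown cell in the fourth hunk describes the JSON-to-string fallback that the new tests below exercise. A rough, self-contained sketch of that behavior follows (it assumes a bigframes Session object named session and mirrors the unit test added in tests/unit/test_dataframe.py; it is not the display path itself).

import pandas as pd

import bigframes.dtypes

# Build a local DataFrame with a JSON column and load it through bigframes.
pandas_df = pd.DataFrame({"a": [1], "b": ['{"c": 2, "d": 3}']})
pandas_df["b"] = pandas_df["b"].astype(bigframes.dtypes.JSON_DTYPE)
bf_df = session.read_pandas(pandas_df)  # `session` is an assumed, pre-existing Session

# When batches are materialized for display, the JSON column comes back as a
# pyarrow-backed string column rather than the raw JSON extension type.
batches = list(bf_df._to_pandas_batches(page_size=10))
assert batches[0]["b"].dtype == pd.StringDtype(storage="pyarrow")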

tests/system/small/test_anywidget.py

Lines changed: 131 additions & 0 deletions

@@ -527,6 +527,137 @@ def test_json_column_anywidget_mode(mock_display, json_df: bf.dataframe.DataFrame):
     assert result == ""
 
 
+def mock_execute_result_with_params(
+    self, schema, total_rows_val, arrow_batches_val, *args, **kwargs
+):
+    """
+    Mocks an execution result with configurable total_rows and arrow_batches.
+    """
+    from bigframes.session.executor import ExecuteResult
+
+    return ExecuteResult(
+        iter(arrow_batches_val),
+        schema=schema,
+        query_job=None,
+        total_bytes=None,
+        total_rows=total_rows_val,
+    )
+
+
+def test_widget_row_count_should_be_immutable_after_creation(
+    paginated_bf_df: bf.dataframe.DataFrame,
+):
+    """
+    Given a widget created with a specific configuration when global display
+    options are changed later, the widget's original row_count should remain
+    unchanged.
+    """
+    from bigframes.display.anywidget import TableWidget
+
+    # Use a context manager to ensure the option is reset
+    with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2):
+        widget = TableWidget(paginated_bf_df)
+        initial_row_count = widget.row_count
+
+        # Change a global option that could influence row count
+        bf.options.display.max_rows = 10
+
+        # Verify the row count remains immutable.
+        assert widget.row_count == initial_row_count
+
+
+class FaultyIterator:
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        raise ValueError("Simulated read error")
+
+
+def test_widget_should_fallback_to_zero_rows_with_invalid_total_rows(
+    paginated_bf_df: bf.dataframe.DataFrame,
+    monkeypatch: pytest.MonkeyPatch,
+):
+    """
+    Given an internal component fails to return valid execution data,
+    when the TableWidget is created, its error_message should be set and displayed.
+    """
+    # Patch the executor's 'execute' method to simulate an error.
+    monkeypatch.setattr(
+        "bigframes.session.bq_caching_executor.BigQueryCachingExecutor.execute",
+        lambda self, *args, **kwargs: mock_execute_result_with_params(
+            self, paginated_bf_df._block.expr.schema, None, [], *args, **kwargs
+        ),
+    )
+
+    # Create the TableWidget under the error condition.
+    with bf.option_context("display.repr_mode", "anywidget"):
+        from bigframes.display.anywidget import TableWidget
+
+        # The widget should handle the faulty data from the mock without crashing.
+        widget = TableWidget(paginated_bf_df)
+
+        # The widget should have an error message and display it in the HTML.
+        assert widget.row_count == 0
+        assert widget._error_message is not None
+        assert "Could not determine total row count" in widget._error_message
+        assert widget._error_message in widget.table_html
+
+
+def test_widget_row_count_reflects_actual_data_available(
+    paginated_bf_df: bf.dataframe.DataFrame,
+):
+    """
+    Test that widget row_count reflects the actual data available,
+    regardless of theoretical limits.
+    """
+    from bigframes.display.anywidget import TableWidget
+
+    # Set up display options that define a page size.
+    with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2):
+        widget = TableWidget(paginated_bf_df)
+
+        # The widget should report the total rows in the DataFrame,
+        # not limited by page_size (which only affects pagination)
+        assert widget.row_count == EXPECTED_ROW_COUNT
+        assert widget.page_size == 2  # Respects the display option
+
+
 # TODO(shuowei): Add tests for custom index and multiindex
 # This may not be necessary for the SQL Cell use case but should be
 # considered for completeness.
+
+
+@pytest.fixture(scope="module")
+def empty_json_df(session: bf.Session) -> bf.dataframe.DataFrame:
+    """Create an empty DataFrame with a JSON column for testing."""
+    import bigframes.dtypes
+
+    pandas_df = pd.DataFrame(
+        {
+            "a": pd.Series(dtype="int64"),
+            "b": pd.Series(dtype=bigframes.dtypes.JSON_DTYPE),
+        }
+    )
+    return session.read_pandas(pandas_df)
+
+
+def test_empty_widget_with_json_column(empty_json_df: bf.dataframe.DataFrame):
+    """Given an empty DataFrame with a JSON column, the widget should render table headers."""
+    with bf.option_context("display.repr_mode", "anywidget"):
+        from bigframes.display.anywidget import TableWidget
+
+        widget = TableWidget(empty_json_df)
+        html = widget.table_html
+
+        assert widget.row_count == 0
+        assert "<table" in html
+        assert "a" in html
+        assert "b" in html
+
+
+def test_json_column_conversion_warning(json_df: bf.dataframe.DataFrame):
+    """Test that a warning is shown when converting JSON columns."""
+    with bf.option_context("display.repr_mode", "anywidget"):
+        with pytest.warns(UserWarning, match="Converting JSON columns to strings"):
+            json_df._repr_html_()

tests/unit/test_dataframe.py

Lines changed: 23 additions & 0 deletions

@@ -181,3 +181,26 @@ def test_dataframe_ai_property_future_warning(
 
     with pytest.warns(FutureWarning):
         dataframe.ai
+
+
+@pytest.fixture()
+def json_df(polars_session: bigframes.session.Session) -> bigframes.dataframe.DataFrame:
+    """Create a DataFrame with a JSON column for testing."""
+    import bigframes.dtypes
+
+    pandas_df = pd.DataFrame(
+        {
+            "a": [1],
+            "b": ['{"c": 2, "d": 3}'],
+        }
+    )
+    pandas_df["b"] = pandas_df["b"].astype(bigframes.dtypes.JSON_DTYPE)
+    return polars_session.read_pandas(pandas_df)
+
+
+def test_to_pandas_batches_with_json_column(json_df: bigframes.dataframe.DataFrame):
+    """Test that JSON columns are converted to strings in to_pandas_batches."""
+    batches = list(json_df._to_pandas_batches(page_size=10))
+    assert len(batches) > 0
+    # Verify the JSON column is now string type
+    assert batches[0]["b"].dtype == pd.StringDtype(storage="pyarrow")

tests/unit/test_polars_compiler.py

Lines changed: 86 additions & 0 deletions

@@ -0,0 +1,86 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pandas as pd
+import polars as pl
+import pytest
+
+import bigframes as bf
+import bigframes.core.compile.polars.compiler as polars_compiler
+import bigframes.core.nodes as nodes
+import bigframes.operations.json_ops as json_ops
+
+
+def test_polars_to_json_string():
+    """Test ToJSONString operation in Polars compiler."""
+    compiler = polars_compiler.PolarsExpressionCompiler()
+    op = json_ops.ToJSONString()
+    # Polars doesn't have a native JSON type, it uses strings.
+    # The operation is a cast to string.
+    input_expr = pl.lit('{"b": 2}', dtype=pl.String)
+    result = compiler.compile_op(op, input_expr)
+
+    df = pl.DataFrame({"a": ['{"b": 2}']}).lazy()
+    result_df = df.with_columns(result.alias("b")).collect()
+    assert result_df["b"][0] == '{"b": 2}'
+    assert result_df["b"].dtype == pl.String
+
+
+def test_polars_parse_json():
+    """Test ParseJSON operation in Polars compiler."""
+    compiler = polars_compiler.PolarsExpressionCompiler()
+    op = json_ops.ParseJSON()
+    input_expr = pl.lit('{"b": 2}', dtype=pl.String)
+    result = compiler.compile_op(op, input_expr)
+
+    df = pl.DataFrame({"a": ['{"b": 2}']}).lazy()
+    result_df = df.with_columns(result.alias("b")).collect()
+    # The result of json_decode is a struct
+    assert isinstance(result_df["b"][0], dict)
+    assert result_df["b"][0] == {"b": 2}
+
+
+@pytest.mark.skip(reason="Polars does not have json_extract on string expressions")
+def test_polars_json_extract():
+    """Test JSONExtract operation in Polars compiler."""
+    compiler = polars_compiler.PolarsExpressionCompiler()
+    op = json_ops.JSONExtract(json_path="$.b")
+    input_expr = pl.lit('{"a": 1, "b": "hello"}', dtype=pl.String)
+    result = compiler.compile_op(op, input_expr)
+
+    df = pl.DataFrame({"a": ['{"b": "world"}']}).lazy()
+    result_df = df.with_columns(result.alias("b")).collect()
+    # json_extract returns a JSON encoded string
+    assert result_df["b"][0] == '"world"'
+
+
+def test_readlocal_with_json_column(polars_session):
+    """Test ReadLocalNode compilation with JSON columns."""
+    pandas_df = pd.DataFrame({"data": ['{"key": "value"}']})
+    pandas_df["data"] = pandas_df["data"].astype(bf.dtypes.JSON_DTYPE)
+    bf_df = polars_session.read_pandas(pandas_df)
+
+    node = bf_df._block.expr.node
+    # Traverse the node tree to find the ReadLocalNode
+    while not isinstance(node, nodes.ReadLocalNode):
+        node = node.child
+    assert isinstance(node, nodes.ReadLocalNode)
+
+    compiler = polars_compiler.PolarsCompiler()
+    lazy_frame = compiler.compile_node(node)
+    result_df = lazy_frame.collect()
+
+    # The compiler should have converted the JSON column to string.
+    assert result_df.schema["column_0"] == pl.String
+    assert result_df["column_0"][0] == '{"key":"value"}'
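
For reference, the ParseJSON test above leans on Polars decoding JSON strings into structs (the test comment notes that json_decode returns a struct). A minimal sketch of that underlying behavior in plain Polars, separate from the bigframes compiler, might look like this:

import polars as pl

df = pl.DataFrame({"a": ['{"b": 2}']})
decoded = df.with_columns(pl.col("a").str.json_decode().alias("b"))

# json_decode infers a struct dtype; individual rows come back as Python dicts.
assert isinstance(decoded.schema["b"], pl.Struct)
assert decoded["b"][0] == {"b": 2}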
