Skip to content

Commit aa04bac

Browse files
committed
Revert "Correctly display DataFrames with JSON columns in anywidget"
This reverts commit 8c34512.
1 parent 05e9b69 commit aa04bac

File tree

5 files changed

+11
-163
lines changed

5 files changed

+11
-163
lines changed

bigframes/core/blocks.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
import warnings
4444

4545
import bigframes_vendored.constants as constants
46-
import db_dtypes
4746
import google.cloud.bigquery as bigquery
4847
import numpy
4948
import pandas as pd
@@ -135,21 +134,6 @@ class MaterializationOptions:
135134
ordered: bool = True
136135

137136

138-
def _replace_json_arrow_with_string(pa_type: pa.DataType) -> pa.DataType:
139-
"""Recursively replace JSONArrowType with string type."""
140-
if isinstance(pa_type, db_dtypes.JSONArrowType):
141-
return pa.string()
142-
if isinstance(pa_type, pa.ListType):
143-
return pa.list_(_replace_json_arrow_with_string(pa_type.value_type))
144-
if isinstance(pa_type, pa.StructType):
145-
new_fields = [
146-
field.with_type(_replace_json_arrow_with_string(field.type))
147-
for field in pa_type
148-
]
149-
return pa.struct(new_fields)
150-
return pa_type
151-
152-
153137
class Block:
154138
"""A immutable 2D data structure."""
155139

bigframes/dataframe.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,8 @@ def __repr__(self) -> str:
783783

784784
opts = bigframes.options.display
785785
max_results = opts.max_rows
786+
# anywidget mode uses the same display logic as the "deferred" mode
787+
# for faster execution
786788
if opts.repr_mode in ("deferred", "anywidget"):
787789
return formatter.repr_query_job(self._compute_dry_run())
788790

bigframes/session/executor.py

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,6 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]:
5252
result_rows = 0
5353

5454
for batch in self._arrow_batches:
55-
# Convert JSON columns to strings before casting
56-
batch = self._convert_json_to_string(batch)
5755
batch = pyarrow_utils.cast_batch(batch, self.schema.to_pyarrow())
5856
result_rows += batch.num_rows
5957

@@ -69,38 +67,6 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]:
6967

7068
yield batch
7169

72-
def _convert_json_to_string(
73-
self, batch: pyarrow.RecordBatch
74-
) -> pyarrow.RecordBatch:
75-
"""Convert JSON arrow extension types to string to avoid PyArrow compatibility issues."""
76-
import logging
77-
78-
new_arrays = []
79-
new_fields = []
80-
81-
for i, field in enumerate(batch.schema):
82-
array = batch.column(i)
83-
84-
# Check if this column should be JSON based on our schema
85-
schema_item = next(
86-
(item for item in self.schema.items if item.column == field.name), None
87-
)
88-
89-
if schema_item and schema_item.dtype == bigframes.dtypes.JSON_DTYPE:
90-
logging.info(f"Converting JSON column: {field.name}")
91-
# Convert JSONArrowType to string
92-
if array.type == bigframes.dtypes.JSON_ARROW_TYPE:
93-
array = array.cast(pyarrow.string())
94-
new_fields.append(pyarrow.field(field.name, pyarrow.string()))
95-
else:
96-
new_fields.append(field)
97-
98-
new_arrays.append(array)
99-
100-
return pyarrow.RecordBatch.from_arrays(
101-
new_arrays, schema=pyarrow.schema(new_fields)
102-
)
103-
10470
def to_arrow_table(self) -> pyarrow.Table:
10571
# Need to provide schema if no result rows, as arrow can't infer
10672
# If there are rows, it is safest to infer schema from batches.

mypy.ini

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,3 @@ ignore_missing_imports = True
4444

4545
[mypy-anywidget]
4646
ignore_missing_imports = True
47-
48-
[mypy-db_dtypes]
49-
ignore_missing_imports = True

notebooks/dataframes/anywidget_mode.ipynb

Lines changed: 9 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,7 @@
3535
"execution_count": 2,
3636
"id": "ca22f059",
3737
"metadata": {},
38-
"outputs": [
39-
{
40-
"name": "stderr",
41-
"output_type": "stream",
42-
"text": [
43-
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/venv/lib/python3.10/site-packages/google/api_core/_python_version_support.py:266: FutureWarning: You are using a Python version (3.10.15) which Google will stop supporting in new releases of google.api_core once it reaches its end of life (2026-10-04). Please upgrade to the latest Python version, or at least Python 3.11, to continue receiving updates for google.api_core past that date.\n",
44-
" warnings.warn(message, FutureWarning)\n"
45-
]
46-
}
47-
],
38+
"outputs": [],
4839
"source": [
4940
"import bigframes.pandas as bpd"
5041
]
@@ -151,9 +142,9 @@
151142
{
152143
"data": {
153144
"application/vnd.jupyter.widget-view+json": {
154-
"model_id": "473b016aa6b24c86aafc6372352e822d",
145+
"model_id": "aafd4f912b5f42e0896aa5f0c2c62620",
155146
"version_major": 2,
156-
"version_minor": 1
147+
"version_minor": 0
157148
},
158149
"text/plain": [
159150
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
@@ -214,17 +205,16 @@
214205
{
215206
"data": {
216207
"application/vnd.jupyter.widget-view+json": {
217-
"model_id": "339279cc312e4e7fb67923e4e6ad7779",
208+
"model_id": "5ec0ad9f11874d4f9d8edbc903ee7b5d",
218209
"version_major": 2,
219-
"version_minor": 1
210+
"version_minor": 0
220211
},
221212
"text/plain": [
222213
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
223214
]
224215
},
225-
"execution_count": 7,
226216
"metadata": {},
227-
"output_type": "execute_result"
217+
"output_type": "display_data"
228218
}
229219
],
230220
"source": [
@@ -314,17 +304,16 @@
314304
{
315305
"data": {
316306
"application/vnd.jupyter.widget-view+json": {
317-
"model_id": "8ff1f64c44304da0944eadbd0fb3981d",
307+
"model_id": "651b5aac958c408183775152c2573a03",
318308
"version_major": 2,
319-
"version_minor": 1
309+
"version_minor": 0
320310
},
321311
"text/plain": [
322312
"TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
323313
]
324314
},
325-
"execution_count": 9,
326315
"metadata": {},
327-
"output_type": "execute_result"
316+
"output_type": "display_data"
328317
}
329318
],
330319
"source": [
@@ -334,96 +323,6 @@
334323
"print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
335324
"small_widget"
336325
]
337-
},
338-
{
339-
"cell_type": "markdown",
340-
"id": "added-cell-2",
341-
"metadata": {},
342-
"source": [
343-
"### Displaying Generative AI results containing JSON\n",
344-
"The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly."
345-
]
346-
},
347-
{
348-
"cell_type": "code",
349-
"execution_count": 10,
350-
"id": "added-cell-1",
351-
"metadata": {},
352-
"outputs": [
353-
{
354-
"data": {
355-
"text/html": [
356-
"✅ Completed. \n",
357-
" Query processed 85.9 kB in 15 seconds of slot time.\n",
358-
" "
359-
],
360-
"text/plain": [
361-
"<IPython.core.display.HTML object>"
362-
]
363-
},
364-
"metadata": {},
365-
"output_type": "display_data"
366-
},
367-
{
368-
"name": "stderr",
369-
"output_type": "stream",
370-
"text": [
371-
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:969: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
372-
"instead of using `db_dtypes` in the future when available in pandas\n",
373-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
374-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
375-
]
376-
},
377-
{
378-
"data": {
379-
"text/html": [
380-
"✅ Completed. "
381-
],
382-
"text/plain": [
383-
"<IPython.core.display.HTML object>"
384-
]
385-
},
386-
"metadata": {},
387-
"output_type": "display_data"
388-
},
389-
{
390-
"data": {
391-
"application/vnd.jupyter.widget-view+json": {
392-
"model_id": "a6d61e48cca642b7a57e6431359b4cc4",
393-
"version_major": 2,
394-
"version_minor": 1
395-
},
396-
"text/plain": [
397-
"TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
398-
]
399-
},
400-
"metadata": {},
401-
"output_type": "display_data"
402-
},
403-
{
404-
"data": {
405-
"text/html": [],
406-
"text/plain": [
407-
"Computation deferred. Computation will process 0 Bytes"
408-
]
409-
},
410-
"execution_count": 10,
411-
"metadata": {},
412-
"output_type": "execute_result"
413-
}
414-
],
415-
"source": [
416-
"bpd._read_gbq_colab(\"\"\"\n",
417-
" SELECT\n",
418-
" AI.GENERATE(\n",
419-
" prompt=>(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\n",
420-
" connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\n",
421-
" output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\n",
422-
" *\n",
423-
" FROM `bigquery-public-data.labeled_patents.extracted_data`\n",
424-
" LIMIT 5;\n",
425-
"\"\"\")"
426-
]
427326
}
428327
],
429328
"metadata": {

0 commit comments

Comments
 (0)