Skip to content

Commit aa04bac

Browse files
committed
Revert "Correctly display DataFrames with JSON columns in anywidget"
This reverts commit 8c34512.
1 parent 05e9b69 commit aa04bac

File tree

5 files changed

+11
-163
lines changed

5 files changed

+11
-163
lines changed

bigframes/core/blocks.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
import warnings
4444

4545
import bigframes_vendored.constants as constants
46-
import db_dtypes
4746
import google.cloud.bigquery as bigquery
4847
import numpy
4948
import pandas as pd
@@ -135,21 +134,6 @@ class MaterializationOptions:
135134
ordered: bool = True
136135

137136

138-
def _replace_json_arrow_with_string(pa_type: pa.DataType) -> pa.DataType:
139-
"""Recursively replace JSONArrowType with string type."""
140-
if isinstance(pa_type, db_dtypes.JSONArrowType):
141-
return pa.string()
142-
if isinstance(pa_type, pa.ListType):
143-
return pa.list_(_replace_json_arrow_with_string(pa_type.value_type))
144-
if isinstance(pa_type, pa.StructType):
145-
new_fields = [
146-
field.with_type(_replace_json_arrow_with_string(field.type))
147-
for field in pa_type
148-
]
149-
return pa.struct(new_fields)
150-
return pa_type
151-
152-
153137
class Block:
154138
"""A immutable 2D data structure."""
155139

bigframes/dataframe.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,8 @@ def __repr__(self) -> str:
783783

784784
opts = bigframes.options.display
785785
max_results = opts.max_rows
786+
# anywidget mode uses the same display logic as the "deferred" mode
787+
# for faster execution
786788
if opts.repr_mode in ("deferred", "anywidget"):
787789
return formatter.repr_query_job(self._compute_dry_run())
788790

bigframes/session/executor.py

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,6 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]:
5252
result_rows = 0
5353

5454
for batch in self._arrow_batches:
55-
# Convert JSON columns to strings before casting
56-
batch = self._convert_json_to_string(batch)
5755
batch = pyarrow_utils.cast_batch(batch, self.schema.to_pyarrow())
5856
result_rows += batch.num_rows
5957

@@ -69,38 +67,6 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]:
6967

7068
yield batch
7169

72-
def _convert_json_to_string(
73-
self, batch: pyarrow.RecordBatch
74-
) -> pyarrow.RecordBatch:
75-
"""Convert JSON arrow extension types to string to avoid PyArrow compatibility issues."""
76-
import logging
77-
78-
new_arrays = []
79-
new_fields = []
80-
81-
for i, field in enumerate(batch.schema):
82-
array = batch.column(i)
83-
84-
# Check if this column should be JSON based on our schema
85-
schema_item = next(
86-
(item for item in self.schema.items if item.column == field.name), None
87-
)
88-
89-
if schema_item and schema_item.dtype == bigframes.dtypes.JSON_DTYPE:
90-
logging.info(f"Converting JSON column: {field.name}")
91-
# Convert JSONArrowType to string
92-
if array.type == bigframes.dtypes.JSON_ARROW_TYPE:
93-
array = array.cast(pyarrow.string())
94-
new_fields.append(pyarrow.field(field.name, pyarrow.string()))
95-
else:
96-
new_fields.append(field)
97-
98-
new_arrays.append(array)
99-
100-
return pyarrow.RecordBatch.from_arrays(
101-
new_arrays, schema=pyarrow.schema(new_fields)
102-
)
103-
10470
def to_arrow_table(self) -> pyarrow.Table:
10571
# Need to provide schema if no result rows, as arrow can't infer
10672
# If there are rows, it is safest to infer schema from batches.

mypy.ini

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,3 @@ ignore_missing_imports = True
4444

4545
[mypy-anywidget]
4646
ignore_missing_imports = True
47-
48-
[mypy-db_dtypes]
49-
ignore_missing_imports = True

notebooks/dataframes/anywidget_mode.ipynb

Lines changed: 9 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,7 @@
3535
"execution_count": 2,
3636
"id": "ca22f059",
3737
"metadata": {},
38-
"outputs": [
39-
{
40-
"name": "stderr",
41-
"output_type": "stream",
42-
"text": [
43-
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/venv/lib/python3.10/site-packages/google/api_core/_python_version_support.py:266: FutureWarning: You are using a Python version (3.10.15) which Google will stop supporting in new releases of google.api_core once it reaches its end of life (2026-10-04). Please upgrade to the latest Python version, or at least Python 3.11, to continue receiving updates for google.api_core past that date.\n",
44-
" warnings.warn(message, FutureWarning)\n"
45-
]
46-
}
47-
],
38+
"outputs": [],
4839
"source": [
4940
"import bigframes.pandas as bpd"
5041
]
@@ -151,9 +142,9 @@
151142
{
152143
"data": {
153144
"application/vnd.jupyter.widget-view+json": {
154-
"model_id": "473b016aa6b24c86aafc6372352e822d",
145+
"model_id": "aafd4f912b5f42e0896aa5f0c2c62620",
155146
"version_major": 2,
156-
"version_minor": 1
147+
"version_minor": 0
157148
},
158149
"text/plain": [
159150
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
@@ -214,17 +205,16 @@
214205
{
215206
"data": {
216207
"application/vnd.jupyter.widget-view+json": {
217-
"model_id": "339279cc312e4e7fb67923e4e6ad7779",
208+
"model_id": "5ec0ad9f11874d4f9d8edbc903ee7b5d",
218209
"version_major": 2,
219-
"version_minor": 1
210+
"version_minor": 0
220211
},
221212
"text/plain": [
222213
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
223214
]
224215
},
225-
"execution_count": 7,
226216
"metadata": {},
227-
"output_type": "execute_result"
217+
"output_type": "display_data"
228218
}
229219
],
230220
"source": [
@@ -314,17 +304,16 @@
314304
{
315305
"data": {
316306
"application/vnd.jupyter.widget-view+json": {
317-
"model_id": "8ff1f64c44304da0944eadbd0fb3981d",
307+
"model_id": "651b5aac958c408183775152c2573a03",
318308
"version_major": 2,
319-
"version_minor": 1
309+
"version_minor": 0
320310
},
321311
"text/plain": [
322312
"TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
323313
]
324314
},
325-
"execution_count": 9,
326315
"metadata": {},
327-
"output_type": "execute_result"
316+
"output_type": "display_data"
328317
}
329318
],
330319
"source": [
@@ -334,96 +323,6 @@
334323
"print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
335324
"small_widget"
336325
]
337-
},
338-
{
339-
"cell_type": "markdown",
340-
"id": "added-cell-2",
341-
"metadata": {},
342-
"source": [
343-
"### Displaying Generative AI results containing JSON\n",
344-
"The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly."
345-
]
346-
},
347-
{
348-
"cell_type": "code",
349-
"execution_count": 10,
350-
"id": "added-cell-1",
351-
"metadata": {},
352-
"outputs": [
353-
{
354-
"data": {
355-
"text/html": [
356-
"✅ Completed. \n",
357-
" Query processed 85.9 kB in 15 seconds of slot time.\n",
358-
" "
359-
],
360-
"text/plain": [
361-
"<IPython.core.display.HTML object>"
362-
]
363-
},
364-
"metadata": {},
365-
"output_type": "display_data"
366-
},
367-
{
368-
"name": "stderr",
369-
"output_type": "stream",
370-
"text": [
371-
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:969: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
372-
"instead of using `db_dtypes` in the future when available in pandas\n",
373-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
374-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
375-
]
376-
},
377-
{
378-
"data": {
379-
"text/html": [
380-
"✅ Completed. "
381-
],
382-
"text/plain": [
383-
"<IPython.core.display.HTML object>"
384-
]
385-
},
386-
"metadata": {},
387-
"output_type": "display_data"
388-
},
389-
{
390-
"data": {
391-
"application/vnd.jupyter.widget-view+json": {
392-
"model_id": "a6d61e48cca642b7a57e6431359b4cc4",
393-
"version_major": 2,
394-
"version_minor": 1
395-
},
396-
"text/plain": [
397-
"TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
398-
]
399-
},
400-
"metadata": {},
401-
"output_type": "display_data"
402-
},
403-
{
404-
"data": {
405-
"text/html": [],
406-
"text/plain": [
407-
"Computation deferred. Computation will process 0 Bytes"
408-
]
409-
},
410-
"execution_count": 10,
411-
"metadata": {},
412-
"output_type": "execute_result"
413-
}
414-
],
415-
"source": [
416-
"bpd._read_gbq_colab(\"\"\"\n",
417-
" SELECT\n",
418-
" AI.GENERATE(\n",
419-
" prompt=>(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\n",
420-
" connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\n",
421-
" output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\n",
422-
" *\n",
423-
" FROM `bigquery-public-data.labeled_patents.extracted_data`\n",
424-
" LIMIT 5;\n",
425-
"\"\"\")"
426-
]
427326
}
428327
],
429328
"metadata": {

0 commit comments

Comments
 (0)