Commit 5955bfe

code refactor

1 parent 3b86941 commit 5955bfe

2 files changed: +8 -32 lines changed

bigframes/core/blocks.py

Lines changed: 5 additions & 12 deletions

@@ -720,17 +720,12 @@ def to_pandas_batches(
         series_map = {}
         for col in itertools.chain(self.value_columns, self.index_columns):
             dtype = self.expr.get_column_type(col)
-            if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype):
-                # Due to a limitation in Apache Arrow (#45262), JSON columns are not
-                # natively supported by the to_pandas_batches() method, which is
-                # used by the anywidget backend.
-                # Workaround for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
-                # PyArrow doesn't support creating an empty array with db_dtypes.JSONArrowType,
-                # especially when nested.
+            try:
+                series_map[col] = pd.Series([], dtype=dtype)
+            except pa.ArrowNotImplementedError:
+                # PyArrow doesn't support creating an empty array with
+                # db_dtypes.JSONArrowType, especially when nested.
                 # Create with string type and then cast.
-
-                # MyPy doesn't automatically narrow the type of 'dtype' here,
-                # so we add an explicit check.
                 if isinstance(dtype, pd.ArrowDtype):
                     safe_pa_type = bigframes.dtypes._replace_json_arrow_with_string(
                         dtype.pyarrow_dtype
@@ -742,8 +737,6 @@ def to_pandas_batches(
                     # contains_db_dtypes_json_dtype is accurate,
                     # but it's here for MyPy's sake.
                     series_map[col] = pd.Series([], dtype=dtype)
-            else:
-                series_map[col] = pd.Series([], dtype=dtype)
         empty_val = pd.DataFrame(series_map)
         dfs = map(
             lambda a: a[0],
bigframes/session/loader.py

Lines changed: 3 additions & 20 deletions

@@ -45,7 +45,6 @@
 import google.cloud.bigquery.table
 from google.cloud.bigquery_storage_v1 import types as bq_storage_types
 import pandas
-import pyarrow as pa
 
 import bigframes._tools
 import bigframes._tools.strings
@@ -1307,22 +1306,6 @@ def _transform_read_gbq_configuration(configuration: Optional[dict]) -> dict:
     return configuration
 
 
-def _has_json_arrow_type(arrow_type: pa.DataType) -> bool:
-    """
-    Searches recursively for JSON array type within a PyArrow DataType.
-    """
-    if arrow_type == bigframes.dtypes.JSON_ARROW_TYPE:
-        return True
-    if pa.types.is_list(arrow_type):
-        return _has_json_arrow_type(arrow_type.value_type)
-    if pa.types.is_struct(arrow_type):
-        for i in range(arrow_type.num_fields):
-            if _has_json_arrow_type(arrow_type.field(i).type):
-                return True
-        return False
-    return False
-
-
 def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype):
     """
     Determines whether a datatype is supported by bq load jobs.
@@ -1339,9 +1322,9 @@ def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype):
     if column_type == bigframes.dtypes.JSON_DTYPE:
         return
 
-    if isinstance(column_type, pandas.ArrowDtype) and _has_json_arrow_type(
-        column_type.pyarrow_dtype
-    ):
+    if isinstance(
+        column_type, pandas.ArrowDtype
+    ) and bigframes.dtypes.contains_db_dtypes_json_dtype(column_type):
        raise NotImplementedError(
            f"Nested JSON types, found in column `{name}`: `{column_type}`', "
            f"are currently unsupported for upload. {constants.FEEDBACK_LINK}"
