Skip to content

Commit 39cf595

Browse files
committed
revert 1 file to match main branch
1 parent 3cc643d commit 39cf595

File tree

11 files changed

+13
-217
lines changed

11 files changed

+13
-217
lines changed

bigframes/bigquery/_operations/ai.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,6 @@ def generate(
123123
if output_schema is None:
124124
output_schema_str = None
125125
else:
126-
# Validate output schema types
127-
for col_name, col_type in output_schema.items():
128-
if col_type.upper() == "JSON":
129-
raise ValueError(
130-
"JSON type is not supported in output_schema. "
131-
"Supported types are: STRING, INT64, FLOAT64, BOOL, ARRAY, and STRUCT."
132-
)
133126
output_schema_str = ", ".join(
134127
[f"{name} {sql_type}" for name, sql_type in output_schema.items()]
135128
)

bigframes/core/compile/polars/compiler.py

Lines changed: 5 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,13 @@
4545
polars_installed = True
4646
if TYPE_CHECKING:
4747
import polars as pl
48-
import pyarrow as pa
4948
else:
5049
try:
5150
import bigframes._importing
5251

52+
# Use import_polars() instead of importing directly so that we check
53+
# the version numbers.
5354
pl = bigframes._importing.import_polars()
54-
import pyarrow as pa
5555
except Exception:
5656
polars_installed = False
5757

@@ -427,21 +427,6 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
427427
assert isinstance(op, json_ops.JSONDecode)
428428
return input.str.json_decode(_DTYPE_MAPPING[op.to_type])
429429

430-
@compile_op.register(json_ops.ToJSONString)
431-
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
432-
# Convert JSON to string representation
433-
return input.cast(pl.String())
434-
435-
@compile_op.register(json_ops.ParseJSON)
436-
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
437-
# In Polars, JSON is stored as string, so no decoding needed
438-
return input
439-
440-
@compile_op.register(json_ops.JSONExtract)
441-
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
442-
assert isinstance(op, json_ops.JSONExtract)
443-
return input.str.json_path_match(op.json_path)
444-
445430
@compile_op.register(arr_ops.ToArrayOp)
446431
def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr:
447432
return pl.concat_list(*inputs)
@@ -621,14 +606,9 @@ def compile_readlocal(self, node: nodes.ReadLocalNode):
621606
scan_item.source_id: scan_item.id.sql
622607
for scan_item in node.scan_list.items
623608
}
624-
625-
if hasattr(node.local_data_source, "to_arrow"):
626-
schema, batches = node.local_data_source.to_arrow(json_type="string")
627-
arrow_data = pa.Table.from_batches(batches, schema)
628-
else:
629-
arrow_data = node.local_data_source.data
630-
631-
lazy_frame = cast(pl.DataFrame, pl.from_arrow(arrow_data)).lazy()
609+
lazy_frame = cast(
610+
pl.DataFrame, pl.from_arrow(node.local_data_source.data)
611+
).lazy()
632612
lazy_frame = lazy_frame.select(cols_to_read.keys()).rename(cols_to_read)
633613
if node.offsets_col:
634614
lazy_frame = lazy_frame.with_columns(

bigframes/dataframe.py

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -783,8 +783,7 @@ def __repr__(self) -> str:
783783

784784
opts = bigframes.options.display
785785
max_results = opts.max_rows
786-
787-
# anywidget mode uses the same display logic as the "deferred" mode
786+
# anywdiget mode uses the same display logic as the "deferred" mode  [NOTE(review): "anywdiget" looks like a typo for "anywidget" in the reverted source — confirm against main branch]
788787
# for faster execution
789788
if opts.repr_mode in ("deferred", "anywidget"):
790789
return formatter.repr_query_job(self._compute_dry_run())
@@ -856,28 +855,14 @@ def _repr_html_(self) -> str:
856855

857856
from bigframes import display
858857

859-
# The anywidget frontend doesn't support the db_dtypes JSON type, so
860-
# convert to strings for display.
861-
json_cols = [
862-
series_name
863-
for series_name, series in df.items()
864-
if bigframes.dtypes.contains_db_dtypes_json_dtype(series.dtype)
865-
]
866-
if json_cols:
867-
warnings.warn(
868-
"Converting JSON columns to strings for display. "
869-
"This is temporary and will be removed when the frontend supports JSON types."
870-
)
871-
for col in json_cols:
872-
df[col] = df[col]._apply_unary_op(ops.json_ops.ToJSONString())
873-
874858
# Always create a new widget instance for each display call
875859
# This ensures that each cell gets its own widget and prevents
876860
# unintended sharing between cells
877861
widget = display.TableWidget(df.copy())
878862

879863
ipython_display(widget)
880864
return "" # Return empty string since we used display()
865+
881866
except (AttributeError, ValueError, ImportError):
882867
# Fallback if anywidget is not available
883868
warnings.warn(
@@ -1963,22 +1948,7 @@ def _to_pandas_batches(
19631948
*,
19641949
allow_large_results: Optional[bool] = None,
19651950
) -> blocks.PandasBatches:
1966-
# Workaround for PyArrow bug https://github.com/apache/arrow/issues/45262
1967-
# JSON columns are not supported in to_pandas_batches
1968-
json_cols = [
1969-
str(col_name) # Cast to string
1970-
for col_name, dtype in self.dtypes.items()
1971-
if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype)
1972-
]
1973-
1974-
df = self
1975-
if json_cols:
1976-
# Convert JSON columns to strings before materialization
1977-
df = df.copy()
1978-
for col in json_cols:
1979-
df[col] = df[col].astype("string")
1980-
1981-
return df._block.to_pandas_batches(
1951+
return self._block.to_pandas_batches(
19821952
page_size=page_size,
19831953
max_results=max_results,
19841954
allow_large_results=allow_large_results,

bigframes/display/anywidget.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -209,12 +209,6 @@ def _reset_batches_for_new_page_size(self) -> None:
209209

210210
def _set_table_html(self) -> None:
211211
"""Sets the current html data based on the current page and page size."""
212-
# For empty dataframe, render empty table with headers.
213-
if self.row_count == 0:
214-
page_data = self._cached_data
215-
else:
216-
start = self.page * self.page_size
217-
end = start + self.page_size
218212
if self._error_message:
219213
self.table_html = (
220214
f"<div class='bigframes-error-message'>{self._error_message}</div>"
@@ -256,5 +250,8 @@ def _page_size_changed(self, _change: Dict[str, Any]) -> None:
256250
# Reset the page to 0 when page size changes to avoid invalid page states
257251
self.page = 0
258252

253+
# Reset batches to use new page size for future data fetching
254+
self._reset_batches_for_new_page_size()
255+
259256
# Update the table display
260257
self._set_table_html()

bigframes/ml/llm.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -731,17 +731,8 @@ def predict(
731731
"ground_with_google_search": ground_with_google_search,
732732
}
733733
if output_schema:
734-
supported_dtypes = (
735-
"int64",
736-
"float64",
737-
"bool",
738-
"string",
739-
"array<type>",
740-
"struct<column type>",
741-
)
742734
output_schema = {
743-
k: utils.standardize_type(v, supported_dtypes=supported_dtypes)
744-
for k, v in output_schema.items()
735+
k: utils.standardize_type(v) for k, v in output_schema.items()
745736
}
746737
options["output_schema"] = output_schema
747738
return self._predict_and_retry(

bigframes/operations/output_schemas.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414

1515
import pyarrow as pa
1616

17-
from bigframes import dtypes
18-
1917

2018
def parse_sql_type(sql: str) -> pa.DataType:
2119
"""
@@ -45,9 +43,6 @@ def parse_sql_type(sql: str) -> pa.DataType:
4543
if sql.upper() == "BOOL":
4644
return pa.bool_()
4745

48-
if sql.upper() == "JSON":
49-
return dtypes.JSON_ARROW_TYPE
50-
5146
if sql.upper().startswith("ARRAY<") and sql.endswith(">"):
5247
inner_type = sql[len("ARRAY<") : -1]
5348
return pa.list_(parse_sql_type(inner_type))

bigframes/series.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,6 @@ def astype(
610610
if errors not in ["raise", "null"]:
611611
raise ValueError("Argument 'errors' must be one of 'raise' or 'null'")
612612
dtype = bigframes.dtypes.bigframes_type(dtype)
613-
614613
return self._apply_unary_op(
615614
bigframes.operations.AsTypeOp(to_type=dtype, safe=(errors == "null"))
616615
)

tests/system/small/test_dataframe.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6142,15 +6142,3 @@ def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
61426142

61436143
with pytest.raises(KeyError):
61446144
bf_df.agg(agg_funcs)
6145-
6146-
6147-
def test_to_pandas_batches_with_json_columns(session):
6148-
"""Test that JSON columns are properly handled in to_pandas_batches."""
6149-
# Create a DataFrame with JSON column
6150-
df = session.read_gbq('SELECT JSON \'{"key": "value"}\' as json_col')
6151-
6152-
# This should not raise an error
6153-
batches = df._to_pandas_batches(page_size=10)
6154-
next(batches)
6155-
6156-
# TODO

tests/system/small/test_series.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4077,6 +4077,7 @@ def test_json_astype_others(data, to_type, errors):
40774077
pytest.param(["10.2", None], dtypes.INT_DTYPE, id="to_int"),
40784078
pytest.param(["false", None], dtypes.FLOAT_DTYPE, id="to_float"),
40794079
pytest.param(["10.2", None], dtypes.BOOL_DTYPE, id="to_bool"),
4080+
pytest.param(["true", None], dtypes.STRING_DTYPE, id="to_string"),
40804081
],
40814082
)
40824083
def test_json_astype_others_raise_error(data, to_type):

tests/unit/test_dataframe.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -181,26 +181,3 @@ def test_dataframe_ai_property_future_warning(
181181

182182
with pytest.warns(FutureWarning):
183183
dataframe.ai
184-
185-
186-
@pytest.fixture()
187-
def json_df(polars_session: bigframes.session.Session) -> bigframes.dataframe.DataFrame:
188-
"""Create a DataFrame with a JSON column for testing."""
189-
import bigframes.dtypes
190-
191-
pandas_df = pd.DataFrame(
192-
{
193-
"a": [1],
194-
"b": ['{"c": 2, "d": 3}'],
195-
}
196-
)
197-
pandas_df["b"] = pandas_df["b"].astype(bigframes.dtypes.JSON_DTYPE)
198-
return polars_session.read_pandas(pandas_df)
199-
200-
201-
def test_to_pandas_batches_with_json_column(json_df: bigframes.dataframe.DataFrame):
202-
"""Test that JSON columns are converted to strings in to_pandas_batches."""
203-
batches = list(json_df._to_pandas_batches(page_size=10))
204-
assert len(batches) > 0
205-
# Verify the JSON column is now string type
206-
assert batches[0]["b"].dtype == pd.StringDtype(storage="pyarrow")

0 commit comments

Comments
 (0)