
Commit 907cf2c

fix failed tests

Parent: 31b0746

5 files changed: +32 -12 lines

bigframes/bigquery/_operations/ai.py

Lines changed: 7 additions & 0 deletions
@@ -123,6 +123,13 @@ def generate(
     if output_schema is None:
         output_schema_str = None
     else:
+        # Validate output schema types
+        for col_name, col_type in output_schema.items():
+            if col_type.upper() == "JSON":
+                raise ValueError(
+                    "JSON type is not supported in output_schema. "
+                    "Supported types are: STRING, INT64, FLOAT64, BOOL, ARRAY, and STRUCT."
+                )
         output_schema_str = ", ".join(
             [f"{name} {sql_type}" for name, sql_type in output_schema.items()]
         )
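
The added check is self-contained, so a minimal standalone sketch of it (the helper name is illustrative, not part of the module) behaves as follows:

    # Minimal sketch mirroring the validation added above; the function name is hypothetical.
    def _validate_output_schema(output_schema: dict) -> None:
        for col_name, col_type in output_schema.items():
            if col_type.upper() == "JSON":
                raise ValueError(
                    "JSON type is not supported in output_schema. "
                    "Supported types are: STRING, INT64, FLOAT64, BOOL, ARRAY, and STRUCT."
                )

    _validate_output_schema({"title": "STRING", "rating": "INT64"})  # accepted
    # _validate_output_schema({"payload": "JSON"})  # would raise ValueError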

bigframes/core/compile/polars/compiler.py

Lines changed: 3 additions & 3 deletions
@@ -434,13 +434,13 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
 
     @compile_op.register(json_ops.ParseJSON)
     def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
-        # Parse string as JSON - this should decode, not encode
-        return input.str.json_decode()
+        # In Polars, JSON is stored as string, so no decoding needed
+        return input
 
     @compile_op.register(json_ops.JSONExtract)
     def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
         assert isinstance(op, json_ops.JSONExtract)
-        return input.str.json_extract(json_path=op.json_path)
+        return input.str.json_path_match(op.json_path)
 
     @compile_op.register(arr_ops.ToArrayOp)
     def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr:
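
The new compilation assumes Polars keeps JSON values as plain strings and uses Expr.str.json_path_match for extraction. A small illustration of those Polars calls (the sample data is made up):

    # Sketch of the Polars behaviour relied on above; sample data is hypothetical.
    import polars as pl

    df = pl.DataFrame({"json_col": ['{"key": "value"}']})

    # The JSON stays a string column, so ParseJSON can pass it through unchanged,
    # and json_path_match returns the first value matching a JSONPath as a string.
    print(df.select(pl.col("json_col").str.json_path_match("$.key")))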

bigframes/ml/llm.py

Lines changed: 10 additions & 1 deletion
@@ -731,8 +731,17 @@ def predict(
             "ground_with_google_search": ground_with_google_search,
         }
         if output_schema:
+            supported_dtypes = (
+                "int64",
+                "float64",
+                "bool",
+                "string",
+                "array<type>",
+                "struct<column type>",
+            )
             output_schema = {
-                k: utils.standardize_type(v) for k, v in output_schema.items()
+                k: utils.standardize_type(v, supported_dtypes=supported_dtypes)
+                for k, v in output_schema.items()
             }
             options["output_schema"] = output_schema
         return self._predict_and_retry(
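
For context, a hedged usage sketch of the predict path this touches, assuming GeminiTextGenerator is the class in this module exposing output_schema and df is a BigFrames DataFrame with a prompt column:

    # Hedged usage sketch; the model and DataFrame setup are assumptions, not part of the diff.
    from bigframes.ml import llm

    model = llm.GeminiTextGenerator()

    # Entries are standardized against supported_dtypes above; scalar types like
    # these pass, while an unsupported type would now be rejected by standardize_type.
    result = model.predict(
        df,
        output_schema={"rating": "int64", "summary": "string"},
    )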

bigframes/series.py

Lines changed: 0 additions & 8 deletions
@@ -611,14 +611,6 @@ def astype(
             raise ValueError("Argument 'errors' must be one of 'raise' or 'null'")
         dtype = bigframes.dtypes.bigframes_type(dtype)
 
-        # BigQuery doesn't support CAST(json_col AS STRING), but it does support
-        # TO_JSON_STRING(json_col).
-        if (
-            self.dtype == bigframes.dtypes.JSON_DTYPE
-            and dtype == bigframes.dtypes.STRING_DTYPE
-        ):
-            return self._apply_unary_op(ops.json_ops.ToJSONString())
-
         return self._apply_unary_op(
             bigframes.operations.AsTypeOp(to_type=dtype, safe=(errors == "null"))
         )
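
A hedged illustration of the user-facing call that used to hit the removed branch (the query and column name are made up):

    # Hedged illustration: JSON -> STRING casts now flow through the generic
    # AsTypeOp path kept above, rather than the removed ToJSONString special case.
    import bigframes.dtypes
    import bigframes.pandas as bpd

    df = bpd.read_gbq('SELECT JSON \'{"key": "value"}\' AS json_col')
    as_text = df["json_col"].astype(bigframes.dtypes.STRING_DTYPE)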

tests/system/small/test_dataframe.py

Lines changed: 12 additions & 0 deletions
@@ -6142,3 +6142,15 @@ def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
 
     with pytest.raises(KeyError):
         bf_df.agg(agg_funcs)
+
+
+def test_to_pandas_batches_with_json_columns(session):
+    """Test that JSON columns are properly handled in to_pandas_batches."""
+    # Create a DataFrame with JSON column
+    df = session.read_gbq('SELECT JSON \'{"key": "value"}\' as json_col')
+
+    # This should not raise an error
+    batches = df._to_pandas_batches(page_size=10)
+    next(batches)
+
+    # TODO
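
The same path through the public API looks roughly like this (a hedged sketch reusing the DataFrame from the test above):

    # Hedged sketch of the equivalent public-API call exercised by this test path.
    for batch in df.to_pandas_batches(page_size=10):
        print(batch["json_col"].head())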
