
Commit 907cf2c

fix failed tests

Parent: 31b0746

5 files changed: +32 -12 lines

bigframes/bigquery/_operations/ai.py

Lines changed: 7 additions & 0 deletions
@@ -123,6 +123,13 @@ def generate(
     if output_schema is None:
         output_schema_str = None
     else:
+        # Validate output schema types
+        for col_name, col_type in output_schema.items():
+            if col_type.upper() == "JSON":
+                raise ValueError(
+                    "JSON type is not supported in output_schema. "
+                    "Supported types are: STRING, INT64, FLOAT64, BOOL, ARRAY, and STRUCT."
+                )
         output_schema_str = ", ".join(
             [f"{name} {sql_type}" for name, sql_type in output_schema.items()]
         )
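
The added check is self-contained, so a minimal standalone sketch of it (the helper name is illustrative, not part of the module) behaves as follows:

    # Minimal sketch mirroring the validation added above; the function name is hypothetical.
    def _validate_output_schema(output_schema: dict) -> None:
        for col_name, col_type in output_schema.items():
            if col_type.upper() == "JSON":
                raise ValueError(
                    "JSON type is not supported in output_schema. "
                    "Supported types are: STRING, INT64, FLOAT64, BOOL, ARRAY, and STRUCT."
                )

    _validate_output_schema({"title": "STRING", "rating": "INT64"})  # accepted
    # _validate_output_schema({"payload": "JSON"})  # would raise ValueError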

bigframes/core/compile/polars/compiler.py

Lines changed: 3 additions & 3 deletions
@@ -434,13 +434,13 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
 
     @compile_op.register(json_ops.ParseJSON)
     def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
-        # Parse string as JSON - this should decode, not encode
-        return input.str.json_decode()
+        # In Polars, JSON is stored as string, so no decoding needed
+        return input
 
     @compile_op.register(json_ops.JSONExtract)
     def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
         assert isinstance(op, json_ops.JSONExtract)
-        return input.str.json_extract(json_path=op.json_path)
+        return input.str.json_path_match(op.json_path)
 
     @compile_op.register(arr_ops.ToArrayOp)
     def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr:
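
The new compilation assumes Polars keeps JSON values as plain strings and uses Expr.str.json_path_match for extraction. A small illustration of those Polars calls (the sample data is made up):

    # Sketch of the Polars behaviour relied on above; sample data is hypothetical.
    import polars as pl

    df = pl.DataFrame({"json_col": ['{"key": "value"}']})

    # The JSON stays a string column, so ParseJSON can pass it through unchanged,
    # and json_path_match returns the first value matching a JSONPath as a string.
    print(df.select(pl.col("json_col").str.json_path_match("$.key")))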

bigframes/ml/llm.py

Lines changed: 10 additions & 1 deletion
@@ -731,8 +731,17 @@ def predict(
             "ground_with_google_search": ground_with_google_search,
         }
         if output_schema:
+            supported_dtypes = (
+                "int64",
+                "float64",
+                "bool",
+                "string",
+                "array<type>",
+                "struct<column type>",
+            )
             output_schema = {
-                k: utils.standardize_type(v) for k, v in output_schema.items()
+                k: utils.standardize_type(v, supported_dtypes=supported_dtypes)
+                for k, v in output_schema.items()
             }
             options["output_schema"] = output_schema
         return self._predict_and_retry(
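
For context, a hedged usage sketch of the predict path this touches, assuming GeminiTextGenerator is the class in this module exposing output_schema and df is a BigFrames DataFrame with a prompt column:

    # Hedged usage sketch; the model and DataFrame setup are assumptions, not part of the diff.
    from bigframes.ml import llm

    model = llm.GeminiTextGenerator()

    # Entries are standardized against supported_dtypes above; scalar types like
    # these pass, while an unsupported type would now be rejected by standardize_type.
    result = model.predict(
        df,
        output_schema={"rating": "int64", "summary": "string"},
    )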

bigframes/series.py

Lines changed: 0 additions & 8 deletions
@@ -611,14 +611,6 @@ def astype(
             raise ValueError("Argument 'errors' must be one of 'raise' or 'null'")
         dtype = bigframes.dtypes.bigframes_type(dtype)
 
-        # BigQuery doesn't support CAST(json_col AS STRING), but it does support
-        # TO_JSON_STRING(json_col).
-        if (
-            self.dtype == bigframes.dtypes.JSON_DTYPE
-            and dtype == bigframes.dtypes.STRING_DTYPE
-        ):
-            return self._apply_unary_op(ops.json_ops.ToJSONString())
-
         return self._apply_unary_op(
             bigframes.operations.AsTypeOp(to_type=dtype, safe=(errors == "null"))
         )
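
A hedged illustration of the user-facing call that used to hit the removed branch (the query and column name are made up):

    # Hedged illustration: JSON -> STRING casts now flow through the generic
    # AsTypeOp path kept above, rather than the removed ToJSONString special case.
    import bigframes.dtypes
    import bigframes.pandas as bpd

    df = bpd.read_gbq('SELECT JSON \'{"key": "value"}\' AS json_col')
    as_text = df["json_col"].astype(bigframes.dtypes.STRING_DTYPE)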

tests/system/small/test_dataframe.py

Lines changed: 12 additions & 0 deletions
@@ -6142,3 +6142,15 @@ def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
 
     with pytest.raises(KeyError):
         bf_df.agg(agg_funcs)
+
+
+def test_to_pandas_batches_with_json_columns(session):
+    """Test that JSON columns are properly handled in to_pandas_batches."""
+    # Create a DataFrame with JSON column
+    df = session.read_gbq('SELECT JSON \'{"key": "value"}\' as json_col')
+
+    # This should not raise an error
+    batches = df._to_pandas_batches(page_size=10)
+    next(batches)
+
+    # TODO
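
The same path through the public API looks roughly like this (a hedged sketch reusing the DataFrame from the test above):

    # Hedged sketch of the equivalent public-API call exercised by this test path.
    for batch in df.to_pandas_batches(page_size=10):
        print(batch["json_col"].head())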
