Support casting STRUCT columns to JSON via astype.

- scalar_op_registry.py: astype_op_impl sends struct inputs through the
  to_json_string builtin; to_json_string drops its JSON-only parameter
  annotation so it can accept struct values, and its docstring is updated
  to match.
- dtypes.py: _dtype_from_string accepts "json" (case-insensitive); the error
  text spells out pandas.ArrowDtype instead of pd.ArrowDtype.
- generic_ops.py: _valid_cast allows struct-like -> JSON_DTYPE.
- test_series.py: adds test_struct_astype_json.

NOTE(review): to_json_string is declared to return ibis_dtypes.String, while
the neighbouring branch returns parse_json(...). Confirm that the struct
branch is meant to yield a STRING-typed expression for a cast to JSON.
NOTE(review): indentation and blank context lines below were rebuilt from the
hunk line counts; verify against the target files (or apply with
--ignore-whitespace) before relying on this patch.

diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
index 044fc90306..804ee6f926 100644
--- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
+++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
@@ -1023,6 +1023,8 @@ def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp):
                 x, ibis_dtypes.string, safe=op.safe
             )
             return parse_json_in_safe(x_str) if op.safe else parse_json(x_str)
+        if x.type().is_struct():
+            return to_json_string(typing.cast(ibis_types.StructValue, x))
 
     if x.type() == ibis_dtypes.json:
         if to_type == ibis_dtypes.int64:
@@ -2069,7 +2071,7 @@ def json_extract_string_array(  # type: ignore[empty-body]
 
 @ibis_udf.scalar.builtin(name="to_json_string")
 def to_json_string(  # type: ignore[empty-body]
-    json_obj: ibis_dtypes.JSON,
+    json_obj,
 ) -> ibis_dtypes.String:
-    """Convert JSON to STRING."""
+    """Convert a value to a JSON-formatted STRING."""
 
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index ef1b9e7871..ae68dbe7d3 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -641,6 +641,9 @@ def _dtype_from_string(dtype_string: str) -> typing.Optional[Dtype]:
         return BIGFRAMES_STRING_TO_BIGFRAMES[
             typing.cast(DtypeString, str(dtype_string))
         ]
+    if isinstance(dtype_string, str) and dtype_string.lower() == "json":
+        return JSON_DTYPE
+
     raise TypeError(
         textwrap.dedent(
             f"""
@@ -652,9 +655,9 @@ def _dtype_from_string(dtype_string: str) -> typing.Optional[Dtype]:
             The following pandas.ExtensionDtype are supported:
             pandas.BooleanDtype(), pandas.Float64Dtype(), pandas.Int64Dtype(),
             pandas.StringDtype(storage="pyarrow"),
-            pd.ArrowDtype(pa.date32()), pd.ArrowDtype(pa.time64("us")),
-            pd.ArrowDtype(pa.timestamp("us")),
-            pd.ArrowDtype(pa.timestamp("us", tz="UTC")).
+            pandas.ArrowDtype(pa.date32()), pandas.ArrowDtype(pa.time64("us")),
+            pandas.ArrowDtype(pa.timestamp("us")),
+            pandas.ArrowDtype(pa.timestamp("us", tz="UTC")).
             {constants.FEEDBACK_LINK}
             """
         )
diff --git a/bigframes/operations/generic_ops.py b/bigframes/operations/generic_ops.py
index d6155a770c..ea25086aa9 100644
--- a/bigframes/operations/generic_ops.py
+++ b/bigframes/operations/generic_ops.py
@@ -324,6 +324,8 @@ def _valid_cast(src: dtypes.Dtype, dst: dtypes.Dtype):
             if not _valid_cast(src_dtype, dst_dtype):
                 return False
         return True
+    if dtypes.is_struct_like(src) and dst == dtypes.JSON_DTYPE:
+        return True
 
     return _valid_scalar_cast(src, dst)
 
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 165e3b6df0..70dcafdb22 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -3866,6 +3866,30 @@ def test_string_astype_timestamp():
     pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
 
 
+def test_struct_astype_json():
+    """See internal issue 444196993."""
+    s = series.Series(
+        [
+            {"version": 1, "project": "pandas"},
+            {"version": 2, "project": "numpy"},
+        ]
+    )
+    assert dtypes.is_struct_like(s.dtype)
+
+    expected = series.Series(s, dtype=dtypes.JSON_DTYPE)
+    assert expected.dtype == dtypes.JSON_DTYPE
+
+    result = s.astype("json")
+    pd.testing.assert_series_equal(
+        result.to_pandas(), expected.to_pandas(), check_index_type=False
+    )
+
+    result = s.astype(dtypes.JSON_DTYPE)
+    pd.testing.assert_series_equal(
+        result.to_pandas(), expected.to_pandas(), check_index_type=False
+    )
+
+
 def test_timestamp_astype_string():
     bf_series = series.Series(
         [