From 9e928f907d730e0c5dd510fa25c928052d94f91e Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 10 Sep 2025 22:53:12 +0000 Subject: [PATCH 1/2] tests: add engine tests for casting to json --- tests/system/small/engines/conftest.py | 7 +++ .../system/small/engines/test_generic_ops.py | 54 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/tests/system/small/engines/conftest.py b/tests/system/small/engines/conftest.py index 9699cc6a61..fe8764dee5 100644 --- a/tests/system/small/engines/conftest.py +++ b/tests/system/small/engines/conftest.py @@ -73,6 +73,13 @@ def scalars_array_value( return ArrayValue.from_managed(managed_data_source, fake_session) +@pytest.fixture(scope="module") +def nested_array_value( + nested_data_source: local_data.ManagedArrowTable, fake_session: bigframes.Session +): + return ArrayValue.from_managed(nested_data_source, fake_session) + + @pytest.fixture(scope="module") def zero_row_source() -> local_data.ManagedArrowTable: return local_data.ManagedArrowTable.from_pandas(pd.DataFrame({"a": [], "b": []})) diff --git a/tests/system/small/engines/test_generic_ops.py b/tests/system/small/engines/test_generic_ops.py index 14c6e9a454..c093536de0 100644 --- a/tests/system/small/engines/test_generic_ops.py +++ b/tests/system/small/engines/test_generic_ops.py @@ -14,6 +14,8 @@ import re +import pandas as pd +import pyarrow as pa import pytest from bigframes.core import array_value, expression @@ -275,6 +277,58 @@ def test_engines_astype_from_json(scalars_array_value: array_value.ArrayValue, e assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine) +@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True) +def test_engines_astype_to_json(scalars_array_value: array_value.ArrayValue, engine): + exprs = [ + ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( + expression.deref("int64_col") + ), + ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( + # Use a const since float to json has precision issues + expression.const(5.2, bigframes.dtypes.FLOAT_DTYPE) + ), + ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( + expression.deref("bool_col") + ), + ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( + # Use a const since "str_col" has special chars. + expression.const('"hello world"', bigframes.dtypes.STRING_DTYPE) + ), + ] + arr, _ = scalars_array_value.compute_values(exprs) + + assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine) + + +@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True) +def test_engines_astype_struct_to_json( + nested_array_value: array_value.ArrayValue, engine +): + json_data = [ + {"version": 1, "project": "pandas"}, + {"version": 2, "project": "numpy"}, + ] + exprs = [ + # ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( + # expression.deref("label") + # ), + # ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( + # expression.deref("address") + # ), + ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( + expression.const( + json_data, + pd.ArrowDtype( + pa.struct([("version", pa.int64()), ("project", pa.string())]) + ), + ) + ), + ] + arr, _ = nested_array_value.compute_values(exprs) + + assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine) + + @pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True) def test_engines_astype_timedelta(scalars_array_value: array_value.ArrayValue, engine): arr = apply_op( From 79895adc2a4751c6bcf438f6c31bf83c87f8ee24 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 11 Sep 2025 18:09:39 +0000 Subject: [PATCH 2/2] remove test_engines_astype_struct_to_json for internal discussions 444196993 --- tests/system/small/engines/conftest.py | 7 ----- .../system/small/engines/test_generic_ops.py | 31 ------------------- 2 files changed, 38 deletions(-) diff --git a/tests/system/small/engines/conftest.py b/tests/system/small/engines/conftest.py index fe8764dee5..9699cc6a61 100644 --- a/tests/system/small/engines/conftest.py +++ b/tests/system/small/engines/conftest.py @@ -73,13 +73,6 @@ def scalars_array_value( return ArrayValue.from_managed(managed_data_source, fake_session) -@pytest.fixture(scope="module") -def nested_array_value( - nested_data_source: local_data.ManagedArrowTable, fake_session: bigframes.Session -): - return ArrayValue.from_managed(nested_data_source, fake_session) - - @pytest.fixture(scope="module") def zero_row_source() -> local_data.ManagedArrowTable: return local_data.ManagedArrowTable.from_pandas(pd.DataFrame({"a": [], "b": []})) diff --git a/tests/system/small/engines/test_generic_ops.py b/tests/system/small/engines/test_generic_ops.py index c093536de0..fc40b7e59d 100644 --- a/tests/system/small/engines/test_generic_ops.py +++ b/tests/system/small/engines/test_generic_ops.py @@ -14,8 +14,6 @@ import re -import pandas as pd -import pyarrow as pa import pytest from bigframes.core import array_value, expression @@ -300,35 +298,6 @@ def test_engines_astype_to_json(scalars_array_value: array_value.ArrayValue, eng assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine) -@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True) -def test_engines_astype_struct_to_json( - nested_array_value: array_value.ArrayValue, engine -): - json_data = [ - {"version": 1, "project": "pandas"}, - {"version": 2, "project": "numpy"}, - ] - exprs = [ - # ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( - # expression.deref("label") - # ), - # ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( - # expression.deref("address") - # ), - ops.AsTypeOp(to_type=bigframes.dtypes.JSON_DTYPE).as_expr( - expression.const( - json_data, - pd.ArrowDtype( - pa.struct([("version", pa.int64()), ("project", pa.string())]) - ), - ) - ), - ] - arr, _ = nested_array_value.compute_values(exprs) - - assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine) - - @pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True) def test_engines_astype_timedelta(scalars_array_value: array_value.ArrayValue, engine): arr = apply_op(