Skip to content

Commit be1dea4

Browse files
committed
refactor testcase
1 parent 3119771 commit be1dea4

File tree

1 file changed

+60
-18
lines changed

1 file changed

+60
-18
lines changed

tests/system/small/test_dataframe_io.py

Lines changed: 60 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -376,30 +376,74 @@ def test_to_pandas_batches_w_empty_dataframe(session):
376376
pandas.testing.assert_series_equal(results[0].dtypes, empty.dtypes)
377377

378378

379-
def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json(session):
380-
"""Verifies to_pandas_batches() preserves dtypes for nested JSON."""
379+
@pytest.mark.skipif(
380+
bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable,
381+
reason="Test for pandas 1.x behavior only",
382+
)
383+
def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json_pandas1(session):
384+
"""Verifies to_pandas_batches() preserves dtypes for nested JSON in pandas 1.x."""
385+
sql = """
386+
SELECT
387+
0 AS id,
388+
[JSON '{"a":1}', JSON '{"b":2}'] AS json_array,
389+
STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct
390+
"""
391+
df = session.read_gbq(sql, index_col="id")
392+
batches = list(df.to_pandas_batches())
393+
394+
assert batches[0].dtypes["json_array"] == "object"
395+
assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype)
381396

397+
398+
@pytest.mark.skipif(
399+
not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable,
400+
reason="Test for pandas 2.x behavior only",
401+
)
402+
def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json_pandas2(session):
403+
"""Verifies to_pandas_batches() preserves dtypes for nested JSON in pandas 2.x."""
382404
sql = """
383405
SELECT
384406
0 AS id,
385407
[JSON '{"a":1}', JSON '{"b":2}'] AS json_array,
386408
STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct
387409
"""
388410
df = session.read_gbq(sql, index_col="id")
411+
batches = list(df.to_pandas_batches())
412+
413+
assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype)
414+
assert isinstance(batches[0].dtypes["json_array"].pyarrow_dtype, pa.ListType)
415+
assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype)
416+
417+
418+
@pytest.mark.skipif(
419+
bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable,
420+
reason="Test for pandas 1.x behavior only",
421+
)
422+
def test_to_pandas_batches_should_not_error_on_empty_nested_json_pandas1(session):
423+
"""Verify to_pandas_batches() works with empty nested JSON types in pandas 1.x."""
424+
425+
sql = """
426+
SELECT
427+
1 AS id,
428+
[] AS json_array,
429+
STRUCT(NULL AS json_field, 'test2' AS str_field) AS json_struct
430+
"""
431+
df = session.read_gbq(sql, index_col="id")
389432

433+
# The main point: this should not raise an error
390434
batches = list(df.to_pandas_batches())
435+
assert sum(len(b) for b in batches) == 1
391436

392-
# Focuses only on the "preserves dtypes" behavior.
393-
# This implicitly checks that at least one batch was produced.
394-
pd.testing.assert_series_equal(
395-
batches[0].dtypes,
396-
df.dtypes,
397-
check_dtype=bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable,
398-
)
437+
assert batches[0].dtypes["json_array"] == "object"
438+
assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype)
399439

400440

401-
def test_to_pandas_batches_should_not_error_on_empty_nested_json(session):
402-
"""Verify to_pandas_batches() works with empty nested JSON types."""
441+
@pytest.mark.skipif(
442+
not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable,
443+
reason="Test for pandas 2.x behavior only",
444+
)
445+
def test_to_pandas_batches_should_not_error_on_empty_nested_json_pandas2(session):
446+
"""Verify to_pandas_batches() works with empty nested JSON types in pandas 2.x."""
403447

404448
sql = """
405449
SELECT
@@ -409,15 +453,13 @@ def test_to_pandas_batches_should_not_error_on_empty_nested_json(session):
409453
"""
410454
df = session.read_gbq(sql, index_col="id")
411455

412-
# Verify that this line does not raise an error.
456+
# The main point: this should not raise an error
413457
batches = list(df.to_pandas_batches())
458+
assert sum(len(b) for b in batches) == 1
414459

415-
# Verify the resulting dtypes are correct for the empty/null data
416-
pd.testing.assert_series_equal(
417-
batches[0].dtypes,
418-
df.dtypes,
419-
check_dtype=bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable,
420-
)
460+
assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype)
461+
assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype)
462+
assert isinstance(batches[0].dtypes["json_struct"].pyarrow_dtype, pa.StructType)
421463

422464

423465
@pytest.mark.parametrize("allow_large_results", (True, False))

0 commit comments

Comments
 (0)