@@ -376,30 +376,74 @@ def test_to_pandas_batches_w_empty_dataframe(session):
376376 pandas .testing .assert_series_equal (results [0 ].dtypes , empty .dtypes )
377377
378378
379- def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json (session ):
380- """Verifies to_pandas_batches() preserves dtypes for nested JSON."""
379+ @pytest .mark .skipif (
380+ bigframes .features .PANDAS_VERSIONS .is_arrow_list_dtype_usable ,
381+ reason = "Test for pandas 1.x behavior only" ,
382+ )
383+ def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json_pandas1 (session ):
384+ """Verifies to_pandas_batches() preserves dtypes for nested JSON in pandas 1.x."""
385+ sql = """
386+ SELECT
387+ 0 AS id,
388+ [JSON '{"a":1}', JSON '{"b":2}'] AS json_array,
389+ STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct
390+ """
391+ df = session .read_gbq (sql , index_col = "id" )
392+ batches = list (df .to_pandas_batches ())
393+
394+ assert batches [0 ].dtypes ["json_array" ] == "object"
395+ assert isinstance (batches [0 ].dtypes ["json_struct" ], pd .ArrowDtype )
381396
397+
398+ @pytest .mark .skipif (
399+ not bigframes .features .PANDAS_VERSIONS .is_arrow_list_dtype_usable ,
400+ reason = "Test for pandas 2.x behavior only" ,
401+ )
402+ def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json_pandas2 (session ):
403+ """Verifies to_pandas_batches() preserves dtypes for nested JSON in pandas 2.x."""
382404 sql = """
383405 SELECT
384406 0 AS id,
385407 [JSON '{"a":1}', JSON '{"b":2}'] AS json_array,
386408 STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct
387409 """
388410 df = session .read_gbq (sql , index_col = "id" )
411+ batches = list (df .to_pandas_batches ())
412+
413+ assert isinstance (batches [0 ].dtypes ["json_array" ], pd .ArrowDtype )
414+ assert isinstance (batches [0 ].dtypes ["json_array" ].pyarrow_dtype , pa .ListType )
415+ assert isinstance (batches [0 ].dtypes ["json_struct" ], pd .ArrowDtype )
416+
417+
418+ @pytest .mark .skipif (
419+ bigframes .features .PANDAS_VERSIONS .is_arrow_list_dtype_usable ,
420+ reason = "Test for pandas 1.x behavior only" ,
421+ )
422+ def test_to_pandas_batches_should_not_error_on_empty_nested_json_pandas1 (session ):
423+ """Verify to_pandas_batches() works with empty nested JSON types in pandas 1.x."""
424+
425+ sql = """
426+ SELECT
427+ 1 AS id,
428+ [] AS json_array,
429+ STRUCT(NULL AS json_field, 'test2' AS str_field) AS json_struct
430+ """
431+ df = session .read_gbq (sql , index_col = "id" )
389432
433+ # The main point: this should not raise an error
390434 batches = list (df .to_pandas_batches ())
435+ assert sum (len (b ) for b in batches ) == 1
391436
392- # Focuses only on the "preserves dtypes" behavior.
393- # This implicitly checks that at least one batch was produced.
394- pd .testing .assert_series_equal (
395- batches [0 ].dtypes ,
396- df .dtypes ,
397- check_dtype = bigframes .features .PANDAS_VERSIONS .is_arrow_list_dtype_usable ,
398- )
437+ assert batches [0 ].dtypes ["json_array" ] == "object"
438+ assert isinstance (batches [0 ].dtypes ["json_struct" ], pd .ArrowDtype )
399439
400440
401- def test_to_pandas_batches_should_not_error_on_empty_nested_json (session ):
402- """Verify to_pandas_batches() works with empty nested JSON types."""
441+ @pytest .mark .skipif (
442+ not bigframes .features .PANDAS_VERSIONS .is_arrow_list_dtype_usable ,
443+ reason = "Test for pandas 2.x behavior only" ,
444+ )
445+ def test_to_pandas_batches_should_not_error_on_empty_nested_json_pandas2 (session ):
446+ """Verify to_pandas_batches() works with empty nested JSON types in pandas 2.x."""
403447
404448 sql = """
405449 SELECT
@@ -409,15 +453,13 @@ def test_to_pandas_batches_should_not_error_on_empty_nested_json(session):
409453 """
410454 df = session .read_gbq (sql , index_col = "id" )
411455
412- # Verify that this line does not raise an error.
456+ # The main point: this should not raise an error
413457 batches = list (df .to_pandas_batches ())
458+ assert sum (len (b ) for b in batches ) == 1
414459
415- # Verify the resulting dtypes are correct for the empty/null data
416- pd .testing .assert_series_equal (
417- batches [0 ].dtypes ,
418- df .dtypes ,
419- check_dtype = bigframes .features .PANDAS_VERSIONS .is_arrow_list_dtype_usable ,
420- )
460+ assert isinstance (batches [0 ].dtypes ["json_array" ], pd .ArrowDtype )
461+ assert isinstance (batches [0 ].dtypes ["json_struct" ], pd .ArrowDtype )
462+ assert isinstance (batches [0 ].dtypes ["json_struct" ].pyarrow_dtype , pa .StructType )
421463
422464
423465@pytest .mark .parametrize ("allow_large_results" , (True , False ))
0 commit comments