@@ -376,75 +376,52 @@ def test_to_pandas_batches_w_empty_dataframe(session):
376376 pandas .testing .assert_series_equal (results [0 ].dtypes , empty .dtypes )
377377
378378
379- def test_to_pandas_batches_w_empty_dataframe_json_in_list (session ):
380- """Tests to_pandas_batches() with an empty DataFrame containing a list of JSON.
381-
382- Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
379+ def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json (session ):
380+ """Verifies to_pandas_batches() preserves dtypes for nested JSON."""
381+ # This SQL query only tests the POPULATED case.
382+ sql = """
383+ SELECT
384+ 0 AS id,
385+ [JSON '{"a":1}', JSON '{"b":2}'] AS json_array,
386+ STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct
383387 """
384- import db_dtypes
388+ df = session . read_gbq ( sql , index_col = "id" )
385389
386- json_list_dtype = pd .ArrowDtype (pa .list_ (db_dtypes .JSONArrowType ()))
387- empty_df_with_json_list = bpd .DataFrame (
388- {
389- "idx" : pd .Series ([], dtype = "Int64" ),
390- "json_list_col" : pd .Series ([], dtype = json_list_dtype ),
391- },
392- session = session ,
393- ).set_index ("idx" , drop = True )
390+ batches = list (df .to_pandas_batches ())
394391
395- results = list (empty_df_with_json_list .to_pandas_batches ())
392+ # Check that we processed the row
393+ assert sum (len (b ) for b in batches ) == 1
396394
397- assert len (results ) == 1
398- assert list (results [0 ].columns ) == ["json_list_col" ]
399- assert results [0 ].dtypes ["json_list_col" ] == json_list_dtype
400- assert len (results [0 ]) == 0
395+ # Check dtypes on the resulting batch
396+ assert isinstance (batches [0 ].dtypes ["json_array" ], pd .ArrowDtype )
397+ assert isinstance (batches [0 ].dtypes ["json_array" ].pyarrow_dtype , pa .ListType )
398+ assert isinstance (batches [0 ].dtypes ["json_struct" ], pd .ArrowDtype )
399+ assert isinstance (batches [0 ].dtypes ["json_struct" ].pyarrow_dtype , pa .StructType )
401400
402401
403- def test_to_pandas_batches_w_empty_dataframe_json_in_struct (session ):
404- """Tests to_pandas_batches() with an empty DataFrame containing a struct of JSON.
402+ def test_to_pandas_batches_should_not_error_on_empty_nested_json (session ):
403+ """Verify to_pandas_batches() works with empty nested JSON types .
405404
406- Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
405+ Regression test for PyArrow limitation with empty JSON arrays.
407406 """
408- import db_dtypes
409-
410- json_struct_dtype = pd .ArrowDtype (
411- pa .struct ([("json_field" , db_dtypes .JSONArrowType ())])
412- )
413- empty_df_with_json_struct = bpd .DataFrame (
414- {
415- "idx" : pd .Series ([], dtype = "Int64" ),
416- "json_struct_col" : pd .Series ([], dtype = json_struct_dtype ),
417- },
418- session = session ,
419- ).set_index ("idx" , drop = True )
420-
421- results = list (empty_df_with_json_struct .to_pandas_batches ())
422-
423- assert len (results ) == 1
424- assert list (results [0 ].columns ) == ["json_struct_col" ]
425- assert results [0 ].dtypes ["json_struct_col" ] == json_struct_dtype
426- assert len (results [0 ]) == 0
427-
428-
429- def test_to_pandas_batches_w_empty_dataframe_simple_json (session ):
430- """Tests to_pandas_batches() with an empty DataFrame containing a simple JSON column.
431-
432- Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
407+ # This SQL query is MINIMAL and tests only the EMPTY regression case.
408+ sql = """
409+ SELECT
410+ 1 AS id,
411+ [] AS json_array,
412+ STRUCT(NULL AS json_field, 'test2' AS str_field) AS json_struct
433413 """
434- empty_df_with_json = bpd .DataFrame (
435- {
436- "idx" : pd .Series ([], dtype = "Int64" ),
437- "json_col" : pd .Series ([], dtype = dtypes .JSON_DTYPE ),
438- },
439- session = session ,
440- ).set_index ("idx" , drop = True )
414+ df = session .read_gbq (sql , index_col = "id" )
441415
442- results = list (empty_df_with_json .to_pandas_batches ())
416+ # The main point of this test is that this line does not raise an error.
417+ batches = list (df .to_pandas_batches ())
443418
444- assert len (results ) == 1
445- assert list (results [0 ].columns ) == ["json_col" ]
446- assert results [0 ].dtypes ["json_col" ] == dtypes .JSON_DTYPE
447- assert len (results [0 ]) == 0
419+ # Verify the row was actually processed and not just skipped
420+ assert sum (len (b ) for b in batches ) == 1
421+
422+ # Verify dtypes are still correct, even with empty data
423+ assert isinstance (batches [0 ].dtypes ["json_array" ], pd .ArrowDtype )
424+ assert isinstance (batches [0 ].dtypes ["json_struct" ], pd .ArrowDtype )
448425
449426
450427@pytest .mark .parametrize ("allow_large_results" , (True , False ))
0 commit comments