Skip to content

Commit 4cbaf15

Browse files
committed
Improve JSON type handling for to_gbq and to_pandas_batches
1 parent 466fd06 commit 4cbaf15

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed

bigframes/core/blocks.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,12 @@ def to_pandas_batches(
715715
# To reduce the number of edge cases to consider when working with the
716716
# results of this, always return at least one DataFrame. See:
717717
# b/428918844.
718+
empty_val = pd.DataFrame(
719+
{
720+
col: pd.Series([], dtype=self.expr.get_column_type(col))
721+
for col in itertools.chain(self.value_columns, self.index_columns)
722+
}
723+
)
718724
series_map = {}
719725
for col in itertools.chain(self.value_columns, self.index_columns):
720726
dtype = self.expr.get_column_type(col)

tests/system/small/test_dataframe_io.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,83 @@ def test_to_pandas_batches_w_empty_dataframe(session):
376376
pandas.testing.assert_series_equal(results[0].dtypes, empty.dtypes)
377377

378378

379+
def test_to_pandas_batches_w_empty_dataframe_json_in_list(session):
    """Check to_pandas_batches() on a zero-row frame whose column is a list of JSON.

    An empty DataFrame with a ``list<JSON>`` Arrow column must still produce
    exactly one (empty) batch, and that batch must keep the column's dtype.

    Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
    """
    import db_dtypes

    # Arrow list type whose items are the db_dtypes JSON extension type.
    expected_dtype = pd.ArrowDtype(pa.list_(db_dtypes.JSONArrowType()))

    source_columns = {
        "idx": pd.Series([], dtype="Int64"),
        "json_list_col": pd.Series([], dtype=expected_dtype),
    }
    frame = bpd.DataFrame(source_columns, session=session)
    frame = frame.set_index("idx", drop=True)

    batches = list(frame.to_pandas_batches())

    # Exactly one batch is produced even though there are no rows.
    assert len(batches) == 1
    only_batch = batches[0]
    # "idx" was moved into the index, so only the JSON column remains.
    assert list(only_batch.columns) == ["json_list_col"]
    assert only_batch.dtypes["json_list_col"] == expected_dtype
    assert len(only_batch) == 0
401+
402+
403+
# --- Empty DataFrame: JSON nested inside a struct column ---
404+
405+
406+
def test_to_pandas_batches_w_empty_dataframe_json_in_struct(session):
    """Check to_pandas_batches() on a zero-row frame whose column is a struct of JSON.

    An empty DataFrame with a ``struct<json_field: JSON>`` Arrow column must
    still produce exactly one (empty) batch, and that batch must keep the
    column's dtype.

    Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
    """
    import db_dtypes

    # Arrow struct type with a single field holding the db_dtypes JSON type.
    struct_fields = [("json_field", db_dtypes.JSONArrowType())]
    expected_dtype = pd.ArrowDtype(pa.struct(struct_fields))

    source_columns = {
        "idx": pd.Series([], dtype="Int64"),
        "json_struct_col": pd.Series([], dtype=expected_dtype),
    }
    frame = bpd.DataFrame(source_columns, session=session)
    frame = frame.set_index("idx", drop=True)

    batches = list(frame.to_pandas_batches())

    # Exactly one batch is produced even though there are no rows.
    assert len(batches) == 1
    only_batch = batches[0]
    # "idx" was moved into the index, so only the struct column remains.
    assert list(only_batch.columns) == ["json_struct_col"]
    assert only_batch.dtypes["json_struct_col"] == expected_dtype
    assert len(only_batch) == 0
430+
431+
432+
# --- Empty DataFrame: plain (top-level) JSON column ---
433+
434+
435+
def test_to_pandas_batches_w_empty_dataframe_simple_json(session):
    """Check to_pandas_batches() on a zero-row frame with a plain JSON column.

    An empty DataFrame with a ``dtypes.JSON_DTYPE`` column must still produce
    exactly one (empty) batch, and that batch must keep the column's dtype.

    Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273
    """
    expected_dtype = dtypes.JSON_DTYPE

    source_columns = {
        "idx": pd.Series([], dtype="Int64"),
        "json_col": pd.Series([], dtype=expected_dtype),
    }
    frame = bpd.DataFrame(source_columns, session=session)
    frame = frame.set_index("idx", drop=True)

    batches = list(frame.to_pandas_batches())

    # Exactly one batch is produced even though there are no rows.
    assert len(batches) == 1
    only_batch = batches[0]
    # "idx" was moved into the index, so only the JSON column remains.
    assert list(only_batch.columns) == ["json_col"]
    assert only_batch.dtypes["json_col"] == expected_dtype
    assert len(only_batch) == 0
454+
455+
379456
@pytest.mark.parametrize("allow_large_results", (True, False))
380457
def test_to_pandas_batches_w_page_size_and_max_results(session, allow_large_results):
381458
"""Verify to_pandas_batches() APIs returns the expected page size.

0 commit comments

Comments
 (0)