Skip to content

Commit 2dc2dd0

Browse files
committed
feat: support bpd.Series(json_data, dtype="json")
1 parent 3e6dfe7 commit 2dc2dd0

File tree

7 files changed: 64 additions (+64) and 7 deletions (-7)

bigframes/core/indexes/base.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,8 @@ def __new__(
8686
pd_df = pandas.DataFrame(index=data)
8787
block = df.DataFrame(pd_df, session=session)._block
8888
else:
89+
if isinstance(dtype, str):
90+
dtype = bigframes.dtypes.bigframes_type(dtype)
8991
pd_index = pandas.Index(data=data, dtype=dtype, name=name)
9092
pd_df = pandas.DataFrame(index=pd_index)
9193
block = df.DataFrame(pd_df, session=session)._block

bigframes/dataframe.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,8 @@ def __init__(
198198
else:
199199
import bigframes.pandas
200200

201+
if isinstance(dtype, str):
202+
dtype = bigframes.dtypes.bigframes_type(dtype)
201203
pd_dataframe = pandas.DataFrame(
202204
data=data,
203205
index=index, # type:ignore

bigframes/dtypes.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -637,17 +637,19 @@ def _dtype_from_string(dtype_string: str) -> typing.Optional[Dtype]:
637637
return BIGFRAMES_STRING_TO_BIGFRAMES[
638638
typing.cast(DtypeString, str(dtype_string))
639639
]
640+
if isinstance(dtype_string, str) and dtype_string.lower() == "json":
641+
return JSON_DTYPE
640642
raise TypeError(
641643
textwrap.dedent(
642644
f"""
643-
Unexpected data type string {dtype_string}. The following
645+
Unexpected data type string `{dtype_string}`. The following
644646
dtypes are supppted: 'boolean','Float64','Int64',
645-
'int64[pyarrow]','string','string[pyarrow]',
647+
'int64[pyarrow]','string','string[pyarrow]','json',
646648
'timestamp[us, tz=UTC][pyarrow]','timestamp[us][pyarrow]',
647649
'date32[day][pyarrow]','time64[us][pyarrow]'.
648-
The following pandas.ExtensionDtype are supported:
649-
pandas.BooleanDtype(), pandas.Float64Dtype(),
650-
pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"),
650+
The following pandas `ExtensionDtype` are supported:
651+
pd.BooleanDtype(), pd.Float64Dtype(),
652+
pd.Int64Dtype(), pd.StringDtype(storage="pyarrow"),
651653
pd.ArrowDtype(pa.date32()), pd.ArrowDtype(pa.time64("us")),
652654
pd.ArrowDtype(pa.timestamp("us")),
653655
pd.ArrowDtype(pa.timestamp("us", tz="UTC")).

bigframes/operations/base.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,8 @@ def __init__(
121121
bf_dtype = bigframes.dtypes.bigframes_type(dtype)
122122
block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
123123
else:
124+
if isinstance(dtype, str):
125+
dtype = bigframes.dtypes.bigframes_type(dtype)
124126
pd_series = pd.Series(
125127
data=data,
126128
index=index, # type:ignore

tests/system/small/test_dataframe.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,26 @@ def test_df_construct_from_dict():
180180
)
181181

182182

183+
@pytest.mark.parametrize(
184+
("json_type"),
185+
[
186+
pytest.param(dtypes.JSON_DTYPE),
187+
pytest.param("json"),
188+
],
189+
)
190+
def test_df_construct_w_json_dtype(json_type):
191+
data = [
192+
"1",
193+
"false",
194+
'["a", {"b": 1}, null]',
195+
None,
196+
]
197+
df = dataframe.DataFrame({"json_col": data}, dtype=json_type)
198+
199+
assert df["json_col"].dtype == dtypes.JSON_DTYPE
200+
assert df["json_col"][1] == "false"
201+
202+
183203
def test_df_construct_inline_respects_location(reset_default_session_and_location):
184204
# Note: This starts a thread-local session.
185205
with bpd.option_context("bigquery.location", "europe-west1"):

tests/system/small/test_index.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
import pandas as pd
1919
import pytest
2020

21+
from bigframes import dtypes
2122
import bigframes.pandas as bpd
2223
from bigframes.testing.utils import assert_pandas_index_equal_ignore_index_type
2324

@@ -61,6 +62,26 @@ def test_index_construct_from_index():
6162
pd.testing.assert_index_equal(bf_result, pd_result)
6263

6364

65+
@pytest.mark.parametrize(
66+
("json_type"),
67+
[
68+
pytest.param(dtypes.JSON_DTYPE),
69+
pytest.param("json"),
70+
],
71+
)
72+
def test_index_construct_w_json_dtype(json_type):
73+
data = [
74+
"1",
75+
"false",
76+
'["a", {"b": 1}, null]',
77+
None,
78+
]
79+
index = bpd.Index(data, dtype=json_type)
80+
81+
assert index.dtype == dtypes.JSON_DTYPE
82+
assert index[1] == "false"
83+
84+
6485
def test_get_index(scalars_df_index, scalars_pandas_df_index):
6586
index = scalars_df_index.index
6687
bf_result = index.to_pandas()

tests/system/small/test_series.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -326,7 +326,14 @@ def test_series_construct_local_unordered_has_sequential_index(unordered_session
326326
pd.testing.assert_index_equal(series.index.to_pandas(), expected)
327327

328328

329-
def test_series_construct_w_dtype_for_json():
329+
@pytest.mark.parametrize(
330+
("json_type"),
331+
[
332+
pytest.param(dtypes.JSON_DTYPE),
333+
pytest.param("json"),
334+
],
335+
)
336+
def test_series_construct_w_json_dtype(json_type):
330337
data = [
331338
"1",
332339
'"str"',
@@ -335,8 +342,9 @@ def test_series_construct_w_dtype_for_json():
335342
None,
336343
'{"a": {"b": [1, 2, 3], "c": true}}',
337344
]
338-
s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE)
345+
s = bigframes.pandas.Series(data, dtype=json_type)
339346

347+
assert s.dtype == dtypes.JSON_DTYPE
340348
assert s[0] == "1"
341349
assert s[1] == '"str"'
342350
assert s[2] == "false"

0 commit comments

Comments
 (0)