1212# See the License for the specific language governing permissions and
1313# limitations under the License.
1414
15- import json
16-
15+ import db_dtypes # type: ignore
1716import geopandas as gpd # type: ignore
1817import pandas as pd
1918import pyarrow as pa
2423import bigframes .pandas as bpd
2524
2625
27- def _get_series_from_json (json_data ):
28- # Note: converts None to sql "null" and not to json none.
29- values = [
30- f"JSON '{ json .dumps (data )} '" if data is not None else "NULL"
31- for data in json_data
32- ]
33- sql = " UNION ALL " .join (
34- [f"SELECT { id } AS id, { value } AS data" for id , value in enumerate (values )]
35- )
36- df = bpd .read_gbq (sql ).set_index ("id" ).sort_index ()
37- return df ["data" ]
38-
39-
4026@pytest .mark .parametrize (
4127 ("json_path" , "expected_json" ),
4228 [
@@ -45,10 +31,11 @@ def _get_series_from_json(json_data):
4531 ],
4632)
4733def test_json_set_at_json_path (json_path , expected_json ):
48- s = _get_series_from_json ([{"a" : {"b" : {"c" : "tester" , "d" : []}}}])
34+ original_json = [{"a" : {"b" : {"c" : "tester" , "d" : []}}}]
35+ s = bpd .Series (original_json , dtype = db_dtypes .JSONDtype ())
4936 actual = bbq .json_set (s , json_path_value_pairs = [(json_path , 10 )])
5037
51- expected = _get_series_from_json (expected_json )
38+ expected = bpd . Series (expected_json , dtype = db_dtypes . JSONDtype () )
5239 pd .testing .assert_series_equal (
5340 actual .to_pandas (),
5441 expected .to_pandas (),
@@ -65,41 +52,43 @@ def test_json_set_at_json_path(json_path, expected_json):
6552 ],
6653)
def test_json_set_at_json_value_type(json_value, expected_json):
    """Check that ``bbq.json_set`` writes ``json_value`` at ``$.a.b``.

    Args:
        json_value: Value to store at the ``$.a.b`` path of every row.
        expected_json: List of JSON documents the updated series must equal.

    Both arguments are supplied by the test's parametrization.
    """
    # Build the input locally as a JSON-typed series.
    original_json = [{"a": {"b": "dev"}}, {"a": {"b": [1, 2]}}]
    s = bpd.Series(original_json, dtype=db_dtypes.JSONDtype())
    actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)])

    expected = bpd.Series(expected_json, dtype=db_dtypes.JSONDtype())
    pd.testing.assert_series_equal(
        actual.to_pandas(),
        expected.to_pandas(),
    )
7664
7765
def test_json_set_w_more_pairs():
    """Check ``bbq.json_set`` when several path/value pairs are given at once.

    ``$.a`` appears twice in the pairs; the expected data below holds the
    values from the final ``$.a`` pair (one list element per row) rather than
    the earlier scalar ``1``, while ``$.b`` is set to ``2`` in every row.
    """
    original_json = [{"a": 2}, {"b": 5}, {"c": 1}]
    s = bpd.Series(original_json, dtype=db_dtypes.JSONDtype())
    actual = bbq.json_set(
        s, json_path_value_pairs=[("$.a", 1), ("$.b", 2), ("$.a", [3, 4, 5])]
    )

    expected_json = [{"a": 3, "b": 2}, {"a": 4, "b": 2}, {"a": 5, "b": 2, "c": 1}]
    expected = bpd.Series(expected_json, dtype=db_dtypes.JSONDtype())
    pd.testing.assert_series_equal(
        actual.to_pandas(),
        expected.to_pandas(),
    )
9079
9180
def test_json_set_w_invalid_json_path_value_pairs():
    """Check that ``bbq.json_set`` raises ``ValueError`` for a malformed pair.

    The pair passed here has three elements instead of a (path, value) tuple.
    """
    s = bpd.Series([{"a": 10}], dtype=db_dtypes.JSONDtype())
    with pytest.raises(ValueError):
        # The 3-tuple is deliberately ill-typed, hence the type-checker ignore.
        bbq.json_set(s, json_path_value_pairs=[("$.a", 1, 100)])  # type: ignore
9785
9886
9987def test_json_set_w_invalid_value_type ():
88+ s = bpd .Series ([{"a" : 10 }], dtype = db_dtypes .JSONDtype ())
10089 with pytest .raises (TypeError ):
10190 bbq .json_set (
102- _get_series_from_json ([{ "a" : 10 }]) ,
91+ s ,
10392 json_path_value_pairs = [
10493 (
10594 "$.a" ,
@@ -117,19 +106,25 @@ def test_json_set_w_invalid_series_type():
117106
118107
def test_json_extract_from_json():
    """Check ``bbq.json_extract`` on a JSON-typed series for path ``$.a.b``.

    The second row has no ``b`` key under ``a``; the expected result for that
    row is ``None``.
    """
    s = bpd.Series(
        [{"a": {"b": [1, 2]}}, {"a": {"c": 1}}, {"a": {"b": 0}}],
        dtype=db_dtypes.JSONDtype(),
    )
    actual = bbq.json_extract(s, "$.a.b").to_pandas()
    expected = bpd.Series([[1, 2], None, 0], dtype=db_dtypes.JSONDtype()).to_pandas()
    pd.testing.assert_series_equal(
        actual,
        expected,
    )
127119
128120
129121def test_json_extract_from_string ():
130- s = bpd .Series (['{"a": {"b": [1, 2]}}' , '{"a": {"c": 1}}' , '{"a": {"b": 0}}' ])
122+ s = bpd .Series (
123+ ['{"a": {"b": [1, 2]}}' , '{"a": {"c": 1}}' , '{"a": {"b": 0}}' ],
124+ dtype = pd .StringDtype (storage = "pyarrow" ),
125+ )
131126 actual = bbq .json_extract (s , "$.a.b" )
132- expected = bpd .Series (["[1,2]" , None , "0" ])
127+ expected = bpd .Series (["[1,2]" , None , "0" ], dtype = pd . StringDtype ( storage = "pyarrow" ) )
133128 pd .testing .assert_series_equal (
134129 actual .to_pandas (),
135130 expected .to_pandas (),
@@ -142,8 +137,9 @@ def test_json_extract_w_invalid_series_type():
142137
143138
144139def test_json_extract_array_from_json ():
145- s = _get_series_from_json (
146- [{"a" : ["ab" , "2" , "3 xy" ]}, {"a" : []}, {"a" : ["4" , "5" ]}, {}]
140+ s = bpd .Series (
141+ [{"a" : ["ab" , "2" , "3 xy" ]}, {"a" : []}, {"a" : ["4" , "5" ]}, {}],
142+ dtype = db_dtypes .JSONDtype (),
147143 )
148144 actual = bbq .json_extract_array (s , "$.a" )
149145
@@ -160,6 +156,8 @@ def test_json_extract_array_from_json():
160156 """
161157 df = bpd .read_gbq (sql ).set_index ("id" ).sort_index ()
162158 expected = df ["data" ]
159+ expected .index .name = None
160+ expected .name = None
163161
164162 pd .testing .assert_series_equal (
165163 actual .to_pandas (),
0 commit comments