1313# limitations under the License.
1414
1515import datetime as dt
16+ import json
1617import math
1718import re
1819import tempfile
1920
2021import db_dtypes # type: ignore
2122import geopandas as gpd # type: ignore
23+ import google .api_core .exceptions
2224import numpy
2325from packaging .version import Version
2426import pandas as pd
@@ -3474,9 +3476,11 @@ def foo(x):
34743476 ("int64_col" , pd .ArrowDtype (pa .timestamp ("us" ))),
34753477 ("int64_col" , pd .ArrowDtype (pa .timestamp ("us" , tz = "UTC" ))),
34763478 ("int64_col" , "time64[us][pyarrow]" ),
3479+ ("int64_col" , pd .ArrowDtype (db_dtypes .JSONArrowType ())),
34773480 ("bool_col" , "Int64" ),
34783481 ("bool_col" , "string[pyarrow]" ),
34793482 ("bool_col" , "Float64" ),
3483+ ("bool_col" , pd .ArrowDtype (db_dtypes .JSONArrowType ())),
34803484 ("string_col" , "binary[pyarrow]" ),
34813485 ("bytes_col" , "string[pyarrow]" ),
34823486 # pandas actually doesn't let folks convert to/from naive timestamp and
@@ -3541,7 +3545,7 @@ def test_astype_safe(session):
35413545 pd .testing .assert_series_equal (result , exepcted )
35423546
35433547
def test_series_astype_w_invalid_error(session):
    """Check that ``astype`` rejects an unrecognized ``errors`` argument.

    The series is loaded through ``session.read_pandas`` and cast with
    ``errors="bad_value"``; a ``ValueError`` is expected.
    """
    # Named ``pd_series`` rather than ``input`` so the builtin is not shadowed.
    pd_series = pd.Series(["hello", "world", "3.11", "4000"])
    with pytest.raises(ValueError):
        session.read_pandas(pd_series).astype("Float64", errors="bad_value")
@@ -3676,6 +3680,119 @@ def test_timestamp_astype_string():
36763680 assert bf_result .dtype == "string[pyarrow]"
36773681
36783682
@pytest.mark.parametrize("errors", ["raise", "null"])
def test_float_astype_json(errors):
    """Cast a float series to JSON and compare with a pandas JSON series.

    Runs once per ``errors`` mode; the same expectation applies to both.
    """
    raw_values = ["1.25", "2500000000", None, "-12323.24"]

    float_series = series.Series(raw_values, dtype=dtypes.FLOAT_DTYPE)
    as_json = float_series.astype(dtypes.JSON_DTYPE, errors=errors)
    assert as_json.dtype == dtypes.JSON_DTYPE

    # The expected series is built from the same raw values; its index is
    # cast to "Int64" before the comparison.
    want = pd.Series(raw_values, dtype=dtypes.JSON_DTYPE)
    want.index = want.index.astype("Int64")
    pd.testing.assert_series_equal(as_json.to_pandas(), want)
3694+
3695+
@pytest.mark.parametrize("errors", ["raise", "null"])
def test_string_astype_json(errors):
    """Cast JSON-formatted strings to the JSON dtype.

    The result is compared with the same series downloaded to pandas and
    cast to JSON there.
    """
    json_texts = [
        "1",
        None,
        '["1","3","5"]',
        '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}',
    ]
    string_series = series.Series(json_texts, dtype=dtypes.STRING_DTYPE)

    as_json = string_series.astype(dtypes.JSON_DTYPE, errors=errors)
    assert as_json.dtype == dtypes.JSON_DTYPE

    want = string_series.to_pandas().astype(dtypes.JSON_DTYPE)
    pd.testing.assert_series_equal(as_json.to_pandas(), want)
3711+
3712+
def test_string_astype_json_in_safe_mode():
    """With ``errors="null"``, a non-JSON string is expected to become null."""
    not_json = ["this is not a valid json string"]
    string_series = series.Series(not_json, dtype=dtypes.STRING_DTYPE)

    as_json = string_series.astype(dtypes.JSON_DTYPE, errors="null")
    assert as_json.dtype == dtypes.JSON_DTYPE

    # A single null JSON value; the index is cast to "Int64" before comparing.
    want = pd.Series([None], dtype=dtypes.JSON_DTYPE)
    want.index = want.index.astype("Int64")
    pd.testing.assert_series_equal(as_json.to_pandas(), want)
3722+
3723+
def test_string_astype_json_raise_error():
    """With ``errors="raise"``, a non-JSON string is expected to fail.

    Casting and then materializing with ``to_pandas()`` should raise
    ``BadRequest`` with a message matching the parser's syntax-error text.
    """
    not_json = ["this is not a valid json string"]
    string_series = series.Series(not_json, dtype=dtypes.STRING_DTYPE)

    bad_request = google.api_core.exceptions.BadRequest
    with pytest.raises(bad_request, match="syntax error while parsing value"):
        string_series.astype(dtypes.JSON_DTYPE, errors="raise").to_pandas()
3732+
3733+
@pytest.mark.parametrize("errors", ["raise", "null"])
@pytest.mark.parametrize(
    ("data", "to_type"),
    [
        pytest.param(["1", "10.0", None], dtypes.INT_DTYPE, id="to_int"),
        pytest.param(
            ["0.0001", "2500000000", None],
            dtypes.FLOAT_DTYPE,
            id="to_float",
        ),
        pytest.param(["true", "false", None], dtypes.BOOL_DTYPE, id="to_bool"),
        pytest.param(['"str"', None], dtypes.STRING_DTYPE, id="to_string"),
        pytest.param(
            ['"str"', None],
            dtypes.TIME_DTYPE,
            id="invalid",
            marks=pytest.mark.xfail(raises=TypeError),
        ),
    ],
)
def test_json_astype_others(data, to_type, errors):
    """Cast a JSON series to another dtype and compare with decoded values.

    The expected values come from decoding each non-null JSON text with
    ``json.loads``. The "invalid" case is marked as an expected failure
    raising ``TypeError``.
    """
    json_series = series.Series(data, dtype=dtypes.JSON_DTYPE)

    converted = json_series.astype(to_type, errors=errors)
    assert converted.dtype == to_type

    # Nulls stay null; every other entry is decoded from its JSON text.
    decoded = [None if text is None else json.loads(text) for text in data]
    want = pd.Series(decoded, dtype=to_type)
    want.index = want.index.astype("Int64")
    pd.testing.assert_series_equal(converted.to_pandas(), want)
3760+
3761+
@pytest.mark.parametrize(
    ("data", "to_type"),
    [
        pytest.param(["10.2", None], dtypes.INT_DTYPE, id="to_int"),
        pytest.param(["false", None], dtypes.FLOAT_DTYPE, id="to_float"),
        pytest.param(["10.2", None], dtypes.BOOL_DTYPE, id="to_bool"),
        pytest.param(["true", None], dtypes.STRING_DTYPE, id="to_string"),
    ],
)
def test_json_astype_others_raise_error(data, to_type):
    """With ``errors="raise"``, these JSON-to-dtype casts are expected to fail.

    Casting and then materializing with ``to_pandas()`` should raise
    ``BadRequest``.
    """
    json_series = series.Series(data, dtype=dtypes.JSON_DTYPE)

    bad_request = google.api_core.exceptions.BadRequest
    with pytest.raises(bad_request):
        json_series.astype(to_type, errors="raise").to_pandas()
3775+
3776+
@pytest.mark.parametrize(
    ("data", "to_type"),
    [
        pytest.param(["10.2", None], dtypes.INT_DTYPE, id="to_int"),
        pytest.param(["false", None], dtypes.FLOAT_DTYPE, id="to_float"),
        pytest.param(["10.2", None], dtypes.BOOL_DTYPE, id="to_bool"),
        pytest.param(["true", None], dtypes.STRING_DTYPE, id="to_string"),
    ],
)
def test_json_astype_others_in_safe_mode(data, to_type):
    """With ``errors="null"``, these JSON-to-dtype casts should give nulls.

    Each parameter set pairs one JSON value with a null; the expected result
    is an all-null series of the target dtype.
    """
    bf_series = series.Series(data, dtype=dtypes.JSON_DTYPE)
    bf_result = bf_series.astype(to_type, errors="null")
    assert bf_result.dtype == to_type

    # Size the expectation from the input rather than hard-coding two rows,
    # so parameter sets of other lengths do not need a matching edit here.
    expected = pd.Series([None] * len(data), dtype=to_type)
    expected.index = expected.index.astype("Int64")
    pd.testing.assert_series_equal(bf_result.to_pandas(), expected)
3794+
3795+
36793796@pytest .mark .parametrize (
36803797 "index" ,
36813798 [0 , 5 , - 2 ],
@@ -3687,9 +3804,7 @@ def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index):
36873804 assert bf_result == pd_result
36883805
36893806
def test_iloc_single_integer_out_of_bound_error(scalars_df_index):
    """Expect ``IndexError`` from ``iloc`` at position 99 of ``string_col``."""
    out_of_range_position = 99
    expected_message = "single positional indexer is out-of-bounds"
    with pytest.raises(IndexError, match=expected_message):
        scalars_df_index.string_col.iloc[out_of_range_position]
36953810
0 commit comments