@@ -34,9 +34,7 @@ def test_read_arrow_basic(session):
3434 pa .array ([0.1 , 0.2 , 0.3 ], type = pa .float64 ()),
3535 pa .array (["foo" , "bar" , "baz" ], type = pa .string ()),
3636 ]
37- arrow_table = pa .Table .from_arrays (
38- data , names = ["ints" , "floats" , "strings" ]
39- )
37+ arrow_table = pa .Table .from_arrays (data , names = ["ints" , "floats" , "strings" ])
4038
4139 bf_df = bpd .read_arrow (arrow_table )
4240
@@ -142,13 +140,17 @@ def test_read_arrow_all_types(session):
142140 bf_pd_df = bf_df .to_pandas ()
143141
144142 for col in ["int_col" , "float_col" ]:
145- bf_pd_df [col ] = bf_pd_df [col ].astype (pd_expected [col ].dtype )
143+ bf_pd_df [col ] = bf_pd_df [col ].astype (pd_expected [col ].dtype )
146144
147145 bf_pd_df ["str_col" ] = bf_pd_df ["str_col" ].astype (pandas .ArrowDtype (pa .string ()))
148146 bf_pd_df ["ts_col" ] = pandas .to_datetime (bf_pd_df ["ts_col" ], utc = True )
149- bf_pd_df ["date_col" ] = bf_pd_df ["date_col" ].apply (lambda x : x .date () if hasattr (x , 'date' ) and x is not pandas .NaT else x )
147+ bf_pd_df ["date_col" ] = bf_pd_df ["date_col" ].apply (
148+ lambda x : x .date () if hasattr (x , "date" ) and x is not pandas .NaT else x
149+ )
150150 bf_pd_df ["bool_col" ] = bf_pd_df ["bool_col" ].astype (pandas .ArrowDtype (pa .bool_ ()))
151- pd_expected ["bool_col" ] = pd_expected ["bool_col" ].astype (pandas .ArrowDtype (pa .bool_ ()))
151+ pd_expected ["bool_col" ] = pd_expected ["bool_col" ].astype (
152+ pandas .ArrowDtype (pa .bool_ ())
153+ )
152154
153155 pandas .testing .assert_frame_equal (
154156 bf_pd_df , pd_expected , check_dtype = False , rtol = 1e-5
@@ -193,10 +195,18 @@ def test_read_arrow_list_types(session):
193195 bf_pd_df = bf_df .to_pandas ()
194196
195197 # Explicitly cast to ArrowDtype for comparison as pandas might default to object
196- pd_expected ["list_int_col" ] = pd_expected ["list_int_col" ].astype (pandas .ArrowDtype (pa .list_ (pa .int64 ())))
197- pd_expected ["list_str_col" ] = pd_expected ["list_str_col" ].astype (pandas .ArrowDtype (pa .list_ (pa .string ())))
198- bf_pd_df ["list_int_col" ] = bf_pd_df ["list_int_col" ].astype (pandas .ArrowDtype (pa .list_ (pa .int64 ())))
199- bf_pd_df ["list_str_col" ] = bf_pd_df ["list_str_col" ].astype (pandas .ArrowDtype (pa .list_ (pa .string ())))
198+ pd_expected ["list_int_col" ] = pd_expected ["list_int_col" ].astype (
199+ pandas .ArrowDtype (pa .list_ (pa .int64 ()))
200+ )
201+ pd_expected ["list_str_col" ] = pd_expected ["list_str_col" ].astype (
202+ pandas .ArrowDtype (pa .list_ (pa .string ()))
203+ )
204+ bf_pd_df ["list_int_col" ] = bf_pd_df ["list_int_col" ].astype (
205+ pandas .ArrowDtype (pa .list_ (pa .int64 ()))
206+ )
207+ bf_pd_df ["list_str_col" ] = bf_pd_df ["list_str_col" ].astype (
208+ pandas .ArrowDtype (pa .list_ (pa .string ()))
209+ )
200210
201211 pandas .testing .assert_frame_equal (bf_pd_df , pd_expected , check_dtype = True )
202212
@@ -214,7 +224,9 @@ def test_read_arrow_engine_streaming(session):
214224 assert str (bf_df .dtypes ["event" ]) == "string[pyarrow]"
215225 pd_expected = arrow_table .to_pandas ()
216226 bf_pd_df = bf_df .to_pandas ()
217- pandas .testing .assert_frame_equal (bf_pd_df .astype (pd_expected .dtypes ), pd_expected , check_dtype = False )
227+ pandas .testing .assert_frame_equal (
228+ bf_pd_df .astype (pd_expected .dtypes ), pd_expected , check_dtype = False
229+ )
218230
219231
220232def test_read_arrow_engine_write (session ):
@@ -230,7 +242,9 @@ def test_read_arrow_engine_write(session):
230242 assert str (bf_df .dtypes ["status" ]) == "string[pyarrow]"
231243 pd_expected = arrow_table .to_pandas ()
232244 bf_pd_df = bf_df .to_pandas ()
233- pandas .testing .assert_frame_equal (bf_pd_df .astype (pd_expected .dtypes ), pd_expected , check_dtype = False )
245+ pandas .testing .assert_frame_equal (
246+ bf_pd_df .astype (pd_expected .dtypes ), pd_expected , check_dtype = False
247+ )
234248
235249
236250def test_read_arrow_no_columns_empty_rows (session ):
@@ -241,7 +255,14 @@ def test_read_arrow_no_columns_empty_rows(session):
241255
242256
243257def test_read_arrow_special_column_names (session ):
244- col_names = ["col with space" , "col/slash" , "col.dot" , "col:colon" , "col(paren)" , "col[bracket]" ]
258+ col_names = [
259+ "col with space" ,
260+ "col/slash" ,
261+ "col.dot" ,
262+ "col:colon" ,
263+ "col(paren)" ,
264+ "col[bracket]" ,
265+ ]
245266 # BigQuery normalizes column names by replacing special characters with underscores.
246267 # Exception: dots are not allowed and usually cause errors or are handled by specific client libraries.
247268 # BigFrames aims to map to valid BigQuery column names.
@@ -267,10 +288,10 @@ def test_read_arrow_special_column_names(session):
267288 expected_bq_names = [
268289 "col_with_space" ,
269290 "col_slash" ,
270- "col_dot" , # BQ might error on dots or replace them. Let's assume replacement for now.
291+ "col_dot" , # BQ might error on dots or replace them. Let's assume replacement for now.
271292 "col_colon" ,
272293 "col_paren_" ,
273- "col_bracket_"
294+ "col_bracket_" ,
274295 ]
275296 # Update: Based on typical BigQuery behavior, dots are not allowed.
276297 # However, BigFrames might handle this by replacing dots with underscores before sending to BQ,
@@ -292,8 +313,8 @@ def test_read_arrow_special_column_names(session):
292313 # And arrow_table.to_pandas() will use the original names.
293314 # We then rename bf_pd_df columns to match pd_expected for data comparison.
294315
295- pd_expected = arrow_table .to_pandas () # Has original names
296- bf_pd_df = bf_df .to_pandas () # Has BQ/BF names
316+ pd_expected = arrow_table .to_pandas () # Has original names
317+ bf_pd_df = bf_df .to_pandas () # Has BQ/BF names
297318
298319 assert len (bf_pd_df .columns ) == len (pd_expected .columns )
299320
0 commit comments