@@ -265,7 +265,13 @@ def _adapt_pandas_series(
265265) -> tuple [Union [pa .ChunkedArray , pa .Array ], bigframes .dtypes .Dtype ]:
266266 # Mostly rely on pyarrow conversions, but have to convert geo without its help.
267267 if series .dtype == bigframes .dtypes .GEO_DTYPE :
268- series = geopandas .GeoSeries (series ).to_wkt (rounding_precision = - 1 )
268+ # GeoSeries.to_wkt produces e.g. "POINT (1 1)", while BigQuery uses the style "POINT(1 1)";
269+ # we normalize to the BigQuery style for consistency.
270+ series = (
271+ geopandas .GeoSeries (series )
272+ .to_wkt (rounding_precision = - 1 )
273+ .str .replace (r"(\w+) \(" , repl = r"\1(" , regex = True )
274+ )
269275 return pa .array (series , type = pa .string ()), bigframes .dtypes .GEO_DTYPE
270276 try :
271277 return _adapt_arrow_array (pa .array (series ))
@@ -326,7 +332,7 @@ def _adapt_arrow_array(array: pa.Array) -> tuple[pa.Array, bigframes.dtypes.Dtyp
326332 return new_value .fill_null ([]), bigframes .dtypes .list_type (values_type )
327333 if array .type == bigframes .dtypes .JSON_ARROW_TYPE :
328334 return _canonicalize_json (array ), bigframes .dtypes .JSON_DTYPE
329- target_type = _logical_type_replacements (array .type )
335+ target_type = logical_type_replacements (array .type )
330336 if target_type != array .type :
331337 # TODO: Maybe warn if lossy conversion?
332338 array = array .cast (target_type )
@@ -372,6 +378,10 @@ def recursive_f(type: pa.DataType) -> pa.DataType:
372378 if new_field_t != type .value_type :
373379 return pa .list_ (new_field_t )
374380 return type
381+ # polars can produce large lists, and we want to map these down to regular lists
382+ if pa .types .is_large_list (type ):
383+ new_field_t = recursive_f (type .value_type )
384+ return pa .list_ (new_field_t )
375385 if pa .types .is_struct (type ):
376386 struct_type = cast (pa .StructType , type )
377387 new_fields : list [pa .Field ] = []
@@ -385,7 +395,7 @@ def recursive_f(type: pa.DataType) -> pa.DataType:
385395
386396
387397@_recursive_map_types
388- def _logical_type_replacements (type : pa .DataType ) -> pa .DataType :
398+ def logical_type_replacements (type : pa .DataType ) -> pa .DataType :
389399 if pa .types .is_timestamp (type ):
390400 # This is potentially lossy, but BigFrames doesn't support ns
391401 new_tz = "UTC" if (type .tz is not None ) else None
@@ -403,8 +413,11 @@ def _logical_type_replacements(type: pa.DataType) -> pa.DataType:
403413 if pa .types .is_large_string (type ):
404414 # simple string type can handle the largest strings needed
405415 return pa .string ()
416+ if pa .types .is_large_binary (type ):
417+ # simple binary type can handle the largest byte strings needed
418+ return pa .binary ()
406419 if pa .types .is_dictionary (type ):
407- return _logical_type_replacements (type .value_type )
420+ return logical_type_replacements (type .value_type )
408421 if pa .types .is_null (type ):
409422 # null as a type not allowed, default type is float64 for bigframes
410423 return pa .float64 ()
0 commit comments