@@ -2449,18 +2449,18 @@ def maybe_convert_numeric(
24492449@ cython.wraparound (False )
24502450def _convert_to_pyarrow (
24512451 ndarray[object] objects ,
2452- ndarray[uint8_t] mask ) -> "ArrayLike":
2452+ ndarray[uint8_t] mask ,
2453+ object na_value = None ) -> "ArrayLike":
24532454 from pandas.core.dtypes.dtypes import ArrowDtype
24542455
24552456 from pandas.core.arrays.string_ import StringDtype
24562457
2457- na_value = np.nan
2458- if mask is not None and any(mask ):
2459- na_value = objects[mask][0 ]
2460-
2458+ # pa.array does not accept pd.NA as a missing-value marker,
2459+ # so we replace the masked entries with None here and restore the NA value afterwards
24612460 objects[mask] = None
24622461 pa_array = pa.array(objects)
24632462
2463+ # PyArrow large_string arrays use StringDtype (not ArrowDtype)
24642464 if pa.types.is_large_string(pa_array.type ):
24652465 dtype = StringDtype(storage = " pyarrow" , na_value = na_value)
24662466 else :
@@ -2510,7 +2510,8 @@ def maybe_convert_objects(ndarray[object] objects,
25102510 bint convert_to_nullable_dtype = False ,
25112511 bint convert_non_numeric = False ,
25122512 object dtype_if_all_nat = None ,
2513- str storage = None ) -> "ArrayLike":
2513+ str storage = None ,
2514+ object na_value = None ) -> "ArrayLike":
25142515 """
25152516 Type inference function-- convert object array to proper dtype
25162517
@@ -2712,7 +2713,7 @@ def maybe_convert_objects(ndarray[object] objects,
27122713 break
27132714
27142715 if storage == " pyarrow" :
2715- return _convert_to_pyarrow(objects, mask)
2716+ return _convert_to_pyarrow(objects, mask, na_value )
27162717
27172718 numpy_dtype = None
27182719 if len (val_types) == 1 :
@@ -3009,6 +3010,7 @@ def map_infer_mask(
30093010 convert_to_nullable_dtype = convert_to_nullable_dtype,
30103011 convert_non_numeric = convert_non_numeric,
30113012 storage = storage,
3013+ na_value = na_value,
30123014 )
30133015 else :
30143016 return result
@@ -3079,6 +3081,7 @@ def map_infer(
30793081 convert_to_nullable_dtype = convert_to_nullable_dtype,
30803082 convert_non_numeric = True ,
30813083 storage = storage,
3084+ na_value = na_value,
30823085 )
30833086 else :
30843087 return result
0 commit comments