Skip to content

Commit ebb8d26

Browse files
committed
code clean up: _convert_to_pyarrow simplification
1 parent c10a244 commit ebb8d26

File tree

2 files changed

+13
-7
lines changed

2 files changed

+13
-7
lines changed

pandas/_libs/lib.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def maybe_convert_objects(
102102
convert_to_nullable_dtype: Literal[False] = ...,
103103
dtype_if_all_nat: DtypeObj | None = ...,
104104
storage: str | None = ...,
105+
na_value: Any = ...,
105106
) -> npt.NDArray[np.object_ | np.number]: ...
106107
@overload
107108
def maybe_convert_objects(
@@ -114,6 +115,7 @@ def maybe_convert_objects(
114115
convert_to_nullable_dtype: Literal[True] = ...,
115116
dtype_if_all_nat: DtypeObj | None = ...,
116117
storage: str | None = ...,
118+
na_value: Any = ...,
117119
) -> ArrayLike: ...
118120
@overload
119121
def maybe_convert_objects(
@@ -126,6 +128,7 @@ def maybe_convert_objects(
126128
convert_to_nullable_dtype: bool = ...,
127129
dtype_if_all_nat: DtypeObj | None = ...,
128130
storage: str | None = ...,
131+
na_value: Any = ...,
129132
) -> ArrayLike: ...
130133
@overload
131134
def maybe_convert_numeric(

pandas/_libs/lib.pyx

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,18 +2449,18 @@ def maybe_convert_numeric(
24492449
@cython.wraparound(False)
24502450
def _convert_to_pyarrow(
24512451
ndarray[object] objects,
2452-
ndarray[uint8_t] mask) -> "ArrayLike":
2452+
ndarray[uint8_t] mask,
2453+
object na_value=None) -> "ArrayLike":
24532454
from pandas.core.dtypes.dtypes import ArrowDtype
24542455

24552456
from pandas.core.arrays.string_ import StringDtype
24562457

2457-
na_value = np.nan
2458-
if mask is not None and any(mask):
2459-
na_value = objects[mask][0]
2460-
2458+
# pa.array does not accept pd.NA as a missing-value marker,
2459+
# so we replace the masked entries with None and restore them afterwards
24612460
objects[mask] = None
24622461
pa_array = pa.array(objects)
24632462

2463+
# PyArrow large_string arrays map to StringDtype (not ArrowDtype)
24642464
if pa.types.is_large_string(pa_array.type):
24652465
dtype = StringDtype(storage="pyarrow", na_value=na_value)
24662466
else:
@@ -2510,7 +2510,8 @@ def maybe_convert_objects(ndarray[object] objects,
25102510
bint convert_to_nullable_dtype=False,
25112511
bint convert_non_numeric=False,
25122512
object dtype_if_all_nat=None,
2513-
str storage=None) -> "ArrayLike":
2513+
str storage=None,
2514+
object na_value=None) -> "ArrayLike":
25142515
"""
25152516
Type inference function-- convert object array to proper dtype
25162517

@@ -2712,7 +2713,7 @@ def maybe_convert_objects(ndarray[object] objects,
27122713
break
27132714

27142715
if storage == "pyarrow":
2715-
return _convert_to_pyarrow(objects, mask)
2716+
return _convert_to_pyarrow(objects, mask, na_value)
27162717

27172718
numpy_dtype = None
27182719
if len(val_types) == 1:
@@ -3009,6 +3010,7 @@ def map_infer_mask(
30093010
convert_to_nullable_dtype=convert_to_nullable_dtype,
30103011
convert_non_numeric=convert_non_numeric,
30113012
storage=storage,
3013+
na_value=na_value,
30123014
)
30133015
else:
30143016
return result
@@ -3079,6 +3081,7 @@ def map_infer(
30793081
convert_to_nullable_dtype=convert_to_nullable_dtype,
30803082
convert_non_numeric=True,
30813083
storage=storage,
3084+
na_value=na_value,
30823085
)
30833086
else:
30843087
return result

0 commit comments

Comments
 (0)