From 4959b784ce56354afeab948ba5e3876b2ad8faf7 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Wed, 15 Oct 2025 23:24:58 -0400 Subject: [PATCH 1/3] fix np.ndarray.astype runtimewarning --- pandas/core/dtypes/cast.py | 4 +-- .../frame/methods/test_convert_dtypes.py | 27 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3b615c70ebea2..2c4221b52bcd3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -960,7 +960,7 @@ def convert_dtypes( if len(arr) < len(input_array) and not is_nan_na(): # In the presence of NaNs, we cannot convert to IntegerDtype pass - elif (arr.astype(int) == arr).all(): + elif np.array_equal(arr, np.trunc(arr), equal_nan=True): inferred_dtype = target_int_dtype else: inferred_dtype = input_array.dtype @@ -987,7 +987,7 @@ def convert_dtypes( if len(arr) < len(input_array) and not is_nan_na(): # In the presence of NaNs, we can't convert to IntegerDtype inferred_dtype = inferred_float_dtype - elif (arr.astype(int) == arr).all(): + elif np.array_equal(arr, np.trunc(arr)): inferred_dtype = pandas_dtype_func("Int64") else: inferred_dtype = inferred_float_dtype diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index e90786a43c483..56706220a920f 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -228,3 +228,30 @@ def test_convert_dtype_pyarrow_timezone_preserve(self): result = df.convert_dtypes(dtype_backend="pyarrow") expected = df.copy() tm.assert_frame_equal(result, expected) + + def test_convert_dtype_pyarrow_int64_limits_warning(self): + # GH 58485 + pytest.importorskip("pyarrow") + data = { + "a": [ + -9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + "b": [ + -9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + "c": [ + 
-9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + } + result = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") + expected = pd.DataFrame(data, dtype="int64[pyarrow]") + tm.assert_frame_equal(result, expected) From 7a0282a9cc5ddb7f11cae3cc19b2253fbe0cb84f Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Wed, 15 Oct 2025 23:28:22 -0400 Subject: [PATCH 2/3] update whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..a8f4f9741044d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1007,6 +1007,7 @@ Conversion - Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) - Bug in :meth:`to_datetime` and :meth:`to_timedelta` with input ``None`` returning ``None`` instead of ``NaT``, inconsistent with other conversion methods (:issue:`23055`) +- Bug in :meth:`DataFrame.convert_dtypes` with `dtype_backend='pyarrow'` for large `int64` values (:issue:`58485`) Strings ^^^^^^^ From aa4ab6172aabe5f8877d5257d51b12cda8cf98c5 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Wed, 15 Oct 2025 23:33:28 -0400 Subject: [PATCH 3/3] fix precommit --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a8f4f9741044d..08ba3d78b5b72 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1002,12 +1002,12 @@ Numeric Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) +- Bug in :meth:`DataFrame.convert_dtypes` with 
``dtype_backend='pyarrow'`` emitting a ``RuntimeWarning`` for large ``int64`` values (:issue:`58485`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) - Bug in :meth:`to_datetime` and :meth:`to_timedelta` with input ``None`` returning ``None`` instead of ``NaT``, inconsistent with other conversion methods (:issue:`23055`) -- Bug in :meth:`DataFrame.convert_dtypes` with `dtype_backend='pyarrow'` for large `int64` values (:issue:`58485`) Strings ^^^^^^^