From 1b1658558b1a4103387a112fb0f04367ee611af9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=90=D0=BA=D1=81=D0=B5=D0=BB=D1=8C=D1=80=D0=BE=D0=B4=20?=
 =?UTF-8?q?=D0=90=D0=BD=D0=B4=D1=80=D0=B5=D0=B9=20=D0=9B=D0=B5=D0=BE=D0=BD?=
 =?UTF-8?q?=D0=B8=D0=B4=D0=BE=D0=B2=D0=B8=D1=87?= <alakselrod_1@edu.hse.ru>
Date: Sun, 19 Jan 2025 22:58:26 +0300
Subject: [PATCH 1/2] refactor(np_can_hold_element): reduce complexity by
 splitting logic

---
 pandas/core/dtypes/cast.py | 391 ++++++++++++++++++++++++-------------
 1 file changed, 253 insertions(+), 138 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 02b9291da9b31..1f4d84b8dc118 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1767,180 +1767,294 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
         return False
 
 
-def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
+def _handle_integer_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
     """
-    Raise if we cannot losslessly set this element into an ndarray with this dtype.
+    Handles casting or validation of an element for integer dtypes.
 
-    Specifically about places where we disagree with numpy.  i.e. there are
-    cases where numpy will raise in doing the setitem that we do not check
-    for here, e.g. setting str "X" into a numeric ndarray.
+    Parameters
+    ----------
+    dtype : np.dtype
+        Target numpy integer dtype.
+    element : Any
+        Element to be checked or casted.
+    tipo : np.dtype or None
+        Inferred dtype of the element, if available.
 
     Returns
     -------
     Any
-        The element, potentially cast to the dtype.
+        The element, potentially cast to the target dtype.
 
     Raises
     ------
-    ValueError : If we cannot losslessly store this element with this dtype.
+    LossySetitemError: If the element cannot be losslessly stored in the given integer dtype.
     """
-    if dtype == _dtype_obj:
-        return element
+    if isinstance(element, range):
+        if _dtype_can_hold_range(element, dtype):
+            return element
+        raise LossySetitemError
 
-    tipo = _maybe_infer_dtype_type(element)
+    if is_integer(element) or (is_float(element) and element.is_integer()):
+        # e.g. test_setitem_series_int8 if we have a python int 1
+        #  tipo may be np.int32, despite the fact that it will fit
+        #  in smaller int dtypes.
+        info = np.iinfo(dtype)
+        if info.min <= element <= info.max:
+            return dtype.type(element)
+        raise LossySetitemError
 
-    if dtype.kind in "iu":
-        if isinstance(element, range):
-            if _dtype_can_hold_range(element, dtype):
-                return element
-            raise LossySetitemError
+    if tipo is None:
+        raise LossySetitemError
 
-        if is_integer(element) or (is_float(element) and element.is_integer()):
-            # e.g. test_setitem_series_int8 if we have a python int 1
-            #  tipo may be np.int32, despite the fact that it will fit
-            #  in smaller int dtypes.
-            info = np.iinfo(dtype)
-            if info.min <= element <= info.max:
-                return dtype.type(element)
+    if tipo.kind not in "iu":
+        if isinstance(element, np.ndarray) and element.dtype.kind == "f":
+            # If all can be losslessly cast to integers, then we can hold them
+            with np.errstate(invalid="ignore"):
+                # We check afterwards if the cast was lossless, so no need to show
+                # the warning
+                casted = element.astype(dtype)
+            comp = casted == element
+            if comp.all():
+                # Return the casted values bc they can be passed to
+                #  np.putmask, whereas the raw values cannot.
+                #  see TestSetitemFloatNDarrayIntoIntegerSeries
+                return casted
             raise LossySetitemError
 
-        if tipo is not None:
-            if tipo.kind not in "iu":
-                if isinstance(element, np.ndarray) and element.dtype.kind == "f":
-                    # If all can be losslessly cast to integers, then we can hold them
-                    with np.errstate(invalid="ignore"):
-                        # We check afterwards if cast was losslessly, so no need to show
-                        # the warning
-                        casted = element.astype(dtype)
-                    comp = casted == element
-                    if comp.all():
-                        # Return the casted values bc they can be passed to
-                        #  np.putmask, whereas the raw values cannot.
-                        #  see TestSetitemFloatNDarrayIntoIntegerSeries
-                        return casted
-                    raise LossySetitemError
-
-                elif isinstance(element, ABCExtensionArray) and isinstance(
-                    element.dtype, CategoricalDtype
-                ):
-                    # GH#52927 setting Categorical value into non-EA frame
-                    # TODO: general-case for EAs?
-                    try:
-                        casted = element.astype(dtype)
-                    except (ValueError, TypeError) as err:
-                        raise LossySetitemError from err
-                    # Check for cases of either
-                    #  a) lossy overflow/rounding or
-                    #  b) semantic changes like dt64->int64
-                    comp = casted == element
-                    if not comp.all():
-                        raise LossySetitemError
-                    return casted
-
-                # Anything other than integer we cannot hold
-                raise LossySetitemError
-            if (
-                dtype.kind == "u"
-                and isinstance(element, np.ndarray)
-                and element.dtype.kind == "i"
-            ):
-                # see test_where_uint64
+        elif isinstance(element, ABCExtensionArray) and isinstance(
+                element.dtype, CategoricalDtype
+        ):
+            # GH#52927 setting Categorical value into non-EA frame
+            # TODO: general-case for EAs?
+            try:
                 casted = element.astype(dtype)
-                if (casted == element).all():
-                    # TODO: faster to check (element >=0).all()?  potential
-                    #  itemsize issues there?
-                    return casted
+            except (ValueError, TypeError) as err:
+                raise LossySetitemError from err
+            # Check for cases of either
+            #  a) lossy overflow/rounding or
+            #  b) semantic changes like dt64->int64
+            comp = casted == element
+            if not comp.all():
                 raise LossySetitemError
-            if dtype.itemsize < tipo.itemsize:
-                raise LossySetitemError
-            if not isinstance(tipo, np.dtype):
-                # i.e. nullable IntegerDtype; we can put this into an ndarray
-                #  losslessly iff it has no NAs
-                arr = element._values if isinstance(element, ABCSeries) else element
-                if arr._hasna:
-                    raise LossySetitemError
-                return element
-
-            return element
+            return casted
 
+        # Anything other than integer we cannot hold
         raise LossySetitemError
 
-    if dtype.kind == "f":
-        if lib.is_integer(element) or lib.is_float(element):
-            casted = dtype.type(element)
-            if np.isnan(casted) or casted == element:
-                return casted
-            # otherwise e.g. overflow see TestCoercionFloat32
+    if (
+            dtype.kind == "u"
+            and isinstance(element, np.ndarray)
+            and element.dtype.kind == "i"
+    ):
+        # see test_where_uint64
+        casted = element.astype(dtype)
+        if (casted == element).all():
+            # TODO: faster to check (element >=0).all()?  potential
+            #  itemsize issues there?
+            return casted
+        raise LossySetitemError
+    if dtype.itemsize < tipo.itemsize:
+        raise LossySetitemError
+    if not isinstance(tipo, np.dtype):
+        # i.e. nullable IntegerDtype; we can put this into an ndarray
+        #  losslessly iff it has no NAs
+        arr = element._values if isinstance(element, ABCSeries) else element
+        if arr._hasna:
             raise LossySetitemError
+        return element
+    return element
 
-        if tipo is not None:
-            # TODO: itemsize check?
-            if tipo.kind not in "iuf":
-                # Anything other than float/integer we cannot hold
-                raise LossySetitemError
-            if not isinstance(tipo, np.dtype):
-                # i.e. nullable IntegerDtype or FloatingDtype;
-                #  we can put this into an ndarray losslessly iff it has no NAs
-                if element._hasna:
-                    raise LossySetitemError
-                return element
-            elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind:
-                if isinstance(element, np.ndarray):
-                    # e.g. TestDataFrameIndexingWhere::test_where_alignment
-                    casted = element.astype(dtype)
-                    if np.array_equal(casted, element, equal_nan=True):
-                        return casted
-                    raise LossySetitemError
+def _handle_float_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
+    """
+    Handles casting or validation of an element for floating-point dtypes.
 
-            return element
+    Parameters
+    ----------
+    dtype : np.dtype
+        Target numpy floating-point dtype.
+    element : Any
+        Element to be checked or casted.
+    tipo : np.dtype or None
+        Inferred dtype of the element, if available.
 
+    Returns
+    -------
+    Any
+        The element, potentially cast to the target dtype.
+
+    Raises
+    ------
+    LossySetitemError: If the element cannot be losslessly stored in the given float dtype.
+    """
+    if lib.is_integer(element) or lib.is_float(element):
+        casted = dtype.type(element)
+        if np.isnan(casted) or casted == element:
+            return casted
+        # otherwise e.g. overflow see TestCoercionFloat32
         raise LossySetitemError
 
-    if dtype.kind == "c":
-        if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element):
-            if np.isnan(element):
-                # see test_where_complex GH#6345
-                return dtype.type(element)
+    if tipo is None:
+        raise LossySetitemError
 
-            with warnings.catch_warnings():
-                warnings.filterwarnings("ignore")
-                casted = dtype.type(element)
-            if casted == element:
+    # TODO: itemsize check?
+    if tipo.kind not in "iuf":
+        # Anything other than float/integer we cannot hold
+        raise LossySetitemError
+    if not isinstance(tipo, np.dtype):
+        # i.e. nullable IntegerDtype or FloatingDtype;
+        #  we can put this into an ndarray losslessly iff it has no NAs
+        if element._hasna:
+            raise LossySetitemError
+        return element
+    elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind:
+        if isinstance(element, np.ndarray):
+            # e.g. TestDataFrameIndexingWhere::test_where_alignment
+            casted = element.astype(dtype)
+            if np.array_equal(casted, element, equal_nan=True):
                 return casted
-            # otherwise e.g. overflow see test_32878_complex_itemsize
             raise LossySetitemError
 
-        if tipo is not None:
-            if tipo.kind in "iufc":
-                return element
-            raise LossySetitemError
+    return element
+
+def _handle_complex_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
+    """
+    Handles casting or validation of an element for complex dtypes.
+
+    Parameters
+    ----------
+    dtype : np.dtype
+        Target numpy complex dtype.
+    element : Any
+        Element to be checked or casted.
+    tipo : np.dtype or None
+        Inferred dtype of the element, if available.
+
+    Returns
+    -------
+    Any
+        The element, potentially cast to the target dtype.
+
+    Raises
+    ------
+    LossySetitemError: If the element cannot be losslessly stored in the given complex dtype.
+    """
+    if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element):
+        if np.isnan(element):
+            # see test_where_complex GH#6345
+            return dtype.type(element)
+
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            casted = dtype.type(element)
+        if casted == element:
+            return casted
+        # otherwise e.g. overflow see test_32878_complex_itemsize
+        raise LossySetitemError
+    if tipo is None:
         raise LossySetitemError
+    if tipo.kind in "iufc":
+        return element
+    raise LossySetitemError
 
-    if dtype.kind == "b":
-        if tipo is not None:
-            if tipo.kind == "b":
-                if not isinstance(tipo, np.dtype):
-                    # i.e. we have a BooleanArray
-                    if element._hasna:
-                        # i.e. there are pd.NA elements
-                        raise LossySetitemError
-                return element
-            raise LossySetitemError
-        if lib.is_bool(element):
-            return element
+def _handle_boolean_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
+    """
+    Handles casting or validation of an element for boolean dtypes.
+
+    Parameters
+    ----------
+    dtype : np.dtype
+        Target numpy boolean dtype.
+    element : Any
+        Element to be checked or casted.
+    tipo : np.dtype or None
+        Inferred dtype of the element, if available.
+
+    Returns
+    -------
+    Any
+        The element, potentially cast to the target dtype.
+
+    Raises
+    ------
+    LossySetitemError: If the element cannot be losslessly stored in the given boolean dtype.
+    """
+    if lib.is_bool(element):
+        return element
+    if tipo is None:
         raise LossySetitemError
+    if tipo.kind == "b":
+        if not isinstance(tipo, np.dtype):
+            # i.e. we have a BooleanArray
+            if element._hasna:
+                # i.e. there are pd.NA elements
+                raise LossySetitemError
+        return element
+    raise LossySetitemError
 
-    if dtype.kind == "S":
-        # TODO: test tests.frame.methods.test_replace tests get here,
-        #  need more targeted tests.  xref phofl has a PR about this
-        if tipo is not None:
-            if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize:
-                return element
-            raise LossySetitemError
-        if isinstance(element, bytes) and len(element) <= dtype.itemsize:
-            return element
+def _handle_string_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
+    """
+    Handles casting or validation of an element for string (byte) dtypes.
+
+    Parameters
+    ----------
+    dtype : np.dtype
+        Target numpy string dtype (e.g., 'S').
+    element : Any
+        Element to be checked or casted.
+    tipo : np.dtype or None
+        Inferred dtype of the element, if available.
+
+    Returns
+    -------
+    Any
+        The element, potentially cast to the target dtype.
+
+    Raises
+    ------
+    LossySetitemError: If the element cannot be losslessly stored in the given string dtype.
+    """
+    # TODO: test tests.frame.methods.test_replace tests get here,
+    #  need more targeted tests.  xref phofl has a PR about this
+    if isinstance(element, bytes) and len(element) <= dtype.itemsize:
+        return element
+    if tipo is None:
         raise LossySetitemError
+    if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize:
+        return element
+    raise LossySetitemError
 
+def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
+    """
+    Raise if we cannot losslessly set this element into an ndarray with this dtype.
+
+    Specifically about places where we disagree with numpy.  i.e. there are
+    cases where numpy will raise in doing the setitem that we do not check
+    for here, e.g. setting str "X" into a numeric ndarray.
+
+    Returns
+    -------
+    Any
+        The element, potentially cast to the dtype.
+
+    Raises
+    ------
+    LossySetitemError : If we cannot losslessly store this element with this dtype.
+    """
+    if dtype == _dtype_obj:
+        return element
+
+    tipo = _maybe_infer_dtype_type(element)
+
+    if dtype.kind in "iu":
+        return _handle_integer_dtype(dtype, element, tipo)
+    if dtype.kind == "f":
+        return _handle_float_dtype(dtype, element, tipo)
+    if dtype.kind == "c":
+        return _handle_complex_dtype(dtype, element, tipo)
+    if dtype.kind == "b":
+        return _handle_boolean_dtype(dtype, element, tipo)
+    if dtype.kind == "S":
+        return _handle_string_dtype(dtype, element, tipo)
     if dtype.kind == "V":
         # i.e. np.void, which cannot hold _anything_
         raise LossySetitemError
@@ -1948,6 +2062,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
     raise NotImplementedError(dtype)
 
 
+
 def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool:
     """
     _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints),

From 74845ea90c27e75f52d55f687914e967d0a629ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=90=D0=BA=D1=81=D0=B5=D0=BB=D1=8C=D1=80=D0=BE=D0=B4=20?=
 =?UTF-8?q?=D0=90=D0=BD=D0=B4=D1=80=D0=B5=D0=B9=20=D0=9B=D0=B5=D0=BE=D0=BD?=
 =?UTF-8?q?=D0=B8=D0=B4=D0=BE=D0=B2=D0=B8=D1=87?= <alakselrod_1@edu.hse.ru>
Date: Sun, 19 Jan 2025 23:30:03 +0300
Subject: [PATCH 2/2] style: fix flake8 issues in np_can_hold_element refactor

---
 pandas/core/dtypes/cast.py | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 1f4d84b8dc118..05679408bd9e3 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1787,7 +1787,8 @@ def _handle_integer_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
 
     Raises
     ------
-    LossySetitemError: If the element cannot be losslessly stored in the given integer dtype.
+    LossySetitemError
+        If the element cannot be losslessly stored in the given integer dtype.
     """
     if isinstance(element, range):
         if _dtype_can_hold_range(element, dtype):
@@ -1822,7 +1823,7 @@ def _handle_integer_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
             raise LossySetitemError
 
         elif isinstance(element, ABCExtensionArray) and isinstance(
-                element.dtype, CategoricalDtype
+            element.dtype, CategoricalDtype
         ):
             # GH#52927 setting Categorical value into non-EA frame
             # TODO: general-case for EAs?
@@ -1842,9 +1843,9 @@ def _handle_integer_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
         raise LossySetitemError
 
     if (
-            dtype.kind == "u"
-            and isinstance(element, np.ndarray)
-            and element.dtype.kind == "i"
+        dtype.kind == "u"
+        and isinstance(element, np.ndarray)
+        and element.dtype.kind == "i"
     ):
         # see test_where_uint64
         casted = element.astype(dtype)
@@ -1864,6 +1865,7 @@ def _handle_integer_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
         return element
     return element
 
+
 def _handle_float_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
     """
     Handles casting or validation of an element for floating-point dtypes.
@@ -1884,7 +1886,8 @@ def _handle_float_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
 
     Raises
     ------
-    LossySetitemError: If the element cannot be losslessly stored in the given float dtype.
+    LossySetitemError
+        If the element cannot be losslessly stored in the given float dtype.
     """
     if lib.is_integer(element) or lib.is_float(element):
         casted = dtype.type(element)
@@ -1916,6 +1919,7 @@ def _handle_float_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
 
     return element
 
+
 def _handle_complex_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
     """
     Handles casting or validation of an element for complex dtypes.
@@ -1936,7 +1940,8 @@ def _handle_complex_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
 
     Raises
     ------
-    LossySetitemError: If the element cannot be losslessly stored in the given complex dtype.
+    LossySetitemError
+        If the element cannot be losslessly stored in the given complex dtype.
     """
     if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element):
         if np.isnan(element):
@@ -1956,6 +1961,7 @@ def _handle_complex_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
         return element
     raise LossySetitemError
 
+
 def _handle_boolean_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
     """
     Handles casting or validation of an element for boolean dtypes.
@@ -1976,7 +1982,8 @@ def _handle_boolean_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
 
     Raises
     ------
-    LossySetitemError: If the element cannot be losslessly stored in the given boolean dtype.
+    LossySetitemError
+        If the element cannot be losslessly stored in the given boolean dtype.
     """
     if lib.is_bool(element):
         return element
@@ -1991,6 +1998,7 @@ def _handle_boolean_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
         return element
     raise LossySetitemError
 
+
 def _handle_string_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
     """
     Handles casting or validation of an element for string (byte) dtypes.
@@ -2011,7 +2019,8 @@ def _handle_string_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
 
     Raises
     ------
-    LossySetitemError: If the element cannot be losslessly stored in the given string dtype.
+    LossySetitemError
+        If the element cannot be losslessly stored in the given string dtype.
     """
     # TODO: test tests.frame.methods.test_replace tests get here,
     #  need more targeted tests.  xref phofl has a PR about this
@@ -2023,6 +2032,7 @@ def _handle_string_dtype(dtype: np.dtype, element: Any, tipo: np.dtype) -> Any:
         return element
     raise LossySetitemError
 
+
 def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
     """
     Raise if we cannot losslessly set this element into an ndarray with this dtype.
@@ -2062,7 +2072,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
     raise NotImplementedError(dtype)
 
 
-
 def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool:
     """
     _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints),