diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 34375e8164085..7ba8ab0f22310 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1115,6 +1115,7 @@ Numeric - Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`) - Bug in :meth:`Series.dot` returning ``object`` dtype for :class:`ArrowDtype` and nullable-dtype data (:issue:`61375`) - Bug in :meth:`Series.std` and :meth:`Series.var` when using complex-valued data (:issue:`61645`) +- Bug in :meth:`Series.var` incorrectly computing variance for complex arrays by discarding the imaginary part during mean calculation (:issue:`62421`) - Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`) - Bug in arithmetic operations between objects with numpy-nullable dtype and :class:`ArrowDtype` incorrectly raising (:issue:`58602`) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 772f425beed96..c1a11151c7c09 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1013,7 +1013,11 @@ def nanvar( # observations. # # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count + if values.dtype.kind == "c": + # For complex numbers, preserve the dtype to avoid discarding imaginary part + avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count + else: + avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count if axis is not None: avg = np.expand_dims(avg, axis) if values.dtype.kind == "c": diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index c4fe681de7cea..ba2e1e1dec19b 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -787,6 +787,27 @@ def test_var_complex_array(self): assert ser.var(ddof=1) == 1.0 assert ser.std(ddof=1) == 1.0 + @pytest.mark.parametrize( + "values,ddof,expected", + [ + ([1 + 2j, 2 + 3j, 3 + 4j], 1, 2.0), + ([1 + 2j, 2 + 3j, 3 + 4j], 0, 4 / 3), + ([1 + 2j, 2 + 3j, 3 + 4j, np.nan + 0j], 1, 2.0), + ], + ) + def test_var_complex_values(self, values, ddof, expected): + # GH#62421 + ser = Series(values, dtype=np.complex128) + result = ser.var(ddof=ddof) + tm.assert_almost_equal(result, expected) + + def test_var_complex_dtype_preserved(self): + # GH#62421 + ser = Series([1 + 2j, 2 + 3j, 3 + 4j], dtype=np.complex128) + mean = ser.mean() + assert isinstance(mean, complex) + assert mean == 2 + 3j + @pytest.mark.parametrize("dtype", ("m8[ns]", "M8[ns]", "M8[ns, UTC]")) def test_empty_timeseries_reductions_return_nat(self, dtype, skipna): # covers GH#11245