From 9dbe5dcaa1565f03f4373d6c1ca949dca00f4988 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Sat, 23 Nov 2024 01:02:10 -0800 Subject: [PATCH 01/10] Fix formatting of complex floats with exponents --- pandas/io/formats/format.py | 2 +- pandas/tests/io/formats/test_to_string.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 861f5885f80c6..4f87b1a30ca61 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1749,7 +1749,7 @@ def _trim_zeros_complex(str_complexes: ArrayLike, decimal: str = ".") -> list[st # The split will give [{"", "-"}, "xxx", "+/-", "xxx", "j", ""] # Therefore, the imaginary part is the 4th and 3rd last elements, # and the real part is everything before the imaginary part - trimmed = re.split(r"([j+-])", x) + trimmed = re.split(r"(? Date: Mon, 25 Nov 2024 13:36:08 -0500 Subject: [PATCH 02/10] fix issue #60410 (#60412) --- doc/source/user_guide/window.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index e25c4c2441920..0581951d5bfad 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -567,9 +567,9 @@ One must have :math:`0 < \alpha \leq 1`, and while it is possible to pass \alpha = \begin{cases} - \frac{2}{s + 1}, & \text{for span}\ s \geq 1\\ - \frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\ - 1 - \exp^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0 + \frac{2}{s + 1}, & \text{for span}\ s \geq 1\\ + \frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\ + 1 - e^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0 \end{cases} One must specify precisely one of **span**, **center of mass**, **half-life** From 8b05baf891d8704df9520b8665ce9278599cf0fd Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:09:31 +0530 Subject: [PATCH 03/10] DOC: fix SA01 for pandas.errors.UnsortedIndexError (#60404) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 772793702f8b8..2a8b5f15d95f3 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -119,7 +119,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.errors.PerformanceWarning SA01" \ -i "pandas.errors.PossibleDataLossError SA01" \ -i "pandas.errors.UndefinedVariableError PR01,SA01" \ - -i "pandas.errors.UnsortedIndexError SA01" \ -i "pandas.errors.ValueLabelTypeMismatch SA01" \ -i "pandas.infer_freq SA01" \ -i "pandas.io.json.build_table_schema PR07,RT03,SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 68bd70603abae..d6d2fd82858ed 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -100,6 +100,11 @@ class UnsortedIndexError(KeyError): Subclass of `KeyError`. + See Also + -------- + DataFrame.sort_index : Sort a DataFrame by its index. + DataFrame.set_index : Set the DataFrame index using existing columns. + Examples -------- >>> df = pd.DataFrame( From bc09236360ff63d6d0029de99b0ff92b32a28ef5 Mon Sep 17 00:00:00 2001 From: lfffkh <167774581+lfffkh@users.noreply.github.com> Date: Tue, 26 Nov 2024 02:40:37 +0800 Subject: [PATCH 04/10] Fix BUG: Cannot shift Intervals that are not closed='right' (the default) (#60407) first --- pandas/core/arrays/interval.py | 4 +++- pandas/tests/frame/methods/test_shift.py | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index f47ef095a8409..bbbf1d9ca60bd 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1055,7 +1055,9 @@ def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray: from pandas import Index fill_value = Index(self._left, copy=False)._na_value - empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) + empty = IntervalArray.from_breaks( + [fill_value] * (empty_len + 1), closed=self.closed + ) else: empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index a0f96ff111444..b52240c208493 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -757,3 +757,12 @@ def test_shift_with_offsets_freq_empty(self): df_shifted = DataFrame(index=shifted_dates) result = df.shift(freq=offset) tm.assert_frame_equal(result, df_shifted) + + def test_series_shift_interval_preserves_closed(self): + # GH#60389 + ser = Series( + [pd.Interval(1, 2, closed="right"), pd.Interval(2, 3, closed="right")] + ) + result = ser.shift(1) + expected = Series([np.nan, pd.Interval(1, 2, closed="right")]) + tm.assert_series_equal(result, expected) From 37a893d3d40269fed1e2f1bf1d4687528f5b9445 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:11:18 +0530 Subject: [PATCH 05/10] DOC: fix SA01,ES01 for pandas.errors.PossibleDataLossError (#60403) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 9 +++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 2a8b5f15d95f3..03c6b8dc077b9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -117,7 +117,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.errors.NumbaUtilError SA01" \ -i "pandas.errors.OutOfBoundsTimedelta SA01" \ -i "pandas.errors.PerformanceWarning SA01" \ - -i "pandas.errors.PossibleDataLossError SA01" \ -i "pandas.errors.UndefinedVariableError PR01,SA01" \ -i "pandas.errors.ValueLabelTypeMismatch SA01" \ -i "pandas.infer_freq SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index d6d2fd82858ed..5642b0d33b4f7 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -638,6 +638,15 @@ class PossibleDataLossError(Exception): """ Exception raised when trying to open a HDFStore file when already opened. + This error is triggered when there is a potential risk of data loss due to + conflicting operations on an HDFStore file. It serves to prevent unintended + overwrites or data corruption by enforcing exclusive access to the file. + + See Also + -------- + HDFStore : Dict-like IO interface for storing pandas objects in PyTables. + HDFStore.open : Open an HDFStore file in the specified mode. + Examples -------- >>> store = pd.HDFStore("my-store", "a") # doctest: +SKIP From 4648cd855a5456b334e5367d8aaf31161af93536 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:11:50 +0530 Subject: [PATCH 06/10] DOC: fix SA01 for pandas.errors.OutOfBoundsTimedelta (#60402) --- ci/code_checks.sh | 1 - pandas/_libs/tslibs/np_datetime.pyx | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 03c6b8dc077b9..2817d84bad7b8 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -115,7 +115,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.errors.NullFrequencyError SA01" \ -i "pandas.errors.NumExprClobberingError SA01" \ -i "pandas.errors.NumbaUtilError SA01" \ - -i "pandas.errors.OutOfBoundsTimedelta SA01" \ -i "pandas.errors.PerformanceWarning SA01" \ -i "pandas.errors.UndefinedVariableError PR01,SA01" \ -i "pandas.errors.ValueLabelTypeMismatch SA01" \ diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 193556b2697a9..1b7f04fe17238 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -201,6 +201,10 @@ class OutOfBoundsTimedelta(ValueError): Representation should be within a timedelta64[ns]. + See Also + -------- + date_range : Return a fixed frequency DatetimeIndex. + Examples -------- >>> pd.date_range(start="1/1/1700", freq="B", periods=100000) From 0fd80ba2daee8472b0fd82b5bbf33e541db01ab9 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:12:23 +0530 Subject: [PATCH 07/10] DOC: fix SA01,ES01 for pandas.errors.DuplicateLabelError (#60399) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 2817d84bad7b8..8bafcb8944e14 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -109,7 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.core.resample.Resampler.std SA01" \ -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \ -i "pandas.core.resample.Resampler.var SA01" \ - -i "pandas.errors.DuplicateLabelError SA01" \ -i "pandas.errors.IntCastingNaNError SA01" \ -i "pandas.errors.InvalidIndexError SA01" \ -i "pandas.errors.NullFrequencyError SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 5642b0d33b4f7..70e523688c644 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -393,6 +393,19 @@ class DuplicateLabelError(ValueError): """ Error raised when an operation would introduce duplicate labels. + This error is typically encountered when performing operations on objects + with `allows_duplicate_labels=False` and the operation would result in + duplicate labels in the index. Duplicate labels can lead to ambiguities + in indexing and reduce data integrity. + + See Also + -------- + Series.set_flags : Return a new ``Series`` object with updated flags. + DataFrame.set_flags : Return a new ``DataFrame`` object with updated flags. + Series.reindex : Conform ``Series`` object to new index with optional filling logic. + DataFrame.reindex : Conform ``DataFrame`` object to new index with optional filling + logic. + Examples -------- >>> s = pd.Series([0, 1, 2], index=["a", "b", "c"]).set_flags( From 8488b9ca83ca9624c69cc9035496b804980fb706 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:12:55 +0530 Subject: [PATCH 08/10] DOC: fix SA01,ES01 for pandas.errors.InvalidIndexError (#60400) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 8bafcb8944e14..58b0d26f7e2f3 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -110,7 +110,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \ -i "pandas.core.resample.Resampler.var SA01" \ -i "pandas.errors.IntCastingNaNError SA01" \ - -i "pandas.errors.InvalidIndexError SA01" \ -i "pandas.errors.NullFrequencyError SA01" \ -i "pandas.errors.NumExprClobberingError SA01" \ -i "pandas.errors.NumbaUtilError SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 70e523688c644..814feadfb06e4 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -425,6 +425,16 @@ class InvalidIndexError(Exception): """ Exception raised when attempting to use an invalid index key. + This exception is triggered when a user attempts to access or manipulate + data in a pandas DataFrame or Series using an index key that is not valid + for the given object. This may occur in cases such as using a malformed + slice, a mismatched key for a ``MultiIndex``, or attempting to access an index + element that does not exist. + + See Also + -------- + MultiIndex : A multi-level, or hierarchical, index object for pandas objects. + Examples -------- >>> idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]]) From a328990466d63dca11349840635e36dedc721024 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:20:15 +0530 Subject: [PATCH 09/10] DOC: fix SA01 for pandas.errors.NumExprClobberingError (#60401) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 58b0d26f7e2f3..246a907c5052c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -111,7 +111,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.core.resample.Resampler.var SA01" \ -i "pandas.errors.IntCastingNaNError SA01" \ -i "pandas.errors.NullFrequencyError SA01" \ - -i "pandas.errors.NumExprClobberingError SA01" \ -i "pandas.errors.NumbaUtilError SA01" \ -i "pandas.errors.PerformanceWarning SA01" \ -i "pandas.errors.UndefinedVariableError PR01,SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 814feadfb06e4..70d839d817114 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -538,6 +538,11 @@ class NumExprClobberingError(NameError): to 'numexpr'. 'numexpr' is the default engine value for these methods if the numexpr package is installed. + See Also + -------- + eval : Evaluate a Python expression as a string using various backends. + DataFrame.query : Query the columns of a DataFrame with a boolean expression. + Examples -------- >>> df = pd.DataFrame({"abs": [1, 1, 1]}) From 12405f6f19900606f7e4354d17478539757f864f Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Mon, 25 Nov 2024 11:34:48 -0800 Subject: [PATCH 10/10] Update whatsnew and make it a separate test --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/tests/io/formats/test_to_string.py | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1d55fc3ed7b84..1b12735f0e7c1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -789,6 +789,7 @@ Other - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) +- Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`) - Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`) - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`) - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index bac4baa9766d0..af3cdf2d44af3 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -422,6 +422,7 @@ def test_to_string_complex_float_formatting(self): ) assert result == expected + def test_to_string_complex_float_formatting_with_exponents(self): # GH #60393 with option_context("display.precision", 6): df = DataFrame(