Skip to content

Commit acfe152

Browse files
authored
Merge branch 'main' into BUG-57390/Identity-checking-NA-in-map-incorrect
2 parents 53a885c + 47b56ea commit acfe152

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+1233
-592
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ jobs:
380380
fetch-depth: 0
381381

382382
- name: Set up Python Free-threading Version
383-
uses: deadsnakes/action@v3.1.0
383+
uses: deadsnakes/action@v3.2.0
384384
with:
385385
python-version: 3.13-dev
386386
nogil: true

ci/code_checks.sh

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7373
-i "pandas.NA SA01" \
7474
-i "pandas.Period.freq GL08" \
7575
-i "pandas.Period.ordinal GL08" \
76-
-i "pandas.Period.strftime PR01,SA01" \
77-
-i "pandas.Period.to_timestamp SA01" \
78-
-i "pandas.PeriodDtype SA01" \
7976
-i "pandas.PeriodDtype.freq SA01" \
80-
-i "pandas.RangeIndex PR07" \
8177
-i "pandas.RangeIndex.from_range PR01,SA01" \
82-
-i "pandas.RangeIndex.start SA01" \
8378
-i "pandas.RangeIndex.step SA01" \
84-
-i "pandas.RangeIndex.stop SA01" \
8579
-i "pandas.Series.cat.add_categories PR01,PR02" \
8680
-i "pandas.Series.cat.as_ordered PR01" \
8781
-i "pandas.Series.cat.as_unordered PR01" \
@@ -95,12 +89,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9589
-i "pandas.Series.dt.day_name PR01,PR02" \
9690
-i "pandas.Series.dt.floor PR01,PR02" \
9791
-i "pandas.Series.dt.freq GL08" \
98-
-i "pandas.Series.dt.microseconds SA01" \
9992
-i "pandas.Series.dt.month_name PR01,PR02" \
100-
-i "pandas.Series.dt.nanoseconds SA01" \
10193
-i "pandas.Series.dt.normalize PR01" \
10294
-i "pandas.Series.dt.round PR01,PR02" \
103-
-i "pandas.Series.dt.seconds SA01" \
10495
-i "pandas.Series.dt.strftime PR01,PR02" \
10596
-i "pandas.Series.dt.to_period PR01,PR02" \
10697
-i "pandas.Series.dt.total_seconds PR01" \
@@ -112,39 +103,20 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
112103
-i "pandas.Series.sparse.from_coo PR07,SA01" \
113104
-i "pandas.Series.sparse.npoints SA01" \
114105
-i "pandas.Series.sparse.sp_values SA01" \
115-
-i "pandas.Timedelta.asm8 SA01" \
116-
-i "pandas.Timedelta.ceil SA01" \
117106
-i "pandas.Timedelta.components SA01" \
118-
-i "pandas.Timedelta.floor SA01" \
119107
-i "pandas.Timedelta.max PR02" \
120108
-i "pandas.Timedelta.min PR02" \
121109
-i "pandas.Timedelta.resolution PR02" \
122-
-i "pandas.Timedelta.round SA01" \
123-
-i "pandas.Timedelta.to_numpy PR01" \
124110
-i "pandas.Timedelta.to_timedelta64 SA01" \
125111
-i "pandas.Timedelta.total_seconds SA01" \
126-
-i "pandas.Timedelta.view SA01" \
127-
-i "pandas.TimedeltaIndex.components SA01" \
128-
-i "pandas.TimedeltaIndex.microseconds SA01" \
129-
-i "pandas.TimedeltaIndex.nanoseconds SA01" \
130-
-i "pandas.TimedeltaIndex.seconds SA01" \
131112
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
132113
-i "pandas.Timestamp.max PR02" \
133114
-i "pandas.Timestamp.min PR02" \
134115
-i "pandas.Timestamp.nanosecond GL08" \
135116
-i "pandas.Timestamp.resolution PR02" \
136117
-i "pandas.Timestamp.tzinfo GL08" \
137-
-i "pandas.Timestamp.value GL08" \
138118
-i "pandas.Timestamp.year GL08" \
139119
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
140-
-i "pandas.api.interchange.from_dataframe RT03,SA01" \
141-
-i "pandas.api.types.is_bool PR01,SA01" \
142-
-i "pandas.api.types.is_categorical_dtype SA01" \
143-
-i "pandas.api.types.is_complex PR01,SA01" \
144-
-i "pandas.api.types.is_complex_dtype SA01" \
145-
-i "pandas.api.types.is_datetime64_dtype SA01" \
146-
-i "pandas.api.types.is_datetime64_ns_dtype SA01" \
147-
-i "pandas.api.types.is_datetime64tz_dtype SA01" \
148120
-i "pandas.api.types.is_dict_like PR07,SA01" \
149121
-i "pandas.api.types.is_extension_array_dtype SA01" \
150122
-i "pandas.api.types.is_file_like PR07,SA01" \
@@ -178,7 +150,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
178150
-i "pandas.core.groupby.DataFrameGroupBy.agg RT03" \
179151
-i "pandas.core.groupby.DataFrameGroupBy.aggregate RT03" \
180152
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
181-
-i "pandas.core.groupby.DataFrameGroupBy.filter SA01" \
182153
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
183154
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
184155
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
@@ -194,7 +165,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
194165
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
195166
-i "pandas.core.groupby.SeriesGroupBy.agg RT03" \
196167
-i "pandas.core.groupby.SeriesGroupBy.aggregate RT03" \
197-
-i "pandas.core.groupby.SeriesGroupBy.filter PR01,SA01" \
198168
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
199169
-i "pandas.core.groupby.SeriesGroupBy.groups SA01" \
200170
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
@@ -224,7 +194,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
224194
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
225195
-i "pandas.core.resample.Resampler.var SA01" \
226196
-i "pandas.date_range RT03" \
227-
-i "pandas.errors.AbstractMethodError PR01,SA01" \
228197
-i "pandas.errors.AttributeConflictWarning SA01" \
229198
-i "pandas.errors.CSSWarning SA01" \
230199
-i "pandas.errors.CategoricalConversionWarning SA01" \

doc/source/whatsnew/v2.3.0.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ notable_bug_fix1
5353

5454
Deprecations
5555
~~~~~~~~~~~~
56-
-
56+
- Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`)
5757
-
5858

5959
.. ---------------------------------------------------------------------------
@@ -103,7 +103,8 @@ Conversion
103103
Strings
104104
^^^^^^^
105105
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
106-
-
106+
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
107+
107108

108109
Interval
109110
^^^^^^^^

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Other enhancements
5353
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
5454
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5555
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
56+
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
5657
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
5758
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
5859
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
@@ -503,6 +504,7 @@ Performance improvements
503504
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
504505
- :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`)
505506
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
507+
- Performance improvement in :class:`MultiIndex` when setting :attr:`MultiIndex.names`, which no longer invalidates all cached operations (:issue:`59578`)
506508
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
507509
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
508510
- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)
@@ -526,6 +528,7 @@ Performance improvements
526528
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
527529
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
528530
- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
531+
- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`)
529532
- Performance improvement in :meth:`to_hdf` by avoiding unnecessary reopenings of the HDF5 file, speeding up data addition to files with a very large number of groups (:issue:`58248`)
530533
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
531534
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
@@ -667,6 +670,7 @@ Reshaping
667670
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
668671
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
669672
- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
673+
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
670674
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
671675

672676
Sparse

pandas/_libs/lib.pyx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,10 +1123,21 @@ def is_bool(obj: object) -> bool:
11231123
"""
11241124
Return True if given object is boolean.
11251125

1126+
Parameters
1127+
----------
1128+
obj : object
1129+
Object to check.
1130+
11261131
Returns
11271132
-------
11281133
bool
11291134

1135+
See Also
1136+
--------
1137+
api.types.is_scalar : Check if the input is a scalar.
1138+
api.types.is_integer : Check if the input is an integer.
1139+
api.types.is_float : Check if the input is a float.
1140+
11301141
Examples
11311142
--------
11321143
>>> pd.api.types.is_bool(True)
@@ -1142,10 +1153,22 @@ def is_complex(obj: object) -> bool:
11421153
"""
11431154
Return True if given object is complex.
11441155

1156+
Parameters
1157+
----------
1158+
obj : object
1159+
Object to check.
1160+
11451161
Returns
11461162
-------
11471163
bool
11481164

1165+
See Also
1166+
--------
1167+
api.types.is_complex_dtype : Check whether the provided array or
1168+
dtype is of a complex dtype.
1169+
api.types.is_number : Check if the object is a number.
1170+
api.types.is_integer : Return True if given object is integer.
1171+
11491172
Examples
11501173
--------
11511174
>>> pd.api.types.is_complex(1 + 1j)

pandas/_libs/tslibs/period.pyx

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2001,6 +2001,12 @@ cdef class _Period(PeriodMixin):
20012001
-------
20022002
Timestamp
20032003

2004+
See Also
2005+
--------
2006+
Timestamp : A class representing a single point in time.
2007+
Period : Represents a span of time with a fixed frequency.
2008+
PeriodIndex.to_timestamp : Convert a `PeriodIndex` to a `DatetimeIndex`.
2009+
20042010
Examples
20052011
--------
20062012
>>> period = pd.Period('2023-1-1', freq='D')
@@ -2755,6 +2761,27 @@ cdef class _Period(PeriodMixin):
27552761
| ``%%`` | A literal ``'%'`` character. | |
27562762
+-----------+--------------------------------+-------+
27572763

2764+
The `strftime` method provides a way to represent a :class:`Period`
2765+
object as a string in a specified format. This is particularly useful
2766+
when displaying date and time data in different locales or customized
2767+
formats, suitable for reports or user interfaces. It extends the standard
2768+
Python string formatting capabilities with additional directives specific
2769+
to `pandas`, accommodating features like fiscal years and precise
2770+
sub-second components.
2771+
2772+
Parameters
2773+
----------
2774+
fmt : str or None
2775+
String containing the desired format directives. If ``None``, the
2776+
format is determined based on the Period's frequency.
2777+
2778+
See Also
2779+
--------
2780+
Timestamp.strftime : Return a formatted string of the Timestamp.
2781+
to_datetime : Convert argument to datetime.
2782+
time.strftime : Format a time object as a string according to a
2783+
specified format string in the standard Python library.
2784+
27582785
Notes
27592786
-----
27602787

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,9 +1421,16 @@ cdef class _Timedelta(timedelta):
14211421
"""
14221422
Convert the Timedelta to a NumPy timedelta64.
14231423

1424-
This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
1425-
copy parameters are available here only for compatibility. Their values
1426-
will not affect the return value.
1424+
This is an alias method for `Timedelta.to_timedelta64()`.
1425+
1426+
Parameters
1427+
----------
1428+
dtype : NoneType
1429+
It is available here only for compatibility. Its value will not
1430+
affect the return value.
1431+
copy : bool, default False
1432+
It is available here only for compatibility. Its value will not
1433+
affect the return value.
14271434

14281435
Returns
14291436
-------
@@ -1451,11 +1458,26 @@ cdef class _Timedelta(timedelta):
14511458
"""
14521459
Array view compatibility.
14531460
1461+
This method allows you to reinterpret the underlying data of a Timedelta
1462+
object as a different dtype. The `view` method provides a way to reinterpret
1463+
the internal representation of the `Timedelta` object without modifying its
1464+
data. This is particularly useful when you need to work with the underlying
1465+
data directly, such as for performance optimizations or interfacing with
1466+
low-level APIs. The returned value is typically the duration expressed as a
1467+
number of nanoseconds, represented as an integer or another specified dtype.
1468+
14541469
Parameters
14551470
----------
14561471
dtype : str or dtype
14571472
The dtype to view the underlying data as.
14581473
1474+
See Also
1475+
--------
1476+
numpy.ndarray.view : Returns a view of an array with the same data.
1477+
Timedelta.to_numpy : Converts the Timedelta to a NumPy timedelta64.
1478+
Timedelta.total_seconds : Returns the total duration of the Timedelta
1479+
object in seconds.
1480+
14591481
Examples
14601482
--------
14611483
>>> td = pd.Timedelta('3D')
@@ -1498,6 +1520,12 @@ cdef class _Timedelta(timedelta):
14981520
numpy timedelta64 array scalar view
14991521
Array scalar view of the timedelta in nanoseconds.
15001522

1523+
See Also
1524+
--------
1525+
Timedelta.total_seconds : Return the total seconds in the duration.
1526+
Timedelta.components : Return a namedtuple of the Timedelta's components.
1527+
Timedelta.to_timedelta64 : Convert the Timedelta to a numpy.timedelta64.
1528+
15011529
Examples
15021530
--------
15031531
>>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
@@ -2061,6 +2089,12 @@ class Timedelta(_Timedelta):
20612089
------
20622090
ValueError if the freq cannot be converted
20632091
2092+
See Also
2093+
--------
2094+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2095+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2096+
Timestamp.ceil : Similar method for Timestamp objects.
2097+
20642098
Examples
20652099
--------
20662100
>>> td = pd.Timedelta('1001ms')
@@ -2081,6 +2115,16 @@ class Timedelta(_Timedelta):
20812115
Frequency string indicating the flooring resolution.
20822116
It uses the same units as class constructor :class:`~pandas.Timedelta`.
20832117
2118+
Returns
2119+
-------
2120+
Timedelta
2121+
A new Timedelta object floored to the specified resolution.
2122+
2123+
See Also
2124+
--------
2125+
Timestamp.ceil : Round the Timestamp up to the nearest specified resolution.
2126+
Timestamp.round : Round the Timestamp to the nearest specified resolution.
2127+
20842128
Examples
20852129
--------
20862130
>>> td = pd.Timedelta('1001ms')
@@ -2101,6 +2145,16 @@ class Timedelta(_Timedelta):
21012145
Frequency string indicating the ceiling resolution.
21022146
It uses the same units as class constructor :class:`~pandas.Timedelta`.
21032147
2148+
Returns
2149+
-------
2150+
Timedelta
2151+
A new Timedelta object ceiled to the specified resolution.
2152+
2153+
See Also
2154+
--------
2155+
Timedelta.floor : Floor the Timedelta to the specified resolution.
2156+
Timedelta.round : Round the Timedelta to the nearest specified resolution.
2157+
21042158
Examples
21052159
--------
21062160
>>> td = pd.Timedelta('1001ms')

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,27 @@ cdef class _Timestamp(ABCTimestamp):
240240

241241
@property
242242
def value(self) -> int:
243+
"""
244+
Return the value of the Timestamp.
245+
246+
Returns
247+
-------
248+
int
249+
The integer representation of the Timestamp object in nanoseconds
250+
since the Unix epoch (1970-01-01 00:00:00 UTC).
251+
252+
See Also
253+
--------
254+
Timestamp.second : Return the second of the Timestamp.
255+
Timestamp.minute : Return the minute of the Timestamp.
256+
257+
Examples
258+
--------
259+
>>> ts = pd.Timestamp("2024-08-31 16:16:30")
260+
>>> ts.value
261+
1725120990000000000
262+
"""
263+
243264
try:
244265
return convert_reso(self._value, self._creso, NPY_FR_ns, False)
245266
except OverflowError:
@@ -1020,8 +1041,8 @@ cdef class _Timestamp(ABCTimestamp):
10201041

10211042
See Also
10221043
--------
1023-
Timestamp.day : Return the day of the year.
1024-
Timestamp.year : Return the year of the week.
1044+
Timestamp.day : Return the day of the Timestamp.
1045+
Timestamp.year : Return the year of the Timestamp.
10251046

10261047
Examples
10271048
--------

0 commit comments

Comments
 (0)