Skip to content

Commit 4e0af44

Browse files
committed
Merge remote-tracking branch 'upstream/main' into ref/ri/methods
2 parents 6005d9c + 2536d3a commit 4e0af44

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+508
-182
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9999
-i "pandas.DataFrame.std PR01,RT03,SA01" \
100100
-i "pandas.DataFrame.sum RT03" \
101101
-i "pandas.DataFrame.swaplevel SA01" \
102-
-i "pandas.DataFrame.to_feather SA01" \
103102
-i "pandas.DataFrame.to_markdown SA01" \
104103
-i "pandas.DataFrame.to_parquet RT03" \
105104
-i "pandas.DataFrame.var PR01,RT03,SA01" \
106-
-i "pandas.DatetimeIndex.date SA01" \
107105
-i "pandas.DatetimeIndex.day_of_year SA01" \
108106
-i "pandas.DatetimeIndex.dayofyear SA01" \
109107
-i "pandas.DatetimeIndex.freqstr SA01" \
@@ -115,14 +113,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
115113
-i "pandas.DatetimeIndex.std PR01,RT03" \
116114
-i "pandas.DatetimeIndex.to_period RT03" \
117115
-i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \
118-
-i "pandas.DatetimeIndex.tz SA01" \
119116
-i "pandas.DatetimeIndex.tz_convert RT03" \
120117
-i "pandas.DatetimeTZDtype SA01" \
121118
-i "pandas.DatetimeTZDtype.tz SA01" \
122-
-i "pandas.DatetimeTZDtype.unit SA01" \
123119
-i "pandas.Grouper PR02" \
124-
-i "pandas.HDFStore.put PR01,SA01" \
125-
-i "pandas.HDFStore.walk SA01" \
126120
-i "pandas.Index PR07" \
127121
-i "pandas.Index.T SA01" \
128122
-i "pandas.Index.append PR07,RT03,SA01" \
@@ -159,7 +153,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
159153
-i "pandas.Index.take PR01,PR07" \
160154
-i "pandas.Index.to_list RT03" \
161155
-i "pandas.Index.union PR07,RT03,SA01" \
162-
-i "pandas.Index.unique RT03" \
163156
-i "pandas.Index.view GL08" \
164157
-i "pandas.Int16Dtype SA01" \
165158
-i "pandas.Int32Dtype SA01" \
@@ -270,7 +263,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
270263
-i "pandas.Series.dt.as_unit PR01,PR02" \
271264
-i "pandas.Series.dt.ceil PR01,PR02" \
272265
-i "pandas.Series.dt.components SA01" \
273-
-i "pandas.Series.dt.date SA01" \
274266
-i "pandas.Series.dt.day_name PR01,PR02" \
275267
-i "pandas.Series.dt.day_of_year SA01" \
276268
-i "pandas.Series.dt.dayofyear SA01" \
@@ -290,7 +282,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
290282
-i "pandas.Series.dt.strftime PR01,PR02" \
291283
-i "pandas.Series.dt.to_period PR01,PR02,RT03" \
292284
-i "pandas.Series.dt.total_seconds PR01" \
293-
-i "pandas.Series.dt.tz SA01" \
294285
-i "pandas.Series.dt.tz_convert PR01,PR02,RT03" \
295286
-i "pandas.Series.dt.tz_localize PR01,PR02" \
296287
-i "pandas.Series.dt.unit GL08" \

doc/source/development/community.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ The pandas mailing list `pandas-dev@python.org <mailto://pandas-dev@python
100100
conversations and to engage people in the wider community who might not
101101
be active on the issue tracker but we would like to include in discussions.
102102

103+
Join the mailing list and view the archives `here <https://mail.python.org/mailman/listinfo/pandas-dev>`_.
104+
103105
.. _community.slack:
104106

105107
Community slack

doc/source/development/contributing_codebase.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -557,11 +557,12 @@ is being raised, using ``pytest.raises`` instead.
557557
Testing a warning
558558
^^^^^^^^^^^^^^^^^
559559

560-
Use ``tm.assert_produces_warning`` as a context manager to check that a block of code raises a warning.
560+
Use ``tm.assert_produces_warning`` as a context manager to check that a block of code raises a warning
561+
and specify the warning message using the ``match`` argument.
561562

562563
.. code-block:: python
563564
564-
with tm.assert_produces_warning(DeprecationWarning):
565+
with tm.assert_produces_warning(DeprecationWarning, match="the warning message"):
565566
pd.deprecated_function()
566567
567568
If a warning should specifically not happen in a block of code, pass ``False`` into the context manager.

doc/source/user_guide/style.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1908,7 +1908,7 @@
19081908
"- Provide an API that is pleasing to use interactively and is \"good enough\" for many tasks\n",
19091909
"- Provide the foundations for dedicated libraries to build on\n",
19101910
"\n",
1911-
"If you build a great library on top of this, let us know and we'll [link](https://pandas.pydata.org/pandas-docs/stable/ecosystem.html) to it.\n",
1911+
"If you build a great library on top of this, let us know and we'll [link](https://pandas.pydata.org/community/ecosystem.html) to it.\n",
19121912
"\n",
19131913
"### Subclassing\n",
19141914
"\n",

doc/source/whatsnew/v3.0.0.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Other enhancements
4141
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
4242
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
4343
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
44+
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
4445

4546
.. ---------------------------------------------------------------------------
4647
.. _whatsnew_300.notable_bug_fixes:
@@ -158,6 +159,7 @@ Other API changes
158159
- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`)
159160
- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
160161
- pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)
162+
- When comparing the indexes in :func:`testing.assert_series_equal`, ``check_exact`` defaults to ``True`` if an :class:`Index` is of integer dtype. (:issue:`57386`)
161163

162164
.. ---------------------------------------------------------------------------
163165
.. _whatsnew_300.deprecations:
@@ -199,6 +201,7 @@ Other Deprecations
199201
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
200202
- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)
201203
- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
204+
- Deprecated behavior of :meth:`Series.dt.to_pytimedelta`. In a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta, matching the behavior of other :meth:`Series.dt` properties. (:issue:`57463`)
202205
- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
203206
-
204207

@@ -220,6 +223,7 @@ Removal of prior version deprecations/changes
220223
- Disallow automatic casting to object in :class:`Series` logical operations (``&``, ``^``, ``|``) between series with mismatched indexes and dtypes other than ``object`` or ``bool`` (:issue:`52538`)
221224
- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`)
222225
- Disallow constructing a :class:`arrays.SparseArray` with scalar data (:issue:`53039`)
226+
- Disallow indexing an :class:`Index` with a boolean indexer of length zero; it now raises ``ValueError`` (:issue:`55820`)
223227
- Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`)
224228
- Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`)
225229
- Disallow units other than "s", "ms", "us", "ns" for datetime64 and timedelta64 dtypes in :func:`array` (:issue:`53817`)
@@ -329,6 +333,7 @@ Performance improvements
329333
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
330334
- Performance improvement in :meth:`Index.to_frame` returning a :class:`RangeIndex` columns of a :class:`Index` when possible. (:issue:`58018`)
331335
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
336+
- Performance improvement in :meth:`MultiIndex.memory_usage` to ignore the index engine when it isn't already cached. (:issue:`58385`)
332337
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
333338
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
334339
- Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`)
@@ -363,6 +368,7 @@ Datetimelike
363368
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
364369
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
365370
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
371+
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
366372

367373
Timedelta
368374
^^^^^^^^^
@@ -413,6 +419,7 @@ MultiIndex
413419
I/O
414420
^^^
415421
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
422+
- Bug in :meth:`DataFrame.to_dict` raising an unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
416423
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
417424
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
418425
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)

pandas/_testing/asserters.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -861,12 +861,19 @@ def assert_series_equal(
861861
check_names : bool, default True
862862
Whether to check the Series and Index names attribute.
863863
check_exact : bool, default False
864-
Whether to compare number exactly.
864+
Whether to compare numbers exactly. This also applies when checking
865+
Index equivalence.
865866
866867
.. versionchanged:: 2.2.0
867868
868869
Defaults to True for integer dtypes if none of
869870
``check_exact``, ``rtol`` and ``atol`` are specified.
871+
872+
.. versionchanged:: 3.0.0
873+
874+
check_exact for comparing the Indexes defaults to True by
875+
checking if an Index is of integer dtypes.
876+
870877
check_datetimelike_compat : bool, default False
871878
Compare datetime-like which is comparable ignoring dtype.
872879
check_categorical : bool, default True
@@ -902,7 +909,6 @@ def assert_series_equal(
902909
>>> tm.assert_series_equal(a, b)
903910
"""
904911
__tracebackhide__ = True
905-
check_exact_index = False if check_exact is lib.no_default else check_exact
906912
if (
907913
check_exact is lib.no_default
908914
and rtol is lib.no_default
@@ -914,8 +920,20 @@ def assert_series_equal(
914920
or is_numeric_dtype(right.dtype)
915921
and not is_float_dtype(right.dtype)
916922
)
923+
left_index_dtypes = (
924+
[left.index.dtype] if left.index.nlevels == 1 else left.index.dtypes
925+
)
926+
right_index_dtypes = (
927+
[right.index.dtype] if right.index.nlevels == 1 else right.index.dtypes
928+
)
929+
check_exact_index = all(
930+
dtype.kind in "iu" for dtype in left_index_dtypes
931+
) or all(dtype.kind in "iu" for dtype in right_index_dtypes)
917932
elif check_exact is lib.no_default:
918933
check_exact = False
934+
check_exact_index = False
935+
else:
936+
check_exact_index = check_exact
919937

920938
rtol = rtol if rtol is not lib.no_default else 1.0e-5
921939
atol = atol if atol is not lib.no_default else 1.0e-8

pandas/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def pytest_collection_modifyitems(items, config) -> None:
157157
("SeriesGroupBy.fillna", "SeriesGroupBy.fillna is deprecated"),
158158
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
159159
("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
160+
("to_pytimedelta", "The behavior of TimedeltaProperties.to_pytimedelta"),
160161
# Docstring divides by zero to show behavior difference
161162
("missing.mask_zero_div_zero", "divide by zero encountered"),
162163
(

pandas/core/arrays/categorical.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from shutil import get_terminal_size
77
from typing import (
88
TYPE_CHECKING,
9+
Callable,
910
Literal,
1011
cast,
1112
overload,
@@ -2508,6 +2509,28 @@ def equals(self, other: object) -> bool:
25082509
return np.array_equal(self._codes, other._codes)
25092510
return False
25102511

2512+
def _accumulate(self, name: str, skipna: bool = True, **kwargs) -> Self:
2513+
func: Callable
2514+
if name == "cummin":
2515+
func = np.minimum.accumulate
2516+
elif name == "cummax":
2517+
func = np.maximum.accumulate
2518+
else:
2519+
raise TypeError(f"Accumulation {name} not supported for {type(self)}")
2520+
self.check_for_ordered(name)
2521+
2522+
codes = self.codes.copy()
2523+
mask = self.isna()
2524+
if func == np.minimum.accumulate:
2525+
codes[mask] = np.iinfo(codes.dtype.type).max
2526+
# no need to change codes for maximum because codes[mask] is already -1
2527+
if not skipna:
2528+
mask = np.maximum.accumulate(mask)
2529+
2530+
codes = func(codes)
2531+
codes[mask] = -1
2532+
return self._simple_new(codes, dtype=self._dtype)
2533+
25112534
@classmethod
25122535
def _concat_same_type(cls, to_concat: Sequence[Self], axis: AxisInt = 0) -> Self:
25132536
from pandas.core.dtypes.concat import union_categoricals

pandas/core/arrays/datetimes.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ def _unbox_scalar(self, value) -> np.datetime64:
539539
if value is NaT:
540540
return np.datetime64(value._value, self.unit)
541541
else:
542-
return value.as_unit(self.unit).asm8
542+
return value.as_unit(self.unit, round_ok=False).asm8
543543

544544
def _scalar_from_string(self, value) -> Timestamp | NaTType:
545545
return Timestamp(value, tz=self.tz)
@@ -593,6 +593,13 @@ def tz(self) -> tzinfo | None:
593593
datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
594594
Returns None when the array is tz-naive.
595595
596+
See Also
597+
--------
598+
DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
599+
given time zone, or remove timezone from a tz-aware DatetimeIndex.
600+
DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
601+
one time zone to another.
602+
596603
Examples
597604
--------
598605
For Series:
@@ -1476,6 +1483,14 @@ def date(self) -> npt.NDArray[np.object_]:
14761483
Namely, the date part of Timestamps without time and
14771484
timezone information.
14781485
1486+
See Also
1487+
--------
1488+
DatetimeIndex.time : Returns numpy array of :class:`datetime.time` objects.
1489+
The time part of the Timestamps.
1490+
DatetimeIndex.year : The year of the datetime.
1491+
DatetimeIndex.month : The month as January=1, December=12.
1492+
DatetimeIndex.day : The day of the datetime.
1493+
14791494
Examples
14801495
--------
14811496
For Series:

pandas/core/arrays/timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ def _unbox_scalar(self, value) -> np.timedelta64:
322322
if value is NaT:
323323
return np.timedelta64(value._value, self.unit)
324324
else:
325-
return value.as_unit(self.unit).asm8
325+
return value.as_unit(self.unit, round_ok=False).asm8
326326

327327
def _scalar_from_string(self, value) -> Timedelta | NaTType:
328328
return Timedelta(value)

pandas/core/dtypes/dtypes.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -793,6 +793,10 @@ def unit(self) -> str_type:
793793
"""
794794
The precision of the datetime data.
795795
796+
See Also
797+
--------
798+
DatetimeTZDtype.tz : Retrieves the timezone.
799+
796800
Examples
797801
--------
798802
>>> from zoneinfo import ZoneInfo

pandas/core/frame.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2685,6 +2685,16 @@ def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None:
26852685
This includes the `compression`, `compression_level`, `chunksize`
26862686
and `version` keywords.
26872687
2688+
See Also
2689+
--------
2690+
DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
2691+
DataFrame.to_excel : Write object to an Excel sheet.
2692+
DataFrame.to_sql : Write to a sql table.
2693+
DataFrame.to_csv : Write a csv file.
2694+
DataFrame.to_json : Convert the object to a JSON string.
2695+
DataFrame.to_html : Render a DataFrame as an HTML table.
2696+
DataFrame.to_string : Convert DataFrame to a string.
2697+
26882698
Notes
26892699
-----
26902700
This function writes the dataframe as a `feather file
@@ -4012,7 +4022,6 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
40124022
return series._values[index]
40134023

40144024
series = self._get_item(col)
4015-
engine = self.index._engine
40164025

40174026
if not isinstance(self.index, MultiIndex):
40184027
# CategoricalIndex: Trying to use the engine fastpath may give incorrect
@@ -4023,7 +4032,7 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
40234032

40244033
# For MultiIndex going through engine effectively restricts us to
40254034
# same-length tuples; see test_get_set_value_no_partial_indexing
4026-
loc = engine.get_loc(index)
4035+
loc = self.index._engine.get_loc(index)
40274036
return series._values[loc]
40284037

40294038
def isetitem(self, loc, value) -> None:

pandas/core/indexes/accessors.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
NoReturn,
1010
cast,
1111
)
12+
import warnings
1213

1314
import numpy as np
1415

1516
from pandas._libs import lib
17+
from pandas.util._exceptions import find_stack_level
1618

1719
from pandas.core.dtypes.common import (
1820
is_integer_dtype,
@@ -210,6 +212,15 @@ def _delegate_method(self, name: str, *args, **kwargs):
210212
return result
211213

212214
def to_pytimedelta(self):
215+
# GH 57463
216+
warnings.warn(
217+
f"The behavior of {type(self).__name__}.to_pytimedelta is deprecated, "
218+
"in a future version this will return a Series containing python "
219+
"datetime.timedelta objects instead of an ndarray. To retain the "
220+
"old behavior, call `np.array` on the result",
221+
FutureWarning,
222+
stacklevel=find_stack_level(),
223+
)
213224
return cast(ArrowExtensionArray, self._parent.array)._dt_to_pytimedelta()
214225

215226
def to_pydatetime(self) -> Series:
@@ -462,6 +473,15 @@ def to_pytimedelta(self) -> np.ndarray:
462473
datetime.timedelta(days=2), datetime.timedelta(days=3),
463474
datetime.timedelta(days=4)], dtype=object)
464475
"""
476+
# GH 57463
477+
warnings.warn(
478+
f"The behavior of {type(self).__name__}.to_pytimedelta is deprecated, "
479+
"in a future version this will return a Series containing python "
480+
"datetime.timedelta objects instead of an ndarray. To retain the "
481+
"old behavior, call `np.array` on the result",
482+
FutureWarning,
483+
stacklevel=find_stack_level(),
484+
)
465485
return self._get_values().to_pytimedelta()
466486

467487
@property

0 commit comments

Comments
 (0)