Skip to content

Commit b56ad16

Browse files
committed
Merge branch 'main' into depr-make_block
2 parents 87ac3f0 + 76c7274 commit b56ad16

File tree

144 files changed

+1348
-783
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

144 files changed

+1348
-783
lines changed

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ jobs:
140140
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
141141

142142
- name: Build wheels
143-
uses: pypa/[email protected].0
143+
uses: pypa/[email protected].1
144144
with:
145145
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
146146
env:

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.4.3
22+
rev: v0.4.7
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -40,7 +40,7 @@ repos:
4040
pass_filenames: true
4141
require_serial: false
4242
- repo: https://github.com/codespell-project/codespell
43-
rev: v2.2.6
43+
rev: v2.3.0
4444
hooks:
4545
- id: codespell
4646
types_or: [python, rst, markdown, cython, c]
@@ -92,7 +92,7 @@ repos:
9292
- id: sphinx-lint
9393
args: ["--enable", "all", "--disable", "line-too-long"]
9494
- repo: https://github.com/pre-commit/mirrors-clang-format
95-
rev: v18.1.4
95+
rev: v18.1.5
9696
hooks:
9797
- id: clang-format
9898
files: ^pandas/_libs/src|^pandas/_libs/include

asv_bench/benchmarks/tslibs/fields.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,15 @@ class TimeGetTimedeltaField:
1919
def setup(self, size, field):
2020
arr = np.random.randint(0, 10, size=size, dtype="i8")
2121
self.i8data = arr
22+
arr = np.random.randint(-86400 * 1_000_000_000, 0, size=size, dtype="i8")
23+
self.i8data_negative = arr
2224

2325
def time_get_timedelta_field(self, size, field):
2426
get_timedelta_field(self.i8data, field)
2527

28+
def time_get_timedelta_field_negative_td(self, size, field):
29+
get_timedelta_field(self.i8data_negative, field)
30+
2631

2732
class TimeGetDateField:
2833
params = [
@@ -72,3 +77,6 @@ def setup(self, size, side, period, freqstr, month_kw):
7277

7378
def time_get_start_end_field(self, size, side, period, freqstr, month_kw):
7479
get_start_end_field(self.i8data, self.attrname, freqstr, month_kw=month_kw)
80+
81+
82+
from ..pandas_vb_common import setup # noqa: F401 isort:skip

ci/code_checks.sh

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -74,39 +74,31 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7474
-i "pandas.DataFrame.mean RT03,SA01" \
7575
-i "pandas.DataFrame.median RT03,SA01" \
7676
-i "pandas.DataFrame.min RT03" \
77-
-i "pandas.DataFrame.plot PR02,SA01" \
77+
-i "pandas.DataFrame.plot PR02" \
7878
-i "pandas.Grouper PR02" \
79-
-i "pandas.MultiIndex PR01" \
8079
-i "pandas.MultiIndex.append PR07,SA01" \
8180
-i "pandas.MultiIndex.copy PR07,RT03,SA01" \
8281
-i "pandas.MultiIndex.drop PR07,RT03,SA01" \
83-
-i "pandas.MultiIndex.dtypes SA01" \
8482
-i "pandas.MultiIndex.get_level_values SA01" \
8583
-i "pandas.MultiIndex.get_loc PR07" \
8684
-i "pandas.MultiIndex.get_loc_level PR07" \
87-
-i "pandas.MultiIndex.levels SA01" \
8885
-i "pandas.MultiIndex.levshape SA01" \
8986
-i "pandas.MultiIndex.names SA01" \
9087
-i "pandas.MultiIndex.nlevels SA01" \
9188
-i "pandas.MultiIndex.remove_unused_levels RT03,SA01" \
9289
-i "pandas.MultiIndex.reorder_levels RT03,SA01" \
93-
-i "pandas.MultiIndex.set_codes SA01" \
9490
-i "pandas.MultiIndex.set_levels RT03,SA01" \
9591
-i "pandas.MultiIndex.sortlevel PR07,SA01" \
9692
-i "pandas.MultiIndex.to_frame RT03" \
97-
-i "pandas.MultiIndex.truncate SA01" \
9893
-i "pandas.NA SA01" \
9994
-i "pandas.NaT SA01" \
10095
-i "pandas.NamedAgg SA01" \
101-
-i "pandas.Period SA01" \
10296
-i "pandas.Period.asfreq SA01" \
10397
-i "pandas.Period.freq GL08" \
10498
-i "pandas.Period.freqstr SA01" \
105-
-i "pandas.Period.is_leap_year SA01" \
10699
-i "pandas.Period.month SA01" \
107100
-i "pandas.Period.now SA01" \
108101
-i "pandas.Period.ordinal GL08" \
109-
-i "pandas.Period.quarter SA01" \
110102
-i "pandas.Period.strftime PR01,SA01" \
111103
-i "pandas.Period.to_timestamp SA01" \
112104
-i "pandas.Period.year SA01" \
@@ -172,7 +164,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
172164
-i "pandas.Series.lt SA01" \
173165
-i "pandas.Series.ne SA01" \
174166
-i "pandas.Series.pad PR01,SA01" \
175-
-i "pandas.Series.plot PR02,SA01" \
167+
-i "pandas.Series.plot PR02" \
176168
-i "pandas.Series.pop RT03,SA01" \
177169
-i "pandas.Series.prod RT03" \
178170
-i "pandas.Series.product RT03" \
@@ -367,7 +359,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
367359
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
368360
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
369361
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
370-
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02,SA01" \
362+
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
371363
-i "pandas.core.groupby.DataFrameGroupBy.prod SA01" \
372364
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
373365
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
@@ -385,7 +377,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
385377
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
386378
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
387379
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
388-
-i "pandas.core.groupby.SeriesGroupBy.plot PR02,SA01" \
380+
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
389381
-i "pandas.core.groupby.SeriesGroupBy.prod SA01" \
390382
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
391383
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
@@ -478,31 +470,16 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
478470
-i "pandas.merge PR07" \
479471
-i "pandas.merge_asof PR07,RT03" \
480472
-i "pandas.merge_ordered PR07" \
481-
-i "pandas.option_context SA01" \
482473
-i "pandas.period_range RT03,SA01" \
483474
-i "pandas.pivot PR07" \
484475
-i "pandas.pivot_table PR07" \
485476
-i "pandas.plotting.andrews_curves RT03,SA01" \
486-
-i "pandas.plotting.autocorrelation_plot RT03,SA01" \
487477
-i "pandas.plotting.lag_plot RT03,SA01" \
488-
-i "pandas.plotting.parallel_coordinates PR07,RT03,SA01" \
489-
-i "pandas.plotting.plot_params SA01" \
490478
-i "pandas.plotting.scatter_matrix PR07,SA01" \
491-
-i "pandas.plotting.table PR07,RT03,SA01" \
492479
-i "pandas.qcut PR07,SA01" \
493-
-i "pandas.read_feather SA01" \
494-
-i "pandas.read_orc SA01" \
495-
-i "pandas.read_sas SA01" \
496480
-i "pandas.read_spss SA01" \
497-
-i "pandas.reset_option SA01" \
498481
-i "pandas.set_eng_float_format RT03,SA01" \
499-
-i "pandas.set_option SA01" \
500-
-i "pandas.show_versions SA01" \
501-
-i "pandas.test SA01" \
502482
-i "pandas.testing.assert_extension_array_equal SA01" \
503-
-i "pandas.testing.assert_index_equal PR07,SA01" \
504-
-i "pandas.testing.assert_series_equal PR07,SA01" \
505-
-i "pandas.timedelta_range SA01" \
506483
-i "pandas.tseries.offsets.BDay PR02,SA01" \
507484
-i "pandas.tseries.offsets.BQuarterBegin PR02" \
508485
-i "pandas.tseries.offsets.BQuarterBegin.freqstr SA01" \
@@ -787,16 +764,13 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
787764
-i "pandas.tseries.offsets.YearBegin.nanos GL08" \
788765
-i "pandas.tseries.offsets.YearBegin.normalize GL08" \
789766
-i "pandas.tseries.offsets.YearBegin.rule_code GL08" \
790-
-i "pandas.tseries.offsets.YearEnd PR02" \
791767
-i "pandas.tseries.offsets.YearEnd.freqstr SA01" \
792768
-i "pandas.tseries.offsets.YearEnd.is_on_offset GL08" \
793769
-i "pandas.tseries.offsets.YearEnd.month GL08" \
794770
-i "pandas.tseries.offsets.YearEnd.n GL08" \
795771
-i "pandas.tseries.offsets.YearEnd.nanos GL08" \
796772
-i "pandas.tseries.offsets.YearEnd.normalize GL08" \
797773
-i "pandas.tseries.offsets.YearEnd.rule_code GL08" \
798-
-i "pandas.unique PR07" \
799-
-i "pandas.util.hash_array PR07,SA01" \
800774
-i "pandas.util.hash_pandas_object PR07,SA01" # There should be no backslash in the final line, please keep this comment in the last ignored function
801775

802776
RET=$(($RET + $?)) ; echo $MSG "DONE"

doc/source/user_guide/merging.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,7 @@ with optional filling of missing data with ``fill_method``.
974974
:func:`merge_asof`
975975
---------------------
976976

977-
:func:`merge_asof` is similar to an ordered left-join except that mactches are on the
977+
:func:`merge_asof` is similar to an ordered left-join except that matches are on the
978978
nearest key rather than equal keys. For each row in the ``left`` :class:`DataFrame`,
979979
the last row in the ``right`` :class:`DataFrame` is selected where the ``on`` key is less
980980
than the left's key. Both :class:`DataFrame` must be sorted by the key.

doc/source/user_guide/missing_data.rst

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use :class:`api.typing.NaTType`.
3232
:class:`NA` for :class:`StringDtype`, :class:`Int64Dtype` (and other bit widths),
3333
:class:`Float64Dtype` (and other bit widths), :class:`BooleanDtype` and :class:`ArrowDtype`.
3434
These types will maintain the original data type of the data.
35-
For typing applications, use :class:`api.types.NAType`.
35+
For typing applications, use :class:`api.typing.NAType`.
3636

3737
.. ipython:: python
3838
@@ -337,10 +337,8 @@ When taking the product, NA values or empty data will be treated as 1.
337337
pd.Series([], dtype="float64").prod()
338338
339339
Cumulative methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`
340-
ignore NA values by default preserve them in the result. This behavior can be changed
341-
with ``skipna``
342-
343-
* Cumulative methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` ignore NA values by default, but preserve them in the resulting arrays. To override this behaviour and include NA values, use ``skipna=False``.
340+
ignore NA values by default, but preserve them in the resulting array. To override
341+
this behaviour and include NA values in the calculation, use ``skipna=False``.
344342

345343

346344
.. ipython:: python

doc/source/user_guide/timeseries.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,10 @@ frequencies. We will refer to these aliases as *offset aliases*.
12731273
are deprecated in favour of the aliases ``h``, ``bh``, ``cbh``,
12741274
``min``, ``s``, ``ms``, ``us``, and ``ns``.
12751275

1276+
Aliases ``Y``, ``M``, and ``Q`` are deprecated in favour of the aliases
1277+
``YE``, ``ME``, ``QE``.
1278+
1279+
12761280
.. note::
12771281

12781282
When using the offset aliases above, it should be noted that functions

doc/source/whatsnew/v3.0.0.rst

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ Other enhancements
4444
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
4545
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
4646
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
47+
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
4748
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
48-
-
4949

5050
.. ---------------------------------------------------------------------------
5151
.. _whatsnew_300.notable_bug_fixes:
@@ -124,6 +124,69 @@ notable_bug_fix2
124124
Backwards incompatible API changes
125125
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
126126

127+
.. _whatsnew_300.api_breaking.datetime_resolution_inference:
128+
129+
Datetime resolution inference
130+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
131+
132+
Converting a sequence of strings, ``datetime`` objects, or ``np.datetime64`` objects to
133+
a ``datetime64`` dtype now performs inference on the appropriate resolution (AKA unit) for the output dtype. This affects :class:`Series`, :class:`DataFrame`, :class:`Index`, :class:`DatetimeIndex`, and :func:`to_datetime`.
134+
135+
Previously, these would always give nanosecond resolution:
136+
137+
.. code-block:: ipython
138+
139+
In [1]: dt = pd.Timestamp("2024-03-22 11:36").to_pydatetime()
140+
In [2]: pd.to_datetime([dt]).dtype
141+
Out[2]: dtype('<M8[ns]')
142+
In [3]: pd.Index([dt]).dtype
143+
Out[3]: dtype('<M8[ns]')
144+
In [4]: pd.DatetimeIndex([dt]).dtype
145+
Out[4]: dtype('<M8[ns]')
146+
In [5]: pd.Series([dt]).dtype
147+
Out[5]: dtype('<M8[ns]')
148+
149+
This now infers the microsecond unit "us" from the pydatetime object, matching the scalar :class:`Timestamp` behavior.
150+
151+
.. ipython:: python
152+
153+
In [1]: dt = pd.Timestamp("2024-03-22 11:36").to_pydatetime()
154+
In [2]: pd.to_datetime([dt]).dtype
155+
In [3]: pd.Index([dt]).dtype
156+
In [4]: pd.DatetimeIndex([dt]).dtype
157+
In [5]: pd.Series([dt]).dtype
158+
159+
Similarly, when passed a sequence of ``np.datetime64`` objects, the resolution of the passed objects will be retained (or for lower-than-second resolution, second resolution will be used).
160+
161+
When passing strings, the resolution will depend on the precision of the string, again matching the :class:`Timestamp` behavior. Previously:
162+
163+
.. code-block:: ipython
164+
165+
In [2]: pd.to_datetime(["2024-03-22 11:43:01"]).dtype
166+
Out[2]: dtype('<M8[ns]')
167+
In [3]: pd.to_datetime(["2024-03-22 11:43:01.002"]).dtype
168+
Out[3]: dtype('<M8[ns]')
169+
In [4]: pd.to_datetime(["2024-03-22 11:43:01.002003"]).dtype
170+
Out[4]: dtype('<M8[ns]')
171+
In [5]: pd.to_datetime(["2024-03-22 11:43:01.002003004"]).dtype
172+
Out[5]: dtype('<M8[ns]')
173+
174+
The inferred resolution now matches that of the input strings:
175+
176+
.. ipython:: python
177+
178+
In [2]: pd.to_datetime(["2024-03-22 11:43:01"]).dtype
179+
In [3]: pd.to_datetime(["2024-03-22 11:43:01.002"]).dtype
180+
In [4]: pd.to_datetime(["2024-03-22 11:43:01.002003"]).dtype
181+
In [5]: pd.to_datetime(["2024-03-22 11:43:01.002003004"]).dtype
182+
183+
In cases with mixed-resolution inputs, the highest resolution is used:
184+
185+
.. code-block:: ipython
186+
187+
In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype
188+
Out[2]: dtype('<M8[ns]')
189+
127190
.. _whatsnew_300.api_breaking.deps:
128191

129192
Increased minimum versions for dependencies
@@ -201,6 +264,7 @@ Other Deprecations
201264
^^^^^^^^^^^^^^^^^^
202265

203266
- Deprecated :func:`core.internals.api.make_block`, use public APIs instead (:issue:`56815`)
267+
- Deprecated :meth:`.DataFrameGroupBy.corrwith` (:issue:`57158`)
204268
- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`)
205269
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
206270
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
@@ -248,6 +312,7 @@ Removal of prior version deprecations/changes
248312
- Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`)
249313
- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
250314
- Stopped automatically casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
315+
- Stopped performing dtype inference in :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object (:class:`Series`, :class:`Index`, :class:`ExtensionArray`), call ``.infer_objects`` on the input to keep the current behavior (:issue:`56012`)
251316
- Stopped performing dtype inference when setting a :class:`Index` into a :class:`DataFrame` (:issue:`56102`)
252317
- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
253318
- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
@@ -440,6 +505,7 @@ Missing
440505
MultiIndex
441506
^^^^^^^^^^
442507
- :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` when setting a value adds extra columns (:issue:`58116`)
508+
- :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`)
443509
-
444510

445511
I/O
@@ -472,7 +538,7 @@ Groupby/resample/rolling
472538
- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
473539
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
474540
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
475-
541+
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
476542

477543
Reshaping
478544
^^^^^^^^^

pandas/_config/config.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,14 @@ def set_option(*args) -> None:
211211
TypeError if keyword arguments are provided
212212
OptionError if no such option exists
213213
214+
See Also
215+
--------
216+
get_option : Retrieve the value of the specified option.
217+
reset_option : Reset one or more options to their default value.
218+
describe_option : Print the description for one or more registered options.
219+
option_context : Context manager to temporarily set options in a ``with``
220+
statement.
221+
214222
Notes
215223
-----
216224
For all available options, please view the :ref:`User Guide <options.available>`
@@ -315,6 +323,12 @@ def reset_option(pat: str) -> None:
315323
None
316324
No return value.
317325
326+
See Also
327+
--------
328+
get_option : Retrieve the value of the specified option.
329+
set_option : Set the value of the specified option or options.
330+
describe_option : Print the description for one or more registered options.
331+
318332
Notes
319333
-----
320334
For all available options, please view the
@@ -406,6 +420,13 @@ def option_context(*args) -> Generator[None, None, None]:
406420
None
407421
No return value.
408422
423+
See Also
424+
--------
425+
get_option : Retrieve the value of the specified option.
426+
set_option : Set the value of the specified option.
427+
reset_option : Reset one or more options to their default value.
428+
describe_option : Print the description for one or more registered options.
429+
409430
Notes
410431
-----
411432
For all available options, please view the :ref:`User Guide <options.available>`

0 commit comments

Comments
 (0)