Skip to content

Commit b49e273

Browse files
authored
Merge branch 'pandas-dev:main' into feature
2 parents c379bd0 + b0c4194 commit b49e273

File tree

97 files changed

+967
-527
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+967
-527
lines changed

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ jobs:
140140
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
141141

142142
- name: Build wheels
143-
uses: pypa/[email protected].0
143+
uses: pypa/[email protected].1
144144
with:
145145
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
146146
env:

asv_bench/benchmarks/tslibs/fields.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,15 @@ class TimeGetTimedeltaField:
1919
def setup(self, size, field):
2020
arr = np.random.randint(0, 10, size=size, dtype="i8")
2121
self.i8data = arr
22+
arr = np.random.randint(-86400 * 1_000_000_000, 0, size=size, dtype="i8")
23+
self.i8data_negative = arr
2224

2325
def time_get_timedelta_field(self, size, field):
2426
get_timedelta_field(self.i8data, field)
2527

28+
def time_get_timedelta_field_negative_td(self, size, field):
29+
get_timedelta_field(self.i8data_negative, field)
30+
2631

2732
class TimeGetDateField:
2833
params = [
@@ -72,3 +77,6 @@ def setup(self, size, side, period, freqstr, month_kw):
7277

7378
def time_get_start_end_field(self, size, side, period, freqstr, month_kw):
7479
get_start_end_field(self.i8data, self.attrname, freqstr, month_kw=month_kw)
80+
81+
82+
from ..pandas_vb_common import setup # noqa: F401 isort:skip

ci/code_checks.sh

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -74,39 +74,31 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7474
-i "pandas.DataFrame.mean RT03,SA01" \
7575
-i "pandas.DataFrame.median RT03,SA01" \
7676
-i "pandas.DataFrame.min RT03" \
77-
-i "pandas.DataFrame.plot PR02,SA01" \
77+
-i "pandas.DataFrame.plot PR02" \
7878
-i "pandas.Grouper PR02" \
79-
-i "pandas.MultiIndex PR01" \
8079
-i "pandas.MultiIndex.append PR07,SA01" \
8180
-i "pandas.MultiIndex.copy PR07,RT03,SA01" \
8281
-i "pandas.MultiIndex.drop PR07,RT03,SA01" \
83-
-i "pandas.MultiIndex.dtypes SA01" \
8482
-i "pandas.MultiIndex.get_level_values SA01" \
8583
-i "pandas.MultiIndex.get_loc PR07" \
8684
-i "pandas.MultiIndex.get_loc_level PR07" \
87-
-i "pandas.MultiIndex.levels SA01" \
8885
-i "pandas.MultiIndex.levshape SA01" \
8986
-i "pandas.MultiIndex.names SA01" \
9087
-i "pandas.MultiIndex.nlevels SA01" \
9188
-i "pandas.MultiIndex.remove_unused_levels RT03,SA01" \
9289
-i "pandas.MultiIndex.reorder_levels RT03,SA01" \
93-
-i "pandas.MultiIndex.set_codes SA01" \
9490
-i "pandas.MultiIndex.set_levels RT03,SA01" \
9591
-i "pandas.MultiIndex.sortlevel PR07,SA01" \
9692
-i "pandas.MultiIndex.to_frame RT03" \
97-
-i "pandas.MultiIndex.truncate SA01" \
9893
-i "pandas.NA SA01" \
9994
-i "pandas.NaT SA01" \
10095
-i "pandas.NamedAgg SA01" \
101-
-i "pandas.Period SA01" \
10296
-i "pandas.Period.asfreq SA01" \
10397
-i "pandas.Period.freq GL08" \
10498
-i "pandas.Period.freqstr SA01" \
105-
-i "pandas.Period.is_leap_year SA01" \
10699
-i "pandas.Period.month SA01" \
107100
-i "pandas.Period.now SA01" \
108101
-i "pandas.Period.ordinal GL08" \
109-
-i "pandas.Period.quarter SA01" \
110102
-i "pandas.Period.strftime PR01,SA01" \
111103
-i "pandas.Period.to_timestamp SA01" \
112104
-i "pandas.Period.year SA01" \
@@ -172,7 +164,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
172164
-i "pandas.Series.lt SA01" \
173165
-i "pandas.Series.ne SA01" \
174166
-i "pandas.Series.pad PR01,SA01" \
175-
-i "pandas.Series.plot PR02,SA01" \
167+
-i "pandas.Series.plot PR02" \
176168
-i "pandas.Series.pop RT03,SA01" \
177169
-i "pandas.Series.prod RT03" \
178170
-i "pandas.Series.product RT03" \
@@ -367,7 +359,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
367359
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
368360
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
369361
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
370-
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02,SA01" \
362+
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
371363
-i "pandas.core.groupby.DataFrameGroupBy.prod SA01" \
372364
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
373365
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
@@ -385,7 +377,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
385377
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
386378
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
387379
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
388-
-i "pandas.core.groupby.SeriesGroupBy.plot PR02,SA01" \
380+
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
389381
-i "pandas.core.groupby.SeriesGroupBy.prod SA01" \
390382
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
391383
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
@@ -486,23 +478,16 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
486478
-i "pandas.plotting.autocorrelation_plot RT03,SA01" \
487479
-i "pandas.plotting.lag_plot RT03,SA01" \
488480
-i "pandas.plotting.parallel_coordinates PR07,RT03,SA01" \
489-
-i "pandas.plotting.plot_params SA01" \
490481
-i "pandas.plotting.scatter_matrix PR07,SA01" \
491482
-i "pandas.plotting.table PR07,RT03,SA01" \
492483
-i "pandas.qcut PR07,SA01" \
493-
-i "pandas.read_feather SA01" \
494484
-i "pandas.read_orc SA01" \
495-
-i "pandas.read_sas SA01" \
496485
-i "pandas.read_spss SA01" \
497486
-i "pandas.reset_option SA01" \
498487
-i "pandas.set_eng_float_format RT03,SA01" \
499-
-i "pandas.set_option SA01" \
500488
-i "pandas.show_versions SA01" \
501-
-i "pandas.test SA01" \
502489
-i "pandas.testing.assert_extension_array_equal SA01" \
503-
-i "pandas.testing.assert_index_equal PR07,SA01" \
504490
-i "pandas.testing.assert_series_equal PR07,SA01" \
505-
-i "pandas.timedelta_range SA01" \
506491
-i "pandas.tseries.offsets.BDay PR02,SA01" \
507492
-i "pandas.tseries.offsets.BQuarterBegin PR02" \
508493
-i "pandas.tseries.offsets.BQuarterBegin.freqstr SA01" \
@@ -787,7 +772,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
787772
-i "pandas.tseries.offsets.YearBegin.nanos GL08" \
788773
-i "pandas.tseries.offsets.YearBegin.normalize GL08" \
789774
-i "pandas.tseries.offsets.YearBegin.rule_code GL08" \
790-
-i "pandas.tseries.offsets.YearEnd PR02" \
791775
-i "pandas.tseries.offsets.YearEnd.freqstr SA01" \
792776
-i "pandas.tseries.offsets.YearEnd.is_on_offset GL08" \
793777
-i "pandas.tseries.offsets.YearEnd.month GL08" \
@@ -796,7 +780,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
796780
-i "pandas.tseries.offsets.YearEnd.normalize GL08" \
797781
-i "pandas.tseries.offsets.YearEnd.rule_code GL08" \
798782
-i "pandas.unique PR07" \
799-
-i "pandas.util.hash_array PR07,SA01" \
800783
-i "pandas.util.hash_pandas_object PR07,SA01" # There should be no backslash in the final line, please keep this comment in the last ignored function
801784

802785
RET=$(($RET + $?)) ; echo $MSG "DONE"

doc/source/user_guide/merging.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,7 @@ with optional filling of missing data with ``fill_method``.
974974
:func:`merge_asof`
975975
---------------------
976976

977-
:func:`merge_asof` is similar to an ordered left-join except that mactches are on the
977+
:func:`merge_asof` is similar to an ordered left-join except that matches are on the
978978
nearest key rather than equal keys. For each row in the ``left`` :class:`DataFrame`,
979979
the last row in the ``right`` :class:`DataFrame` is selected where the ``on`` key is less
980980
than the left's key. Both :class:`DataFrame` must be sorted by the key.

doc/source/user_guide/missing_data.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use :class:`api.typing.NaTType`.
3232
:class:`NA` for :class:`StringDtype`, :class:`Int64Dtype` (and other bit widths),
3333
:class:`Float64Dtype` (and other bit widths), :class:`BooleanDtype` and :class:`ArrowDtype`.
3434
These types will maintain the original data type of the data.
35-
For typing applications, use :class:`api.types.NAType`.
35+
For typing applications, use :class:`api.typing.NAType`.
3636

3737
.. ipython:: python
3838

doc/source/user_guide/timeseries.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,10 @@ frequencies. We will refer to these aliases as *offset aliases*.
12731273
are deprecated in favour of the aliases ``h``, ``bh``, ``cbh``,
12741274
``min``, ``s``, ``ms``, ``us``, and ``ns``.
12751275

1276+
Aliases ``Y``, ``M``, and ``Q`` are deprecated in favour of the aliases
1277+
``YE``, ``ME``, and ``QE``.
1278+
1279+
12761280
.. note::
12771281

12781282
When using the offset aliases above, it should be noted that functions

doc/source/whatsnew/v3.0.0.rst

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,69 @@ notable_bug_fix2
124124
Backwards incompatible API changes
125125
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
126126

127+
.. _whatsnew_300.api_breaking.datetime_resolution_inference:
128+
129+
Datetime resolution inference
130+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
131+
132+
Converting a sequence of strings, ``datetime`` objects, or ``np.datetime64`` objects to
133+
a ``datetime64`` dtype now performs inference on the appropriate resolution (AKA unit) for the output dtype. This affects :class:`Series`, :class:`DataFrame`, :class:`Index`, :class:`DatetimeIndex`, and :func:`to_datetime`.
134+
135+
Previously, these would always give nanosecond resolution:
136+
137+
.. code-block:: ipython
138+
139+
In [1]: dt = pd.Timestamp("2024-03-22 11:36").to_pydatetime()
140+
In [2]: pd.to_datetime([dt]).dtype
141+
Out[2]: dtype('<M8[ns]')
142+
In [3]: pd.Index([dt]).dtype
143+
Out[3]: dtype('<M8[ns]')
144+
In [4]: pd.DatetimeIndex([dt]).dtype
145+
Out[4]: dtype('<M8[ns]')
146+
In [5]: pd.Series([dt]).dtype
147+
Out[5]: dtype('<M8[ns]')
148+
149+
This now infers the microsecond unit "us" from the pydatetime object, matching the scalar :class:`Timestamp` behavior.
150+
151+
.. ipython:: python
152+
153+
In [1]: dt = pd.Timestamp("2024-03-22 11:36").to_pydatetime()
154+
In [2]: pd.to_datetime([dt]).dtype
155+
In [3]: pd.Index([dt]).dtype
156+
In [4]: pd.DatetimeIndex([dt]).dtype
157+
In [5]: pd.Series([dt]).dtype
158+
159+
Similarly, when passed a sequence of ``np.datetime64`` objects, the resolution of the passed objects will be retained (or for lower-than-second resolution, second resolution will be used).
160+
161+
When passing strings, the resolution will depend on the precision of the string, again matching the :class:`Timestamp` behavior. Previously:
162+
163+
.. code-block:: ipython
164+
165+
In [2]: pd.to_datetime(["2024-03-22 11:43:01"]).dtype
166+
Out[2]: dtype('<M8[ns]')
167+
In [3]: pd.to_datetime(["2024-03-22 11:43:01.002"]).dtype
168+
Out[3]: dtype('<M8[ns]')
169+
In [4]: pd.to_datetime(["2024-03-22 11:43:01.002003"]).dtype
170+
Out[4]: dtype('<M8[ns]')
171+
In [5]: pd.to_datetime(["2024-03-22 11:43:01.002003004"]).dtype
172+
Out[5]: dtype('<M8[ns]')
173+
174+
The inferred resolution now matches that of the input strings:
175+
176+
.. ipython:: python
177+
178+
In [2]: pd.to_datetime(["2024-03-22 11:43:01"]).dtype
179+
In [3]: pd.to_datetime(["2024-03-22 11:43:01.002"]).dtype
180+
In [4]: pd.to_datetime(["2024-03-22 11:43:01.002003"]).dtype
181+
In [5]: pd.to_datetime(["2024-03-22 11:43:01.002003004"]).dtype
182+
183+
In cases with mixed-resolution inputs, the highest resolution is used:
184+
185+
.. code-block:: ipython
186+
187+
In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype
188+
Out[2]: dtype('<M8[ns]')
189+
127190
.. _whatsnew_300.api_breaking.deps:
128191

129192
Increased minimum versions for dependencies

pandas/_config/config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,14 @@ def set_option(*args) -> None:
211211
TypeError if keyword arguments are provided
212212
OptionError if no such option exists
213213
214+
See Also
215+
--------
216+
get_option : Retrieve the value of the specified option.
217+
reset_option : Reset one or more options to their default value.
218+
describe_option : Print the description for one or more registered options.
219+
option_context : Context manager to temporarily set options in a ``with``
220+
statement.
221+
214222
Notes
215223
-----
216224
For all available options, please view the :ref:`User Guide <options.available>`

pandas/_libs/include/pandas/vendored/klib/khash_python.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -156,20 +156,20 @@ KHASH_MAP_INIT_COMPLEX128(complex128, size_t)
156156

157157
// NaN-floats should be in the same equivalency class, see GH 22119
158158
static inline int floatobject_cmp(PyFloatObject *a, PyFloatObject *b) {
159-
return (Py_IS_NAN(PyFloat_AS_DOUBLE(a)) && Py_IS_NAN(PyFloat_AS_DOUBLE(b))) ||
159+
return (isnan(PyFloat_AS_DOUBLE(a)) && isnan(PyFloat_AS_DOUBLE(b))) ||
160160
(PyFloat_AS_DOUBLE(a) == PyFloat_AS_DOUBLE(b));
161161
}
162162

163163
// NaNs should be in the same equivalency class, see GH 41836
164164
// PyObject_RichCompareBool for complexobjects has a different behavior
165165
// needs to be replaced
166166
static inline int complexobject_cmp(PyComplexObject *a, PyComplexObject *b) {
167-
return (Py_IS_NAN(a->cval.real) && Py_IS_NAN(b->cval.real) &&
168-
Py_IS_NAN(a->cval.imag) && Py_IS_NAN(b->cval.imag)) ||
169-
(Py_IS_NAN(a->cval.real) && Py_IS_NAN(b->cval.real) &&
167+
return (isnan(a->cval.real) && isnan(b->cval.real) && isnan(a->cval.imag) &&
168+
isnan(b->cval.imag)) ||
169+
(isnan(a->cval.real) && isnan(b->cval.real) &&
170170
a->cval.imag == b->cval.imag) ||
171-
(a->cval.real == b->cval.real && Py_IS_NAN(a->cval.imag) &&
172-
Py_IS_NAN(b->cval.imag)) ||
171+
(a->cval.real == b->cval.real && isnan(a->cval.imag) &&
172+
isnan(b->cval.imag)) ||
173173
(a->cval.real == b->cval.real && a->cval.imag == b->cval.imag);
174174
}
175175

@@ -223,7 +223,7 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) {
223223

224224
static inline Py_hash_t _Pandas_HashDouble(double val) {
225225
// Since Python3.10, nan no longer has hash 0
226-
if (Py_IS_NAN(val)) {
226+
if (isnan(val)) {
227227
return 0;
228228
}
229229
#if PY_VERSION_HEX < 0x030A0000

pandas/_libs/lib.pyx

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -96,16 +96,12 @@ from pandas._libs.missing cimport (
9696
is_null_datetime64,
9797
is_null_timedelta64,
9898
)
99-
from pandas._libs.tslibs.conversion cimport (
100-
_TSObject,
101-
convert_to_tsobject,
102-
)
99+
from pandas._libs.tslibs.conversion cimport convert_to_tsobject
103100
from pandas._libs.tslibs.nattype cimport (
104101
NPY_NAT,
105102
c_NaT as NaT,
106103
checknull_with_nat,
107104
)
108-
from pandas._libs.tslibs.np_datetime cimport NPY_FR_ns
109105
from pandas._libs.tslibs.offsets cimport is_offset_object
110106
from pandas._libs.tslibs.period cimport is_period_object
111107
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
@@ -2497,7 +2493,6 @@ def maybe_convert_objects(ndarray[object] objects,
24972493
ndarray[uint8_t] mask
24982494
Seen seen = Seen()
24992495
object val
2500-
_TSObject tsobj
25012496
float64_t fnan = NaN
25022497

25032498
if dtype_if_all_nat is not None:
@@ -2604,8 +2599,7 @@ def maybe_convert_objects(ndarray[object] objects,
26042599
else:
26052600
seen.datetime_ = True
26062601
try:
2607-
tsobj = convert_to_tsobject(val, None, None, 0, 0)
2608-
tsobj.ensure_reso(NPY_FR_ns)
2602+
convert_to_tsobject(val, None, None, 0, 0)
26092603
except OutOfBoundsDatetime:
26102604
# e.g. test_out_of_s_bounds_datetime64
26112605
seen.object_ = True

pandas/_libs/tslib.pyx

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,10 @@ from pandas._libs.tslibs.conversion cimport (
6363
get_datetime64_nanos,
6464
parse_pydatetime,
6565
)
66-
from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev
66+
from pandas._libs.tslibs.dtypes cimport (
67+
get_supported_reso,
68+
npy_unit_to_abbrev,
69+
)
6770
from pandas._libs.tslibs.nattype cimport (
6871
NPY_NAT,
6972
c_nat_strings as nat_strings,
@@ -260,7 +263,7 @@ cpdef array_to_datetime(
260263
bint dayfirst=False,
261264
bint yearfirst=False,
262265
bint utc=False,
263-
NPY_DATETIMEUNIT creso=NPY_FR_ns,
266+
NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_GENERIC,
264267
str unit_for_numerics=None,
265268
):
266269
"""
@@ -288,8 +291,8 @@ cpdef array_to_datetime(
288291
yearfirst parsing behavior when encountering datetime strings
289292
utc : bool, default False
290293
indicator whether the dates should be UTC
291-
creso : NPY_DATETIMEUNIT, default NPY_FR_ns
292-
Set to NPY_FR_GENERIC to infer a resolution.
294+
creso : NPY_DATETIMEUNIT, default NPY_FR_GENERIC
295+
If NPY_FR_GENERIC, conduct inference.
293296
unit_for_numerics : str, default "ns"
294297
295298
Returns
@@ -389,7 +392,7 @@ cpdef array_to_datetime(
389392
# GH#32264 np.str_ object
390393
val = str(val)
391394

392-
if parse_today_now(val, &iresult[i], utc, creso):
395+
if parse_today_now(val, &iresult[i], utc, creso, infer_reso=infer_reso):
393396
# We can't _quite_ dispatch this to convert_str_to_tsobject
394397
# bc there isn't a nice way to pass "utc"
395398
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
@@ -533,7 +536,9 @@ def array_to_datetime_with_tz(
533536
if state.creso_ever_changed:
534537
# We encountered mismatched resolutions, need to re-parse with
535538
# the correct one.
536-
return array_to_datetime_with_tz(values, tz=tz, creso=creso)
539+
return array_to_datetime_with_tz(
540+
values, tz=tz, dayfirst=dayfirst, yearfirst=yearfirst, creso=creso
541+
)
537542
elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
538543
# i.e. we never encountered anything non-NaT, default to "s". This
539544
# ensures that insert and concat-like operations with NaT

pandas/_libs/tslibs/offsets.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2579,7 +2579,7 @@ cdef class YearEnd(YearOffset):
25792579
25802580
YearEnd goes to the next date which is the end of the year.
25812581
2582-
Parameters
2582+
Attributes
25832583
----------
25842584
n : int, default 1
25852585
The number of years represented.

0 commit comments

Comments
 (0)