Skip to content

Commit 525fe34

Browse files
committed
Merge remote-tracking branch 'upstream/main' into tst/pip_extras
2 parents d2fb7eb + 8f869f3 commit 525fe34

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+344
-293
lines changed

asv_bench/benchmarks/tslibs/tslib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class TimeIntsToPydatetime:
5151
_tzs,
5252
)
5353
param_names = ["box", "size", "tz"]
54-
# TODO: fold? freq?
54+
# TODO: fold?
5555

5656
def setup(self, box, size, tz):
5757
if box == "date" and tz is not None:

doc/source/whatsnew/v2.0.0.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,8 @@ Other API changes
291291
- Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
292292
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
293293
- Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`)
294-
- Changed behavior of :class:`Index` construct with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
294+
- Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
295+
- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values; this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
295296
-
296297

297298
.. ---------------------------------------------------------------------------
@@ -531,6 +532,7 @@ Performance improvements
531532
- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
532533
- Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`)
533534
- Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`)
535+
- Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``observed=False`` (:issue:`49596`)
534536
- Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`)
535537

536538
.. ---------------------------------------------------------------------------
@@ -605,7 +607,7 @@ Missing
605607

606608
MultiIndex
607609
^^^^^^^^^^
608-
- Bug in :meth:`MultiIndex.get_indexer` not matching ``NaN`` values (:issue:`37222`)
610+
- Bug in :meth:`MultiIndex.get_indexer` not matching ``NaN`` values (:issue:`29252`, :issue:`37222`, :issue:`38623`, :issue:`42883`, :issue:`43222`, :issue:`46173`, :issue:`48905`)
609611
- Bug in :meth:`MultiIndex.argsort` raising ``TypeError`` when index contains :attr:`NA` (:issue:`48495`)
610612
- Bug in :meth:`MultiIndex.difference` losing extension array dtype (:issue:`48606`)
611613
- Bug in :meth:`MultiIndex.set_levels` raising ``IndexError`` when setting empty level (:issue:`48636`)
@@ -615,6 +617,7 @@ MultiIndex
615617
- Bug in :meth:`MultiIndex.union` not sorting when ``sort=None`` and index contains missing values (:issue:`49010`)
616618
- Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`)
617619
- Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`)
620+
- Bug in :meth:`MultiIndex.value_counts` returning a :class:`Series` indexed by flat index of tuples instead of a :class:`MultiIndex` (:issue:`49558`)
618621
-
619622

620623
I/O

pandas/core/algorithms.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,14 @@ def value_counts(
871871
result.name = name
872872
counts = result._values
873873

874+
elif isinstance(values, ABCMultiIndex):
875+
# GH49558
876+
levels = list(range(values.nlevels))
877+
result = Series(index=values).groupby(level=levels, dropna=dropna).size()
878+
# TODO: allow index names to remain (see discussion in GH49497)
879+
result.index.names = [None] * values.nlevels
880+
counts = result._values
881+
874882
else:
875883
values = _ensure_arraylike(values)
876884
keys, counts = value_counts_arraylike(values, dropna)
@@ -1247,7 +1255,7 @@ def compute(self, method: str) -> Series:
12471255
inds = inds[:n]
12481256
findex = nbase
12491257
else:
1250-
if len(inds) < nbase and len(nan_index) + len(inds) >= nbase:
1258+
if len(inds) < nbase <= len(nan_index) + len(inds):
12511259
findex = len(nan_index) + len(inds)
12521260
else:
12531261
findex = len(inds)

pandas/core/arrays/_mixins.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -234,21 +234,9 @@ def searchsorted(
234234
side: Literal["left", "right"] = "left",
235235
sorter: NumpySorter = None,
236236
) -> npt.NDArray[np.intp] | np.intp:
237-
# TODO(2.0): use _validate_setitem_value once dt64tz mismatched-timezone
238-
# deprecation is enforced
239-
npvalue = self._validate_searchsorted_value(value)
237+
npvalue = self._validate_setitem_value(value)
240238
return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter)
241239

242-
def _validate_searchsorted_value(
243-
self, value: NumpyValueArrayLike | ExtensionArray
244-
) -> NumpyValueArrayLike:
245-
# TODO(2.0): after deprecation in datetimelikearraymixin is enforced,
246-
# we can remove this and use _validate_setitem_value directly
247-
if isinstance(value, ExtensionArray):
248-
return value.to_numpy()
249-
else:
250-
return value
251-
252240
@doc(ExtensionArray.shift)
253241
def shift(self, periods: int = 1, fill_value=None, axis: AxisInt = 0):
254242

pandas/core/arrays/_ranges.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,12 +121,12 @@ def _generate_range_overflow_safe(
121121
return _generate_range_overflow_safe_signed(endpoint, periods, stride, side)
122122

123123
elif (endpoint > 0 and side == "start" and stride > 0) or (
124-
endpoint < 0 and side == "end" and stride > 0
124+
endpoint < 0 < stride and side == "end"
125125
):
126126
# no chance of not-overflowing
127127
raise OutOfBoundsDatetime(msg)
128128

129-
elif side == "end" and endpoint > i64max and endpoint - stride <= i64max:
129+
elif side == "end" and endpoint - stride <= i64max < endpoint:
130130
# in _generate_regular_range we added `stride` thereby overflowing
131131
# the bounds. Adjust to fix this.
132132
return _generate_range_overflow_safe(

pandas/core/arrays/categorical.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,7 +1019,10 @@ def reorder_categories(self, new_categories, ordered=None):
10191019
remove_unused_categories : Remove categories which are not used.
10201020
set_categories : Set the categories to the specified ones.
10211021
"""
1022-
if set(self.dtype.categories) != set(new_categories):
1022+
if (
1023+
len(self.categories) != len(new_categories)
1024+
or not self.categories.difference(new_categories).empty
1025+
):
10231026
raise ValueError(
10241027
"items in new_categories are not the same as in old categories"
10251028
)
@@ -1301,8 +1304,6 @@ def _validate_setitem_value(self, value):
13011304
else:
13021305
return self._validate_scalar(value)
13031306

1304-
_validate_searchsorted_value = _validate_setitem_value
1305-
13061307
def _validate_scalar(self, fill_value):
13071308
"""
13081309
Convert a user-facing fill_value to a representation to use with our

pandas/core/arrays/datetimelike.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,6 @@ def _validate_scalar(
600600
value,
601601
*,
602602
allow_listlike: bool = False,
603-
setitem: bool = True,
604603
unbox: bool = True,
605604
):
606605
"""
@@ -612,8 +611,6 @@ def _validate_scalar(
612611
allow_listlike: bool, default False
613612
When raising an exception, whether the message should say
614613
listlike inputs are allowed.
615-
setitem : bool, default True
616-
Whether to check compatibility with setitem strictness.
617614
unbox : bool, default True
618615
Whether to unbox the result before returning. Note: unbox=False
619616
skips the setitem compatibility check.
@@ -735,14 +732,6 @@ def _validate_listlike(self, value, allow_object: bool = False):
735732

736733
return value
737734

738-
def _validate_searchsorted_value(self, value):
739-
if not is_list_like(value):
740-
return self._validate_scalar(value, allow_listlike=True, setitem=False)
741-
else:
742-
value = self._validate_listlike(value)
743-
744-
return self._unbox(value)
745-
746735
def _validate_setitem_value(self, value):
747736
if is_list_like(value):
748737
value = self._validate_listlike(value)
@@ -1363,10 +1352,7 @@ def _addsub_object_array(self, other: np.ndarray, op):
13631352
# Caller is responsible for broadcasting if necessary
13641353
assert self.shape == other.shape, (self.shape, other.shape)
13651354

1366-
with warnings.catch_warnings():
1367-
# filter out warnings about Timestamp.freq
1368-
warnings.filterwarnings("ignore", category=FutureWarning)
1369-
res_values = op(self.astype("O"), np.asarray(other))
1355+
res_values = op(self.astype("O"), np.asarray(other))
13701356

13711357
result = pd_array(res_values.ravel())
13721358
result = extract_array(result, extract_numpy=True).reshape(self.shape)

pandas/core/arrays/datetimes.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,6 @@ def _add_offset(self, offset) -> DatetimeArray:
751751
else:
752752
result = DatetimeArray._simple_new(result, dtype=result.dtype)
753753
if self.tz is not None:
754-
# FIXME: tz_localize with non-nano
755754
result = result.tz_localize(self.tz)
756755

757756
return result

pandas/core/arrays/period.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,7 @@ def searchsorted(
692692
side: Literal["left", "right"] = "left",
693693
sorter: NumpySorter = None,
694694
) -> npt.NDArray[np.intp] | np.intp:
695-
npvalue = self._validate_searchsorted_value(value).view("M8[ns]")
695+
npvalue = self._validate_setitem_value(value).view("M8[ns]")
696696

697697
# Cast to M8 to get datetime-like NaT placement
698698
m8arr = self._ndarray.view("M8[ns]")

pandas/core/construction.py

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,15 @@ def sanitize_array(
597597
# e.g. test_constructor_floating_data_int_dtype
598598
# TODO: where is the discussion that documents the reason for this?
599599
subarr = np.array(data, copy=copy)
600+
601+
elif dtype is None:
602+
subarr = data
603+
if data.dtype == object:
604+
subarr = maybe_infer_to_datetimelike(data)
605+
606+
if subarr is data and copy:
607+
subarr = subarr.copy()
608+
600609
else:
601610
# we will try to copy by-definition here
602611
subarr = _try_cast(data, dtype, copy)
@@ -666,7 +675,7 @@ def range_to_ndarray(rng: range) -> np.ndarray:
666675
arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64")
667676
except OverflowError:
668677
# GH#30173 handling for ranges that overflow int64
669-
if (rng.start >= 0 and rng.step > 0) or (rng.stop >= 0 and rng.step < 0):
678+
if (rng.start >= 0 and rng.step > 0) or (rng.step < 0 <= rng.stop):
670679
try:
671680
arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64")
672681
except OverflowError:
@@ -754,7 +763,7 @@ def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
754763

755764
def _try_cast(
756765
arr: list | np.ndarray,
757-
dtype: np.dtype | None,
766+
dtype: np.dtype,
758767
copy: bool,
759768
) -> ArrayLike:
760769
"""
@@ -764,7 +773,7 @@ def _try_cast(
764773
----------
765774
arr : ndarray or list
766775
Excludes: ExtensionArray, Series, Index.
767-
dtype : np.dtype or None
776+
dtype : np.dtype
768777
copy : bool
769778
If False, don't copy the data if not needed.
770779
@@ -774,30 +783,7 @@ def _try_cast(
774783
"""
775784
is_ndarray = isinstance(arr, np.ndarray)
776785

777-
if dtype is None:
778-
# perf shortcut as this is the most common case
779-
if is_ndarray:
780-
arr = cast(np.ndarray, arr)
781-
if arr.dtype != object:
782-
if copy:
783-
return arr.copy()
784-
return arr
785-
786-
out = maybe_infer_to_datetimelike(arr)
787-
if out is arr and copy:
788-
out = out.copy()
789-
return out
790-
791-
else:
792-
# i.e. list
793-
varr = np.array(arr, copy=False)
794-
# filter out cases that we _dont_ want to go through
795-
# maybe_infer_to_datetimelike
796-
if varr.dtype != object or varr.size == 0:
797-
return varr
798-
return maybe_infer_to_datetimelike(varr)
799-
800-
elif is_object_dtype(dtype):
786+
if is_object_dtype(dtype):
801787
if not is_ndarray:
802788
subarr = construct_1d_object_array_from_listlike(arr)
803789
return subarr

pandas/core/dtypes/cast.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,15 +1029,12 @@ def soft_convert_objects(
10291029
if datetime or timedelta:
10301030
# GH 20380, when datetime is beyond year 2262, hence outside
10311031
# bound of nanosecond-resolution 64-bit integers.
1032-
try:
1033-
converted = lib.maybe_convert_objects(
1034-
values,
1035-
convert_datetime=datetime,
1036-
convert_timedelta=timedelta,
1037-
convert_period=period,
1038-
)
1039-
except (OutOfBoundsDatetime, ValueError):
1040-
return values
1032+
converted = lib.maybe_convert_objects(
1033+
values,
1034+
convert_datetime=datetime,
1035+
convert_timedelta=timedelta,
1036+
convert_period=period,
1037+
)
10411038
if converted is not values:
10421039
return converted
10431040

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ def __init__(
738738

739739
# For data is list-like, or Iterable (will consume into list)
740740
elif is_list_like(data):
741-
if not isinstance(data, (abc.Sequence, ExtensionArray)):
741+
if not isinstance(data, abc.Sequence):
742742
if hasattr(data, "__array__"):
743743
# GH#44616 big perf improvement for e.g. pytorch tensor
744744
data = np.asarray(data)

pandas/core/indexes/base.py

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
find_common_type,
8282
infer_dtype_from,
8383
maybe_cast_pointwise_result,
84+
maybe_infer_to_datetimelike,
8485
np_can_hold_element,
8586
)
8687
from pandas.core.dtypes.common import (
@@ -503,9 +504,8 @@ def __new__(
503504
arr = com.asarray_tuplesafe(data, dtype=_dtype_obj)
504505

505506
if dtype is None:
506-
arr = _maybe_cast_data_without_dtype(
507-
arr, cast_numeric_deprecated=True
508-
)
507+
arr = maybe_infer_to_datetimelike(arr)
508+
arr = ensure_wrapped_if_datetimelike(arr)
509509
dtype = arr.dtype
510510

511511
klass = cls._dtype_to_subclass(arr.dtype)
@@ -534,9 +534,7 @@ def __new__(
534534
subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj)
535535
if dtype is None:
536536
# with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated
537-
subarr = _maybe_cast_data_without_dtype(
538-
subarr, cast_numeric_deprecated=False
539-
)
537+
subarr = _maybe_cast_data_without_dtype(subarr)
540538
dtype = subarr.dtype
541539
return Index(subarr, dtype=dtype, copy=copy, name=name)
542540

@@ -3500,13 +3498,7 @@ def _assert_can_do_setop(self, other) -> bool:
35003498

35013499
def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
35023500
if not isinstance(other, Index):
3503-
# TODO(2.0): no need to special-case here once _with_infer
3504-
# deprecation is enforced
3505-
if hasattr(other, "dtype"):
3506-
other = Index(other, name=self.name, dtype=other.dtype)
3507-
else:
3508-
# e.g. list
3509-
other = Index(other, name=self.name)
3501+
other = Index(other, name=self.name)
35103502
result_name = self.name
35113503
else:
35123504
result_name = get_op_result_name(self, other)
@@ -7062,18 +7054,14 @@ def maybe_extract_name(name, obj, cls) -> Hashable:
70627054
return name
70637055

70647056

7065-
def _maybe_cast_data_without_dtype(
7066-
subarr: np.ndarray, cast_numeric_deprecated: bool = True
7067-
) -> ArrayLike:
7057+
def _maybe_cast_data_without_dtype(subarr: npt.NDArray[np.object_]) -> ArrayLike:
70687058
"""
70697059
If we have an arraylike input but no passed dtype, try to infer
70707060
a supported dtype.
70717061
70727062
Parameters
70737063
----------
70747064
subarr : np.ndarray[object]
7075-
cast_numeric_deprecated : bool, default True
7076-
Whether to issue a FutureWarning when inferring numeric dtypes.
70777065
70787066
Returns
70797067
-------
@@ -7088,12 +7076,6 @@ def _maybe_cast_data_without_dtype(
70887076
convert_interval=True,
70897077
dtype_if_all_nat=np.dtype("datetime64[ns]"),
70907078
)
7091-
if result.dtype.kind in ["i", "u", "f"]:
7092-
if not cast_numeric_deprecated:
7093-
# i.e. we started with a list, not an ndarray[object]
7094-
return result
7095-
return subarr
7096-
70977079
result = ensure_wrapped_if_datetimelike(result)
70987080
return result
70997081

pandas/core/internals/array_manager.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -697,8 +697,7 @@ def _equal_values(self, other) -> bool:
697697
for left, right in zip(self.arrays, other.arrays):
698698
if not array_equals(left, right):
699699
return False
700-
else:
701-
return True
700+
return True
702701

703702
# TODO
704703
# to_dict

pandas/core/internals/blocks.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1920,15 +1920,11 @@ def _catch_deprecated_value_error(err: Exception) -> None:
19201920
which will no longer be raised in version 2.0.
19211921
"""
19221922
if isinstance(err, ValueError):
1923-
# TODO(2.0): once DTA._validate_setitem_value deprecation
1924-
# is enforced, stop catching ValueError here altogether
19251923
if isinstance(err, IncompatibleFrequency):
19261924
pass
19271925
elif "'value.closed' is" in str(err):
19281926
# IntervalDtype mismatched 'closed'
19291927
pass
1930-
elif "Timezones don't match" not in str(err):
1931-
raise err
19321928

19331929

19341930
class DatetimeLikeBlock(NDArrayBackedExtensionBlock):

0 commit comments

Comments
 (0)