pandas-dev
diff --git a/asv_bench/benchmarks/tslibs/tslib.py b/asv_bench/benchmarks/tslibs/tslib.py
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
Lines changed: 5 additions & 2 deletions
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
Lines changed: 9 additions & 1 deletion
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
Lines changed: 1 addition & 13 deletions
diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py
Lines changed: 2 additions & 2 deletions
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
Lines changed: 4 additions & 3 deletions
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
Lines changed: 1 addition & 15 deletions
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
Lines changed: 0 additions & 1 deletion
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
Lines changed: 13 additions & 27 deletions
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
Lines changed: 6 additions & 9 deletions
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
Lines changed: 6 additions & 24 deletions
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
Lines changed: 1 addition & 2 deletions
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
Lines changed: 0 additions & 4 deletions
@@ -51,7 +51,7 @@ class TimeIntsToPydatetime:
         _tzs,
     )
     param_names = ["box", "size", "tz"]
-    # TODO: fold? freq?
+    # TODO: fold?
 
     def setup(self, box, size, tz):
         if box == "date" and tz is not None:
 
@@ -291,7 +291,8 @@ Other API changes
 - Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
 - Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
 - Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`)
-- Changed behavior of :class:`Index` construct with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
+- Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
+- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values; this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -531,6 +532,7 @@ Performance improvements
 - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
 - Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`)
 - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`)
+- Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``observed=False`` (:issue:`49596`)
 - Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`)
 
 .. ---------------------------------------------------------------------------
@@ -605,7 +607,7 @@ Missing
 
 MultiIndex
 ^^^^^^^^^^
-- Bug in :meth:`MultiIndex.get_indexer` not matching ``NaN`` values (:issue:`37222`)
+- Bug in :meth:`MultiIndex.get_indexer` not matching ``NaN`` values (:issue:`29252`, :issue:`37222`, :issue:`38623`, :issue:`42883`, :issue:`43222`, :issue:`46173`, :issue:`48905`)
 - Bug in :meth:`MultiIndex.argsort` raising ``TypeError`` when index contains :attr:`NA` (:issue:`48495`)
 - Bug in :meth:`MultiIndex.difference` losing extension array dtype (:issue:`48606`)
 - Bug in :meth:`MultiIndex.set_levels` raising ``IndexError`` when setting empty level (:issue:`48636`)
@@ -615,6 +617,7 @@ MultiIndex
 - Bug in :meth:`MultiIndex.union` not sorting when ``sort=None`` and index contains missing values (:issue:`49010`)
 - Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`)
 - Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`)
+- Bug in :meth:`MultiIndex.value_counts` returning a :class:`Series` indexed by flat index of tuples instead of a :class:`MultiIndex` (:issue:`49558`)
 -
 
 I/O
 
@@ -871,6 +871,14 @@ def value_counts(
             result.name = name
             counts = result._values
 
+        elif isinstance(values, ABCMultiIndex):
+            # GH49558
+            levels = list(range(values.nlevels))
+            result = Series(index=values).groupby(level=levels, dropna=dropna).size()
+            # TODO: allow index names to remain (see discussion in GH49497)
+            result.index.names = [None] * values.nlevels
+            counts = result._values
+
         else:
             values = _ensure_arraylike(values)
             keys, counts = value_counts_arraylike(values, dropna)
@@ -1247,7 +1255,7 @@ def compute(self, method: str) -> Series:
             inds = inds[:n]
             findex = nbase
         else:
-            if len(inds) < nbase and len(nan_index) + len(inds) >= nbase:
+            if len(inds) < nbase <= len(nan_index) + len(inds):
                 findex = len(nan_index) + len(inds)
             else:
                 findex = len(inds)
 
@@ -234,21 +234,9 @@ def searchsorted(
         side: Literal["left", "right"] = "left",
         sorter: NumpySorter = None,
     ) -> npt.NDArray[np.intp] | np.intp:
-        # TODO(2.0): use _validate_setitem_value once dt64tz mismatched-timezone
-        #  deprecation is enforced
-        npvalue = self._validate_searchsorted_value(value)
+        npvalue = self._validate_setitem_value(value)
         return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter)
 
-    def _validate_searchsorted_value(
-        self, value: NumpyValueArrayLike | ExtensionArray
-    ) -> NumpyValueArrayLike:
-        # TODO(2.0): after deprecation in datetimelikearraymixin is enforced,
-        #  we can remove this and use _validate_setitem_value directly
-        if isinstance(value, ExtensionArray):
-            return value.to_numpy()
-        else:
-            return value
-
     @doc(ExtensionArray.shift)
     def shift(self, periods: int = 1, fill_value=None, axis: AxisInt = 0):
 
 
@@ -121,12 +121,12 @@ def _generate_range_overflow_safe(
         return _generate_range_overflow_safe_signed(endpoint, periods, stride, side)
 
     elif (endpoint > 0 and side == "start" and stride > 0) or (
-        endpoint < 0 and side == "end" and stride > 0
+        endpoint < 0 < stride and side == "end"
     ):
         # no chance of not-overflowing
         raise OutOfBoundsDatetime(msg)
 
-    elif side == "end" and endpoint > i64max and endpoint - stride <= i64max:
+    elif side == "end" and endpoint - stride <= i64max < endpoint:
         # in _generate_regular_range we added `stride` thereby overflowing
         #  the bounds.  Adjust to fix this.
         return _generate_range_overflow_safe(
 
@@ -1019,7 +1019,10 @@ def reorder_categories(self, new_categories, ordered=None):
         remove_unused_categories : Remove categories which are not used.
         set_categories : Set the categories to the specified ones.
         """
-        if set(self.dtype.categories) != set(new_categories):
+        if (
+            len(self.categories) != len(new_categories)
+            or not self.categories.difference(new_categories).empty
+        ):
             raise ValueError(
                 "items in new_categories are not the same as in old categories"
             )
@@ -1301,8 +1304,6 @@ def _validate_setitem_value(self, value):
         else:
             return self._validate_scalar(value)
 
-    _validate_searchsorted_value = _validate_setitem_value
-
     def _validate_scalar(self, fill_value):
         """
         Convert a user-facing fill_value to a representation to use with our
 
@@ -600,7 +600,6 @@ def _validate_scalar(
         value,
         *,
         allow_listlike: bool = False,
-        setitem: bool = True,
         unbox: bool = True,
     ):
         """
@@ -612,8 +611,6 @@ def _validate_scalar(
         allow_listlike: bool, default False
             When raising an exception, whether the message should say
             listlike inputs are allowed.
-        setitem : bool, default True
-            Whether to check compatibility with setitem strictness.
         unbox : bool, default True
             Whether to unbox the result before returning.  Note: unbox=False
             skips the setitem compatibility check.
@@ -735,14 +732,6 @@ def _validate_listlike(self, value, allow_object: bool = False):
 
         return value
 
-    def _validate_searchsorted_value(self, value):
-        if not is_list_like(value):
-            return self._validate_scalar(value, allow_listlike=True, setitem=False)
-        else:
-            value = self._validate_listlike(value)
-
-        return self._unbox(value)
-
     def _validate_setitem_value(self, value):
         if is_list_like(value):
             value = self._validate_listlike(value)
@@ -1363,10 +1352,7 @@ def _addsub_object_array(self, other: np.ndarray, op):
         # Caller is responsible for broadcasting if necessary
         assert self.shape == other.shape, (self.shape, other.shape)
 
-        with warnings.catch_warnings():
-            # filter out warnings about Timestamp.freq
-            warnings.filterwarnings("ignore", category=FutureWarning)
-            res_values = op(self.astype("O"), np.asarray(other))
+        res_values = op(self.astype("O"), np.asarray(other))
 
         result = pd_array(res_values.ravel())
         result = extract_array(result, extract_numpy=True).reshape(self.shape)
 
@@ -751,7 +751,6 @@ def _add_offset(self, offset) -> DatetimeArray:
         else:
             result = DatetimeArray._simple_new(result, dtype=result.dtype)
             if self.tz is not None:
-                # FIXME: tz_localize with non-nano
                 result = result.tz_localize(self.tz)
 
         return result
 
@@ -692,7 +692,7 @@ def searchsorted(
         side: Literal["left", "right"] = "left",
         sorter: NumpySorter = None,
     ) -> npt.NDArray[np.intp] | np.intp:
-        npvalue = self._validate_searchsorted_value(value).view("M8[ns]")
+        npvalue = self._validate_setitem_value(value).view("M8[ns]")
 
         # Cast to M8 to get datetime-like NaT placement
         m8arr = self._ndarray.view("M8[ns]")
 
@@ -597,6 +597,15 @@ def sanitize_array(
                 #  e.g. test_constructor_floating_data_int_dtype
                 # TODO: where is the discussion that documents the reason for this?
                 subarr = np.array(data, copy=copy)
+
+        elif dtype is None:
+            subarr = data
+            if data.dtype == object:
+                subarr = maybe_infer_to_datetimelike(data)
+
+            if subarr is data and copy:
+                subarr = subarr.copy()
+
         else:
             # we will try to copy by-definition here
             subarr = _try_cast(data, dtype, copy)
@@ -666,7 +675,7 @@ def range_to_ndarray(rng: range) -> np.ndarray:
         arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64")
     except OverflowError:
         # GH#30173 handling for ranges that overflow int64
-        if (rng.start >= 0 and rng.step > 0) or (rng.stop >= 0 and rng.step < 0):
+        if (rng.start >= 0 and rng.step > 0) or (rng.step < 0 <= rng.stop):
             try:
                 arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64")
             except OverflowError:
@@ -754,7 +763,7 @@ def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
 
 def _try_cast(
     arr: list | np.ndarray,
-    dtype: np.dtype | None,
+    dtype: np.dtype,
     copy: bool,
 ) -> ArrayLike:
     """
@@ -764,7 +773,7 @@ def _try_cast(
     ----------
     arr : ndarray or list
         Excludes: ExtensionArray, Series, Index.
-    dtype : np.dtype or None
+    dtype : np.dtype
     copy : bool
         If False, don't copy the data if not needed.
 
@@ -774,30 +783,7 @@ def _try_cast(
     """
     is_ndarray = isinstance(arr, np.ndarray)
 
-    if dtype is None:
-        # perf shortcut as this is the most common case
-        if is_ndarray:
-            arr = cast(np.ndarray, arr)
-            if arr.dtype != object:
-                if copy:
-                    return arr.copy()
-                return arr
-
-            out = maybe_infer_to_datetimelike(arr)
-            if out is arr and copy:
-                out = out.copy()
-            return out
-
-        else:
-            # i.e. list
-            varr = np.array(arr, copy=False)
-            # filter out cases that we _dont_ want to go through
-            #  maybe_infer_to_datetimelike
-            if varr.dtype != object or varr.size == 0:
-                return varr
-            return maybe_infer_to_datetimelike(varr)
-
-    elif is_object_dtype(dtype):
+    if is_object_dtype(dtype):
         if not is_ndarray:
             subarr = construct_1d_object_array_from_listlike(arr)
             return subarr
 
@@ -1029,15 +1029,12 @@ def soft_convert_objects(
     if datetime or timedelta:
         # GH 20380, when datetime is beyond year 2262, hence outside
         # bound of nanosecond-resolution 64-bit integers.
-        try:
-            converted = lib.maybe_convert_objects(
-                values,
-                convert_datetime=datetime,
-                convert_timedelta=timedelta,
-                convert_period=period,
-            )
-        except (OutOfBoundsDatetime, ValueError):
-            return values
+        converted = lib.maybe_convert_objects(
+            values,
+            convert_datetime=datetime,
+            convert_timedelta=timedelta,
+            convert_period=period,
+        )
         if converted is not values:
             return converted
 
 
@@ -738,7 +738,7 @@ def __init__(
 
         # For data is list-like, or Iterable (will consume into list)
         elif is_list_like(data):
-            if not isinstance(data, (abc.Sequence, ExtensionArray)):
+            if not isinstance(data, abc.Sequence):
                 if hasattr(data, "__array__"):
                     # GH#44616 big perf improvement for e.g. pytorch tensor
                     data = np.asarray(data)
 
@@ -81,6 +81,7 @@
     find_common_type,
     infer_dtype_from,
     maybe_cast_pointwise_result,
+    maybe_infer_to_datetimelike,
     np_can_hold_element,
 )
 from pandas.core.dtypes.common import (
@@ -503,9 +504,8 @@ def __new__(
                 arr = com.asarray_tuplesafe(data, dtype=_dtype_obj)
 
                 if dtype is None:
-                    arr = _maybe_cast_data_without_dtype(
-                        arr, cast_numeric_deprecated=True
-                    )
+                    arr = maybe_infer_to_datetimelike(arr)
+                    arr = ensure_wrapped_if_datetimelike(arr)
                     dtype = arr.dtype
 
             klass = cls._dtype_to_subclass(arr.dtype)
@@ -534,9 +534,7 @@ def __new__(
             subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj)
             if dtype is None:
                 # with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated
-                subarr = _maybe_cast_data_without_dtype(
-                    subarr, cast_numeric_deprecated=False
-                )
+                subarr = _maybe_cast_data_without_dtype(subarr)
                 dtype = subarr.dtype
             return Index(subarr, dtype=dtype, copy=copy, name=name)
 
@@ -3500,13 +3498,7 @@ def _assert_can_do_setop(self, other) -> bool:
 
     def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
         if not isinstance(other, Index):
-            # TODO(2.0): no need to special-case here once _with_infer
-            #  deprecation is enforced
-            if hasattr(other, "dtype"):
-                other = Index(other, name=self.name, dtype=other.dtype)
-            else:
-                # e.g. list
-                other = Index(other, name=self.name)
+            other = Index(other, name=self.name)
             result_name = self.name
         else:
             result_name = get_op_result_name(self, other)
@@ -7062,18 +7054,14 @@ def maybe_extract_name(name, obj, cls) -> Hashable:
     return name
 
 
-def _maybe_cast_data_without_dtype(
-    subarr: np.ndarray, cast_numeric_deprecated: bool = True
-) -> ArrayLike:
+def _maybe_cast_data_without_dtype(subarr: npt.NDArray[np.object_]) -> ArrayLike:
     """
     If we have an arraylike input but no passed dtype, try to infer
     a supported dtype.
 
     Parameters
     ----------
     subarr : np.ndarray[object]
-    cast_numeric_deprecated : bool, default True
-        Whether to issue a FutureWarning when inferring numeric dtypes.
 
     Returns
     -------
@@ -7088,12 +7076,6 @@ def _maybe_cast_data_without_dtype(
         convert_interval=True,
         dtype_if_all_nat=np.dtype("datetime64[ns]"),
     )
-    if result.dtype.kind in ["i", "u", "f"]:
-        if not cast_numeric_deprecated:
-            # i.e. we started with a list, not an ndarray[object]
-            return result
-        return subarr
-
     result = ensure_wrapped_if_datetimelike(result)
     return result
 
 
@@ -697,8 +697,7 @@ def _equal_values(self, other) -> bool:
         for left, right in zip(self.arrays, other.arrays):
             if not array_equals(left, right):
                 return False
-        else:
-            return True
+        return True
 
     # TODO
     # to_dict
 
@@ -1920,15 +1920,11 @@ def _catch_deprecated_value_error(err: Exception) -> None:
     which will no longer be raised in version 2.0.
     """
     if isinstance(err, ValueError):
-        # TODO(2.0): once DTA._validate_setitem_value deprecation
-        #  is enforced, stop catching ValueError here altogether
         if isinstance(err, IncompatibleFrequency):
             pass
         elif "'value.closed' is" in str(err):
             # IntervalDtype mismatched 'closed'
             pass
-        elif "Timezones don't match" not in str(err):
-            raise err
 
 
 class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ class TimeIntsToPydatetime:`
`51`	`51`	`_tzs,`
`52`	`52`	`)`
`53`	`53`	`param_names = ["box", "size", "tz"]`
`54`		`- # TODO: fold? freq?`
	`54`	`+ # TODO: fold?`
`55`	`55`
`56`	`56`	`def setup(self, box, size, tz):`
`57`	`57`	`if box == "date" and tz is not None:`