
Sync Fork from Upstream Repo #182


Merged
merged 8 commits into from Apr 17, 2021
37 changes: 36 additions & 1 deletion doc/source/whatsnew/v1.3.0.rst
@@ -230,6 +230,38 @@ Notable bug fixes

These are bug fixes that might have notable behavior changes.

``Categorical.unique`` now always maintains same dtype as original
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Previously, when calling :meth:`~Categorical.unique` with categorical data, unused categories were removed
from the result, so the dtype of the new array could differ from that of the
original whenever some categories were not present in the unique values (:issue:`18291`)

As an example, given:

.. ipython:: python

dtype = pd.CategoricalDtype(['bad', 'neutral', 'good'], ordered=True)
cat = pd.Categorical(['good', 'good', 'bad', 'bad'], dtype=dtype)
original = pd.Series(cat)
unique = original.unique()

*pandas < 1.3.0*:

.. code-block:: ipython

In [1]: unique
['good', 'bad']
Categories (2, object): ['bad' < 'good']
In [2]: original.dtype == unique.dtype
False

*pandas >= 1.3.0*:

.. ipython:: python

unique
original.dtype == unique.dtype

Preserve dtypes in :meth:`~pandas.DataFrame.combine_first`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -790,6 +822,9 @@ Groupby/resample/rolling
- Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would incorrectly raise a ``ValueError`` when providing ``times`` (:issue:`40164`)
- Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would not retain ``com``, ``span``, ``alpha`` or ``halflife`` attributes (:issue:`40164`)
- :class:`core.window.ewm.ExponentialMovingWindow` now raises a ``NotImplementedError`` when specifying ``times`` with ``adjust=False`` due to an incorrect calculation (:issue:`40098`)
- Bug in :meth:`core.window.ewm.ExponentialMovingWindowGroupby.mean` where the ``times`` argument was ignored when ``engine='numba'`` (:issue:`40951`)
- Bug in :meth:`core.window.ewm.ExponentialMovingWindowGroupby.mean` where the wrong times were used in the case of multiple groups (:issue:`40951`)
- Bug in :class:`core.window.ewm.ExponentialMovingWindowGroupby` where the times vector and values became out of sync for non-trivial groups (:issue:`40951`)
- Bug in :meth:`Series.asfreq` and :meth:`DataFrame.asfreq` dropping rows when the index is not sorted (:issue:`39805`)
- Bug in aggregation functions for :class:`DataFrame` not respecting ``numeric_only`` argument when ``level`` keyword was given (:issue:`40660`)
- Bug in :meth:`SeriesGroupBy.aggregate` where using a user-defined function to aggregate a ``Series`` with an object-typed :class:`Index` caused an incorrect :class:`Index` shape (:issue:`40014`)
@@ -830,7 +865,7 @@ ExtensionArray
- Bug in :meth:`DataFrame.where` when ``other`` is a :class:`Series` with :class:`ExtensionArray` dtype (:issue:`38729`)
- Fixed bug where :meth:`Series.idxmax`, :meth:`Series.idxmin` and ``argmax/min`` fail when the underlying data is :class:`ExtensionArray` (:issue:`32749`, :issue:`33719`, :issue:`36566`)
- Fixed a bug where some properties of subclasses of :class:`PandasExtensionDtype` were improperly cached (:issue:`40329`)
-
- Bug in :meth:`DataFrame.mask` where masking a :class:`DataFrame` with an :class:`ExtensionArray` dtype raises ``ValueError`` (:issue:`40941`); see the sketch below
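
As a quick illustration of the ``DataFrame.mask`` entry above, a minimal sketch (assuming a nullable ``Int64`` column; this example is not taken from the PR's test suite):

.. code-block:: python

    import pandas as pd

    # frame backed by an ExtensionArray (nullable Int64) dtype
    df = pd.DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")})

    # before the fix this could raise ValueError; it now masks to <NA>
    print(df.mask(df["a"] > 1))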

Styler
^^^^^^
3 changes: 2 additions & 1 deletion pandas/_libs/hashtable_class_helper.pxi.in
@@ -791,7 +791,8 @@ cdef class StringHashTable(HashTable):
raise KeyError(key)

@cython.boundscheck(False)
def get_indexer(self, ndarray[object] values):
def get_indexer(self, ndarray[object] values) -> ndarray:
# -> np.ndarray[np.intp]
cdef:
Py_ssize_t i, n = len(values)
ndarray[intp_t] labels = np.empty(n, dtype=np.intp)
11 changes: 4 additions & 7 deletions pandas/_libs/lib.pyi
@@ -4,6 +4,7 @@
from typing import (
Any,
Callable,
Generator,
)

import numpy as np
@@ -52,8 +53,7 @@ def is_bool_array(values: np.ndarray, skipna: bool = False): ...

def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ...

# TODO: gen: Generator?
def fast_unique_multiple_list_gen(gen: object, sort: bool = True) -> list: ...
def fast_unique_multiple_list_gen(gen: Generator, sort: bool = True) -> list: ...
def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...

@@ -90,10 +90,9 @@ def infer_datetimelike_array(
arr: np.ndarray # np.ndarray[object]
) -> str: ...

# TODO: new_dtype -> np.dtype?
def astype_intsafe(
arr: np.ndarray, # np.ndarray[object]
new_dtype,
new_dtype: np.dtype,
) -> np.ndarray: ...

def fast_zip(ndarrays: list) -> np.ndarray: ... # np.ndarray[object]
@@ -134,15 +133,13 @@ def memory_usage_of_objects(
) -> int: ... # np.int64


# TODO: f: Callable?
# TODO: dtype -> DtypeObj?
def map_infer_mask(
arr: np.ndarray,
f: Callable[[Any], Any],
mask: np.ndarray, # const uint8_t[:]
convert: bool = ...,
na_value: Any = ...,
dtype: Any = ...,
dtype: np.dtype = ...,
) -> ArrayLike: ...

def indices_fast(
12 changes: 7 additions & 5 deletions pandas/_libs/lib.pyx
@@ -633,7 +633,7 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:

@cython.wraparound(False)
@cython.boundscheck(False)
def astype_intsafe(ndarray[object] arr, new_dtype) -> ndarray:
def astype_intsafe(ndarray[object] arr, cnp.dtype new_dtype) -> ndarray:
cdef:
Py_ssize_t i, n = len(arr)
object val
@@ -661,7 +661,8 @@ cpdef ndarray[object] ensure_string_array(
bint copy=True,
bint skipna=True,
):
"""Returns a new numpy array with object dtype and only strings and na values.
"""
Returns a new numpy array with object dtype and only strings and na values.

Parameters
----------
@@ -679,7 +680,7 @@

Returns
-------
ndarray
np.ndarray[object]
An array with the input array's elements cast to str or nan-like.
"""
cdef:
@@ -2452,7 +2453,8 @@ no_default = NoDefault.no_default # Sentinel indicating the default value.
@cython.boundscheck(False)
@cython.wraparound(False)
def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
object na_value=no_default, object dtype=object) -> "ArrayLike":
object na_value=no_default, cnp.dtype dtype=np.dtype(object)
) -> "ArrayLike":
"""
Substitute for np.vectorize with pandas-friendly dtype inference.

@@ -2472,7 +2474,7 @@

Returns
-------
ndarray
np.ndarray or ExtensionArray
"""
cdef:
Py_ssize_t i, n
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/fields.pyx
@@ -93,7 +93,7 @@ def build_field_sarray(const int64_t[:] dtindex):
return out


def month_position_check(fields, weekdays):
def month_position_check(fields, weekdays) -> str | None:
cdef:
int32_t daysinmonth, y, m, d
bint calendar_end = True
@@ -755,7 +755,7 @@ cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit):
return _floor_int64(values + unit // 2, unit)


def round_nsint64(values: np.ndarray, mode: RoundTo, nanos) -> np.ndarray:
def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray:
"""
Applies rounding mode at given frequency

10 changes: 6 additions & 4 deletions pandas/_libs/window/aggregations.pyx
@@ -1485,8 +1485,7 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
com : float64
adjust : bool
ignore_na : bool
times : ndarray (float64 type)
halflife : float64
deltas : ndarray (float64 type)

Returns
-------
@@ -1495,7 +1494,7 @@

cdef:
Py_ssize_t i, j, s, e, nobs, win_size, N = len(vals), M = len(start)
const float64_t[:] sub_vals
const float64_t[:] sub_deltas, sub_vals
ndarray[float64_t] sub_output, output = np.empty(N, dtype=float)
float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur
bint is_observation
@@ -1511,6 +1510,9 @@
s = start[j]
e = end[j]
sub_vals = vals[s:e]
# note that len(deltas) = len(vals) - 1 and deltas[i] is to be used in
# conjunction with vals[i+1]
sub_deltas = deltas[s:e - 1]
win_size = len(sub_vals)
sub_output = np.empty(win_size, dtype=float)

@@ -1528,7 +1530,7 @@
if weighted_avg == weighted_avg:

if is_observation or not ignore_na:
old_wt *= old_wt_factor ** deltas[i - 1]
old_wt *= old_wt_factor ** sub_deltas[i - 1]
if is_observation:

# avoid numerical errors on constant series
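For intuition, a simplified NumPy sketch of the time-adjusted EWMA recurrence and the ``deltas`` alignment the comment above describes: ``deltas[i]`` weights the gap between ``vals[i]`` and ``vals[i + 1]``. This is an illustration only; the real kernel also handles NaNs, ``ignore_na``, and windowed start/end offsets:

.. code-block:: python

    import numpy as np

    def ewma_with_deltas(vals, deltas, com, adjust=True):
        """Illustrative time-adjusted EWMA; NaN handling omitted."""
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha

        out = np.empty(len(vals))
        out[0] = weighted_avg = vals[0]
        old_wt = 1.0
        for i in range(1, len(vals)):
            # deltas[i - 1] is the gap between vals[i - 1] and vals[i],
            # matching the sub_deltas alignment in the kernel above
            old_wt *= old_wt_factor ** deltas[i - 1]
            weighted_avg = (old_wt * weighted_avg + new_wt * vals[i]) / (
                old_wt + new_wt
            )
            old_wt = old_wt + new_wt if adjust else 1.0
            out[i] = weighted_avg
        return out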
2 changes: 1 addition & 1 deletion pandas/core/arrays/base.py
@@ -609,7 +609,7 @@ def argsort(

Returns
-------
ndarray
np.ndarray[np.intp]
Array of indices that sort ``self``. If NaN values are contained,
NaN values are placed at the end.

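A tiny usage sketch of the annotated return type (assuming a nullable integer array; the assertion reflects the ``np.intp`` annotation above):

.. code-block:: python

    import numpy as np
    import pandas as pd

    arr = pd.array([3, 1, 2], dtype="Int64")  # an ExtensionArray
    idx = arr.argsort()
    assert idx.dtype == np.intp  # indices come back as np.ndarray[np.intp]
    print(arr[idx])              # [1, 2, 3]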
43 changes: 10 additions & 33 deletions pandas/core/arrays/categorical.py
@@ -1599,7 +1599,7 @@ def argsort(self, ascending=True, kind="quicksort", **kwargs):

Returns
-------
numpy.array
np.ndarray[np.intp]

See Also
--------
@@ -2127,16 +2127,15 @@ def mode(self, dropna=True):
def unique(self):
"""
Return the ``Categorical`` whose ``categories`` and ``codes`` are
unique. Unused categories are NOT returned.
unique.

.. versionchanged:: 1.3.0

- unordered category: values and categories are sorted by appearance
order.
- ordered category: values are sorted by appearance order, categories
keeps existing order.
Previously, unused categories were dropped from the new categories.

Returns
-------
unique values : ``Categorical``
Categorical

See Also
--------
@@ -2146,37 +2145,15 @@ def unique(self):

Examples
--------
An unordered Categorical will return categories in the
order of appearance.

>>> pd.Categorical(list("baabc")).unique()
['b', 'a', 'c']
Categories (3, object): ['b', 'a', 'c']

>>> pd.Categorical(list("baabc"), categories=list("abc")).unique()
['b', 'a', 'c']
Categories (3, object): ['b', 'a', 'c']

An ordered Categorical preserves the category ordering.

>>> pd.Categorical(
... list("baabc"), categories=list("abc"), ordered=True
... ).unique()
['b', 'a', 'c']
Categories (3, object): ['a', 'b', 'c']
>>> pd.Categorical(list("baab"), categories=list("abc"), ordered=True).unique()
['b', 'a']
Categories (3, object): ['a' < 'b' < 'c']
"""
# unlike np.unique, unique1d does not sort
unique_codes = unique1d(self.codes)
cat = self.copy()

# keep nan in codes
cat._ndarray = unique_codes

# exclude nan from indexer for categories
take_codes = unique_codes[unique_codes != -1]
if self.ordered:
take_codes = np.sort(take_codes)
return cat.set_categories(cat.categories.take(take_codes))
return self._from_backing_data(unique_codes)

def _values_for_factorize(self):
return self._ndarray, -1
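Conceptually, the new implementation de-duplicates the integer codes in order of first appearance and leaves the dtype untouched. A rough NumPy equivalent (illustrative only, not the exact code path):

.. code-block:: python

    import numpy as np
    import pandas as pd

    cat = pd.Categorical(list("baab"), categories=list("abc"), ordered=True)

    # unlike np.unique, keep the codes in order of first appearance
    _, first_pos = np.unique(cat.codes, return_index=True)
    unique_codes = cat.codes[np.sort(first_pos)]

    result = pd.Categorical.from_codes(unique_codes, dtype=cat.dtype)
    assert result.dtype == cat.dtype  # categories and ordering preserved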
5 changes: 3 additions & 2 deletions pandas/core/arrays/datetimes.py
@@ -855,8 +855,9 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray
This method takes a time zone (tz) naive Datetime Array/Index object
and makes this time zone aware. It does not move the time to another
time zone.
Time zone localization helps to switch from time zone aware to time
zone unaware objects.

This method can also be used to do the inverse -- to create a time
zone unaware object from an aware object. To that end, pass `tz=None`.

Parameters
----------
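A short round-trip sketch of the behavior the revised docstring describes (dates and zone chosen arbitrarily):

.. code-block:: python

    import pandas as pd

    naive = pd.date_range("2021-04-17", periods=3, freq="D")
    aware = naive.tz_localize("US/Eastern")  # attach a time zone

    # the inverse: tz=None drops the zone, keeping the wall-clock times
    assert aware.tz_localize(None).equals(naive)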
8 changes: 4 additions & 4 deletions pandas/core/arrays/timedeltas.py
@@ -135,10 +135,10 @@ class TimedeltaArray(dtl.TimelikeOps):
# define my properties & methods for delegation
_other_ops: list[str] = []
_bool_ops: list[str] = []
_object_ops = ["freq"]
_field_ops = ["days", "seconds", "microseconds", "nanoseconds"]
_datetimelike_ops = _field_ops + _object_ops + _bool_ops
_datetimelike_methods = [
_object_ops: list[str] = ["freq"]
_field_ops: list[str] = ["days", "seconds", "microseconds", "nanoseconds"]
_datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
_datetimelike_methods: list[str] = [
"to_pytimedelta",
"total_seconds",
"round",
28 changes: 4 additions & 24 deletions pandas/core/dtypes/cast.py
@@ -4,7 +4,6 @@

from __future__ import annotations

from contextlib import suppress
from datetime import (
date,
datetime,
@@ -29,7 +28,6 @@
NaT,
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
Period,
Timedelta,
Timestamp,
conversion,
@@ -87,7 +85,6 @@
PeriodDtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCExtensionArray,
ABCSeries,
)
@@ -249,9 +246,6 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike:
try to cast to the specified dtype (e.g. convert back to bool/int
or could be an astype of float64->float32
"""
if isinstance(result, ABCDataFrame):
# see test_pivot_table_doctest_case
return result
do_round = False

if isinstance(dtype, str):
@@ -278,15 +272,9 @@

dtype = np.dtype(dtype)

elif dtype.type is Period:
from pandas.core.arrays import PeriodArray

with suppress(TypeError):
# e.g. TypeError: int() argument must be a string, a
# bytes-like object or a number, not 'Period

# error: "dtype[Any]" has no attribute "freq"
return PeriodArray(result, freq=dtype.freq) # type: ignore[attr-defined]
if not isinstance(dtype, np.dtype):
# enforce our signature annotation
raise TypeError(dtype) # pragma: no cover

converted = maybe_downcast_numeric(result, dtype, do_round)
if converted is not result:
@@ -295,15 +283,7 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike:
# a datetimelike
# GH12821, iNaT is cast to float
if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]:
if isinstance(dtype, DatetimeTZDtype):
# convert to datetime and change timezone
i8values = result.astype("i8", copy=False)
cls = dtype.construct_array_type()
# equiv: DatetimeArray(i8values).tz_localize("UTC").tz_convert(dtype.tz)
dt64values = i8values.view("M8[ns]")
result = cls._simple_new(dt64values, dtype=dtype)
else:
result = result.astype(dtype)
result = result.astype(dtype)

return result

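The net effect of this change is that the helper now insists on a plain ``np.dtype`` once string aliases are resolved. A simplified sketch of the guard (``_check_downcast_dtype`` is a hypothetical name; the real function resolves more aliases before this point):

.. code-block:: python

    import numpy as np

    def _check_downcast_dtype(dtype):
        # hypothetical distillation of the new guard in
        # maybe_downcast_to_dtype: resolve string aliases, then reject
        # anything that is not a plain np.dtype (e.g. PeriodDtype or
        # DatetimeTZDtype), enforcing the signature annotation
        if isinstance(dtype, str):
            dtype = np.dtype(dtype)
        if not isinstance(dtype, np.dtype):
            raise TypeError(dtype)
        return dtype

    _check_downcast_dtype("float32")  # dtype('float32')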