pandas-dev
diff --git a/‎asv_bench/benchmarks/sparse.py
Lines changed: 17 additions & 5 deletions b/‎asv_bench/benchmarks/sparse.py
Lines changed: 17 additions & 5 deletions
diff --git a/‎doc/source/whatsnew/v1.3.3.rst
Lines changed: 2 additions & 0 deletions b/‎doc/source/whatsnew/v1.3.3.rst
Lines changed: 2 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v1.4.0.rst
Lines changed: 9 additions & 1 deletion b/‎doc/source/whatsnew/v1.4.0.rst
Lines changed: 9 additions & 1 deletion
diff --git a/‎pandas/_libs/groupby.pyi
Lines changed: 4 additions & 0 deletions b/‎pandas/_libs/groupby.pyi
Lines changed: 4 additions & 0 deletions
diff --git a/‎pandas/_libs/groupby.pyx
Lines changed: 31 additions & 5 deletions b/‎pandas/_libs/groupby.pyx
Lines changed: 31 additions & 5 deletions
diff --git a/‎pandas/_libs/index.pyx
Lines changed: 8 additions & 9 deletions b/‎pandas/_libs/index.pyx
Lines changed: 8 additions & 9 deletions
diff --git a/‎pandas/_libs/lib.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/lib.pyx
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/core/arrays/masked.py
Lines changed: 2 additions & 0 deletions b/‎pandas/core/arrays/masked.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/core/arrays/sparse/accessor.py
Lines changed: 2 additions & 0 deletions b/‎pandas/core/arrays/sparse/accessor.py
Lines changed: 2 additions & 0 deletions
@@ -67,16 +67,28 @@ def time_sparse_series_from_coo(self):
 
 
 class ToCoo:
-    def setup(self):
+    params = [True, False]
+    param_names = ["sort_labels"]
+
+    def setup(self, sort_labels):
         s = Series([np.nan] * 10000)
         s[0] = 3.0
         s[100] = -1.0
         s[999] = 12.1
-        s.index = MultiIndex.from_product([range(10)] * 4)
-        self.ss = s.astype("Sparse")
 
-    def time_sparse_series_to_coo(self):
-        self.ss.sparse.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True)
+        s_mult_lvl = s.set_axis(MultiIndex.from_product([range(10)] * 4))
+        self.ss_mult_lvl = s_mult_lvl.astype("Sparse")
+
+        s_two_lvl = s.set_axis(MultiIndex.from_product([range(100)] * 2))
+        self.ss_two_lvl = s_two_lvl.astype("Sparse")
+
+    def time_sparse_series_to_coo(self, sort_labels):
+        self.ss_mult_lvl.sparse.to_coo(
+            row_levels=[0, 1], column_levels=[2, 3], sort_labels=sort_labels
+        )
+
+    def time_sparse_series_to_coo_single_level(self, sort_labels):
+        self.ss_two_lvl.sparse.to_coo(sort_labels=sort_labels)
 
 
 class Arithmetic:
 
@@ -17,9 +17,11 @@ Fixed regressions
 - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`)
 - Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`)
 - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`)
+- Fixed regression in :meth:`.GroupBy.quantile` which was failing with ``pandas.NA`` (:issue:`42849`)
 - Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`)
 - Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`)
 - Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`)
+- Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`)
 
 .. ---------------------------------------------------------------------------
 
 
@@ -76,13 +76,15 @@ Styler
   - :meth:`.Styler.bar` introduces additional arguments to control alignment and display (:issue:`26070`, :issue:`36419`), and it also validates the input arguments ``width`` and ``height`` (:issue:`42511`).
   - :meth:`.Styler.to_latex` introduces keyword argument ``environment``, which also allows a specific "longtable" entry through a separate jinja2 template (:issue:`41866`).
   - :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption`` (:issue:`41946`, :issue:`43149`).
-  - Keyword argument ``level`` is added to :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` for optionally controlling hidden levels in a MultiIndex (:issue:`25475`)
+  - Keyword arguments ``level`` and ``names`` added to :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` for additional control of visibility of MultiIndexes and index names (:issue:`25475`, :issue:`43404`, :issue:`43346`)
   - Global options have been extended to configure default ``Styler`` properties including formatting and encoding and mathjax options and LaTeX (:issue:`41395`)
 
 Formerly Styler relied on ``display.html.use_mathjax``, which has now been replaced by ``styler.html.mathjax``.
 
 There are also bug fixes and deprecations listed below.
 
+The ``caption`` argument is now validated (:issue:`43368`)
+
 .. _whatsnew_140.enhancements.pyarrow_csv_engine:
 
 Multithreaded CSV reading with a new CSV Engine based on pyarrow
@@ -272,6 +274,7 @@ Other Deprecations
 - Deprecated dropping of nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`EWM` aggregations (:issue:`42738`)
 - Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
 - Deprecated :meth:`.Styler.render` in favour of :meth:`.Styler.to_html` (:issue:`42140`)
+- Deprecated passing a string column label to ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
 
 .. ---------------------------------------------------------------------------
 
@@ -287,6 +290,8 @@ Performance improvements
 - Performance improvement in some :meth:`GroupBy.apply` operations (:issue:`42992`)
 - Performance improvement in :func:`read_stata` (:issue:`43059`)
 - Performance improvement in :meth:`to_datetime` with ``uint`` dtypes (:issue:`42606`)
+- Performance improvement in :meth:`Series.sparse.to_coo` (:issue:`42880`)
+-
 
 .. ---------------------------------------------------------------------------
 
@@ -377,6 +382,7 @@ I/O
 - Bug in :func:`read_fwf`, where difference in lengths of ``colspecs`` and ``names`` was not raising ``ValueError`` (:issue:`40830`)
 - Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
 - Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
+- Bug in unpickling an :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
 -
 
 Period
@@ -393,6 +399,7 @@ Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 - Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
 - Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
+- Bug in :meth:`GroupBy.max` and :meth:`GroupBy.min` with nullable integer dtypes losing precision (:issue:`41743`)
 - Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`)
 - Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not None (:issue:`41556`)
 - Bug in :meth:`SeriesGroupBy.nlargest` and :meth:`SeriesGroupBy.nsmallest` would have an inconsistent index when the input Series was sorted and ``n`` was greater than or equal to all group sizes (:issue:`15272`, :issue:`16345`, :issue:`29129`)
@@ -406,6 +413,7 @@ Reshaping
 - Improved error message when creating a :class:`DataFrame` column from a multi-dimensional :class:`numpy.ndarray` (:issue:`42463`)
 - :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
 - Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :meth:`pandas.CategoricalIndex` (:issue:`42425`)
+- Bug in :meth:`DataFrame.append` failing to retain dtypes when appended columns do not match (:issue:`43392`)
 -
 
 Sparse
 
@@ -123,13 +123,17 @@ def group_max(
     values: np.ndarray,  # ndarray[groupby_t, ndim=2]
     labels: np.ndarray,  # const int64_t[:]
     min_count: int = ...,
+    mask: np.ndarray | None = ...,
+    result_mask: np.ndarray | None = ...,
 ) -> None: ...
 def group_min(
     out: np.ndarray,  # groupby_t[:, ::1]
     counts: np.ndarray,  # int64_t[::1]
     values: np.ndarray,  # ndarray[groupby_t, ndim=2]
     labels: np.ndarray,  # const int64_t[:]
     min_count: int = ...,
+    mask: np.ndarray | None = ...,
+    result_mask: np.ndarray | None = ...,
 ) -> None: ...
 def group_cummin(
     out: np.ndarray,  # groupby_t[:, ::1]
 
@@ -1182,7 +1182,9 @@ cdef group_min_max(groupby_t[:, ::1] out,
                    const intp_t[::1] labels,
                    Py_ssize_t min_count=-1,
                    bint is_datetimelike=False,
-                   bint compute_max=True):
+                   bint compute_max=True,
+                   const uint8_t[:, ::1] mask=None,
+                   uint8_t[:, ::1] result_mask=None):
     """
     Compute minimum/maximum  of columns of `values`, in row groups `labels`.
 
@@ -1203,6 +1205,12 @@ cdef group_min_max(groupby_t[:, ::1] out,
         True if `values` contains datetime-like entries.
     compute_max : bint, default True
         True to compute group-wise max, False to compute min
+    mask : ndarray[bool, ndim=2], optional
+        If not None, entries that are True mark missing values in `values`;
+        otherwise the mask will not be used
+    result_mask : ndarray[bool, ndim=2], optional
+        If not None, these specify locations in the output that are NA.
+        Modified in-place.
 
     Notes
     -----
@@ -1215,6 +1223,8 @@ cdef group_min_max(groupby_t[:, ::1] out,
         ndarray[groupby_t, ndim=2] group_min_or_max
         bint runtime_error = False
         int64_t[:, ::1] nobs
+        bint uses_mask = mask is not None
+        bint isna_entry
 
     # TODO(cython 3.0):
     # Instead of `labels.shape[0]` use `len(labels)`
@@ -1249,7 +1259,12 @@ cdef group_min_max(groupby_t[:, ::1] out,
             for j in range(K):
                 val = values[i, j]
 
-                if not _treat_as_na(val, is_datetimelike):
+                if uses_mask:
+                    isna_entry = mask[i, j]
+                else:
+                    isna_entry = _treat_as_na(val, is_datetimelike)
+
+                if not isna_entry:
                     nobs[lab, j] += 1
                     if compute_max:
                         if val > group_min_or_max[lab, j]:
@@ -1265,7 +1280,10 @@ cdef group_min_max(groupby_t[:, ::1] out,
                         runtime_error = True
                         break
                     else:
-                        out[i, j] = nan_val
+                        if uses_mask:
+                            result_mask[i, j] = True
+                        else:
+                            out[i, j] = nan_val
                 else:
                     out[i, j] = group_min_or_max[i, j]
 
@@ -1282,7 +1300,9 @@ def group_max(groupby_t[:, ::1] out,
               ndarray[groupby_t, ndim=2] values,
               const intp_t[::1] labels,
               Py_ssize_t min_count=-1,
-              bint is_datetimelike=False) -> None:
+              bint is_datetimelike=False,
+              const uint8_t[:, ::1] mask=None,
+              uint8_t[:, ::1] result_mask=None) -> None:
     """See group_min_max.__doc__"""
     group_min_max(
         out,
@@ -1292,6 +1312,8 @@ def group_max(groupby_t[:, ::1] out,
         min_count=min_count,
         is_datetimelike=is_datetimelike,
         compute_max=True,
+        mask=mask,
+        result_mask=result_mask,
     )
 
 
@@ -1302,7 +1324,9 @@ def group_min(groupby_t[:, ::1] out,
               ndarray[groupby_t, ndim=2] values,
               const intp_t[::1] labels,
               Py_ssize_t min_count=-1,
-              bint is_datetimelike=False) -> None:
+              bint is_datetimelike=False,
+              const uint8_t[:, ::1] mask=None,
+              uint8_t[:, ::1] result_mask=None) -> None:
     """See group_min_max.__doc__"""
     group_min_max(
         out,
@@ -1312,6 +1336,8 @@ def group_min(groupby_t[:, ::1] out,
         min_count=min_count,
         is_datetimelike=is_datetimelike,
         compute_max=False,
+        mask=mask,
+        result_mask=result_mask,
     )
 
 
 
@@ -603,35 +603,34 @@ cdef class BaseMultiIndexCodesEngine:
     def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
         raise NotImplementedError("Implemented by subclass")
 
-    def _extract_level_codes(self, ndarray[object] target) -> np.ndarray:
+    def _extract_level_codes(self, target) -> np.ndarray:
         """
         Map the requested list of (tuple) keys to their integer representations
         for searching in the underlying integer index.
 
         Parameters
         ----------
-        target : ndarray[object]
-            Each key is a tuple, with a label for each level of the index.
+        target : MultiIndex
 
         Returns
         ------
         int_keys : 1-dimensional array of dtype uint64 or object
             Integers representing one combination each
         """
+        zt = [target._get_level_values(i) for i in range(target.nlevels)]
         level_codes = [lev.get_indexer(codes) + 1 for lev, codes
-                       in zip(self.levels, zip(*target))]
+                       in zip(self.levels, zt)]
         return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)
 
-    def get_indexer(self, ndarray[object] target) -> np.ndarray:
+    def get_indexer(self, target) -> np.ndarray:
         """
         Returns an array giving the positions of each value of `target` in
         `self.values`, where -1 represents a value in `target` which does not
         appear in `self.values`
 
         Parameters
         ----------
-        target : ndarray[object]
-            Each key is a tuple, with a label for each level of the index
+        target : MultiIndex
 
         Returns
         -------
@@ -742,8 +741,8 @@ cdef class BaseMultiIndexCodesEngine:
 
         return self._base.get_loc(self, lab_int)
 
-    def get_indexer_non_unique(self, ndarray[object] target):
-
+    def get_indexer_non_unique(self, target):
+        # target: MultiIndex
         lab_ints = self._extract_level_codes(target)
         indexer = self._base.get_indexer_non_unique(self, lab_ints)
 
 
@@ -1092,7 +1092,7 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool:
 cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1:
     return (
         # equiv: `isinstance(obj, abc.Iterable)`
-        hasattr(obj, "__iter__") and not isinstance(obj, type)
+        getattr(obj, "__iter__", None) is not None and not isinstance(obj, type)
         # we do not count strings/unicode/bytes as list-like
         and not isinstance(obj, (str, bytes))
         # exclude zero-dimensional numpy arrays, effectively scalars
 
@@ -123,6 +123,8 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
             raise ValueError("values must be a 1D array")
         if mask.ndim != 1:
             raise ValueError("mask must be a 1D array")
+        if values.shape != mask.shape:
+            raise ValueError("values and mask must have same shape")
 
         if copy:
             values = values.copy()
 
@@ -113,6 +113,8 @@ def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False):
         column_levels : tuple/list
         sort_labels : bool, default False
             Sort the row and column labels before forming the sparse matrix.
+            When `row_levels` and/or `column_levels` refer to a single level,
+            set to `True` for faster execution.
 
         Returns
         -------