
Sync Fork from Upstream Repo #243


Merged
merged 18 commits into from
Jul 28, 2021
Commits
a1d9c96
BUG: TypeError when shifting DataFrame created by concatenation of sl…
wjsi Jul 27, 2021
875b224
skipped doc test (#42718)
willie3838 Jul 28, 2021
e64fcfa
CLN: nancorr cleanups (#42757)
mzeitlin11 Jul 28, 2021
fda3162
BUG: Issue with pd.cut on Series with duplicate index (#42448)
debnathshoham Jul 28, 2021
1608f93
TST: test_column_types_consistent (#42725)
shiv-io Jul 28, 2021
1a13f0d
TST GH27994 Add test for dropping empty list / index for a DataFrame …
Jul 28, 2021
902000a
DOC: Add user_guide examples and docstring example for df.plot.box an…
charlesdong1991 Jul 28, 2021
97e4135
TST: doctest fix for pandas.io.formats.style_render._parse_latex_tabl…
aneesh98 Jul 28, 2021
752ee7f
CLN: clean sqlalchemy import (#42546)
fangchenli Jul 28, 2021
6aee8e7
ENH: add `environment`, e.g. "longtable", to `Styler.to_latex` (#41866)
attack68 Jul 28, 2021
8e844a2
BUG: compute.use_numexpr option not respected (#42668)
saehuihwang Jul 28, 2021
f83b8f2
REF: GroupBy._get_cythonized_result (#42742)
jbrockmendel Jul 28, 2021
a9cb219
PERF: groupsort_indexer contiguity (#42031)
mzeitlin11 Jul 28, 2021
5ef509f
BUG: partial-indexing on MultiIndex with IntervalIndex level (#42569)
jbrockmendel Jul 28, 2021
8e79e33
BUG: CustomBusinessMonthBegin(End) sometimes ignores extra offset (GH…
jackzyliu Jul 28, 2021
f72c19b
BUG: Mitigate division with zero in roll_var (#42459)
benHeid Jul 28, 2021
24305ab
TST: Test Loc to set Multiple Items to multiple new columns (#42665)
GYvan Jul 28, 2021
ccb3365
remove doctests for styler, ahead of development. (#42767)
attack68 Jul 28, 2021
3 changes: 1 addition & 2 deletions ci/code_checks.sh
@@ -121,8 +121,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
pandas/io/parsers/ \
pandas/io/sas/ \
pandas/io/sql.py \
pandas/tseries/ \
pandas/io/formats/style_render.py
pandas/tseries/
RET=$(($RET + $?)) ; echo $MSG "DONE"

fi
54 changes: 54 additions & 0 deletions doc/source/user_guide/visualization.rst
@@ -316,6 +316,34 @@ The ``by`` keyword can be specified to plot grouped histograms:
@savefig grouped_hist.png
data.hist(by=np.random.randint(0, 4, 1000), figsize=(6, 4));

.. ipython:: python
:suppress:

plt.close("all")
np.random.seed(123456)

In addition, the ``by`` keyword can also be specified in :meth:`DataFrame.plot.hist`.

.. versionchanged:: 1.4.0

.. ipython:: python

data = pd.DataFrame(
{
"a": np.random.choice(["x", "y", "z"], 1000),
"b": np.random.choice(["e", "f", "g"], 1000),
"c": np.random.randn(1000),
"d": np.random.randn(1000) - 1,
},
)

@savefig grouped_hist_by.png
data.plot.hist(by=["a", "b"], figsize=(10, 5));

.. ipython:: python
:suppress:

plt.close("all")

.. _visualization.box:

@@ -448,6 +476,32 @@ columns:

plt.close("all")

You could also create groupings with :meth:`DataFrame.plot.box`, for instance:

.. versionchanged:: 1.4.0

.. ipython:: python
:suppress:

plt.close("all")
np.random.seed(123456)

.. ipython:: python
:okwarning:

df = pd.DataFrame(np.random.rand(10, 3), columns=["Col1", "Col2", "Col3"])
df["X"] = pd.Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])

plt.figure();

@savefig box_plot_ex4.png
bp = df.plot.box(column=["Col1", "Col2"], by="X")

.. ipython:: python
:suppress:

plt.close("all")

.. _visualization.box.return:

In ``boxplot``, the return type can be controlled by the ``return_type`` keyword. The valid choices are ``{"axes", "dict", "both", None}``.
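A quick sketch of the three non-``None`` ``return_type`` values (assumes matplotlib is installed; the ``Agg`` backend keeps it headless):

```python
import matplotlib
matplotlib.use("Agg")  # headless backend; no display needed
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"])

# "axes" (the default) returns the matplotlib Axes,
# "dict" the dictionary of box-plot artists,
# "both" a namedtuple (ax, lines) combining the two.
ax = df.boxplot(return_type="axes")
artists = df.boxplot(return_type="dict")
both = df.boxplot(return_type="both")
print(sorted(artists))  # artist groups: boxes, caps, fliers, medians, whiskers
```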
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.2.rst
@@ -17,6 +17,7 @@ Fixed regressions
- Performance regression in :meth:`DataFrame.isin` and :meth:`Series.isin` for nullable data types (:issue:`42714`)
- Regression in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)
- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
- Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`)
-

.. ---------------------------------------------------------------------------
7 changes: 5 additions & 2 deletions doc/source/whatsnew/v1.4.0.rst
@@ -35,6 +35,7 @@ Other enhancements
- Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`)
- :meth:`Styler.bar` now validates the input argument ``width`` and ``height`` (:issue:`42511`)
- :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
- Added keyword argument ``environment`` to :meth:`.Styler.to_latex` also allowing a specific "longtable" entry with a separate jinja2 template (:issue:`41866`)
-

.. ---------------------------------------------------------------------------
@@ -203,7 +204,7 @@ Numeric
^^^^^^^
- Bug in :meth:`DataFrame.rank` raising ``ValueError`` with ``object`` columns and ``method="first"`` (:issue:`41931`)
- Bug in :meth:`DataFrame.rank` treating missing values and extreme values as equal (for example ``np.nan`` and ``np.inf``), causing incorrect results when ``na_option="bottom"`` or ``na_option="top"`` used (:issue:`41931`)
-
- Bug in ``numexpr`` engine still being used when the option ``compute.use_numexpr`` is set to ``False`` (:issue:`32556`)

Conversion
^^^^^^^^^^
@@ -260,11 +261,12 @@ Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^
- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
-
- Bug in :meth:`DataFrame.groupby.rolling.var` where the rolling variance was calculated only for the first group (:issue:`42442`)

Reshaping
^^^^^^^^^
- :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
- Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :meth:`pandas.CategoricalIndex` (:issue:`42425`)
-

Sparse
@@ -284,6 +286,7 @@ Styler

Other
^^^^^
- Bug in :meth:`CustomBusinessMonthBegin.__add__` (:meth:`CustomBusinessMonthEnd.__add__`) not applying the extra ``offset`` parameter when beginning (end) of the target month is already a business day (:issue:`41356`)

.. ***DO NOT USE THIS SECTION***

27 changes: 13 additions & 14 deletions pandas/_libs/algos.pyx
@@ -217,8 +217,8 @@ def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups):
This is a reverse of the label factorization process.
"""
cdef:
Py_ssize_t i, loc, label, n
ndarray[intp_t] indexer, where, counts
Py_ssize_t i, label, n
intp_t[::1] indexer, where, counts

counts = np.zeros(ngroups + 1, dtype=np.intp)
n = len(index)
@@ -241,7 +241,7 @@ def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups):
indexer[where[label]] = i
where[label] += 1

return indexer, counts
return indexer.base, counts.base
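For orientation, the routine is a stable counting sort over group labels; a pure-Python sketch of the same loop (hypothetical helper, mirroring the Cython code above — the diff's switch from ``ndarray[intp_t]`` to ``intp_t[::1]`` declares C-contiguous memoryviews for faster access, and ``.base`` hands the underlying ndarray back to the caller):

```python
import numpy as np

def groupsort_indexer(labels, ngroups):
    """Return positions that sort `labels` by group, stable within groups.

    Label -1 (missing) sorts first, which is why counts are offset by one slot.
    """
    counts = np.zeros(ngroups + 1, dtype=np.intp)
    for lab in labels:
        counts[lab + 1] += 1  # slot 0 collects the -1 (NA) group

    # exclusive prefix sum: first write position for each group
    where = np.zeros(ngroups + 1, dtype=np.intp)
    for i in range(1, ngroups + 1):
        where[i] = where[i - 1] + counts[i - 1]

    indexer = np.zeros(len(labels), dtype=np.intp)
    for i, lab in enumerate(labels):
        indexer[where[lab + 1]] = i  # place original position, in order
        where[lab + 1] += 1
    return indexer, counts

idx, cnt = groupsort_indexer(np.array([1, 0, 1, -1, 0]), 2)
print(list(idx), list(cnt))  # [3, 1, 4, 0, 2] [1, 2, 2]
```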


cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
@@ -325,11 +325,10 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
cdef:
Py_ssize_t i, j, xi, yi, N, K
bint minpv
ndarray[float64_t, ndim=2] result
float64_t[:, ::1] result
ndarray[uint8_t, ndim=2] mask
int64_t nobs = 0
float64_t vx, vy, meanx, meany, divisor, prev_meany, prev_meanx, ssqdmx
float64_t ssqdmy, covxy
float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy

N, K = (<object>mat).shape

@@ -352,13 +351,13 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
vx = mat[i, xi]
vy = mat[i, yi]
nobs += 1
prev_meanx = meanx
prev_meany = meany
meanx = meanx + 1 / nobs * (vx - meanx)
meany = meany + 1 / nobs * (vy - meany)
ssqdmx = ssqdmx + (vx - meanx) * (vx - prev_meanx)
ssqdmy = ssqdmy + (vy - meany) * (vy - prev_meany)
covxy = covxy + (vx - meanx) * (vy - prev_meany)
dx = vx - meanx
dy = vy - meany
meanx += 1 / nobs * dx
meany += 1 / nobs * dy
ssqdmx += (vx - meanx) * dx
ssqdmy += (vy - meany) * dy
covxy += (vx - meanx) * dy

if nobs < minpv:
result[xi, yi] = result[yi, xi] = NaN
@@ -370,7 +369,7 @@
else:
result[xi, yi] = result[yi, xi] = NaN

return result
return result.base
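The rewritten loop is a one-pass (Welford-style) co-moment update: ``dx``/``dy`` are deltas from the *old* means, and each sum-of-squares term pairs the *new* mean with the old delta. A pure-Python sketch for a single column pair (hypothetical helper name):

```python
def welford_cov(xs, ys):
    """One-pass sample covariance, mirroring the cleaned-up nancorr update."""
    nobs = 0
    meanx = meany = ssqdmx = ssqdmy = covxy = 0.0
    for vx, vy in zip(xs, ys):
        nobs += 1
        dx = vx - meanx               # delta from the old mean
        dy = vy - meany
        meanx += dx / nobs            # update the means first...
        meany += dy / nobs
        ssqdmx += (vx - meanx) * dx   # ...then pair new mean with old delta
        ssqdmy += (vy - meany) * dy
        covxy += (vx - meanx) * dy
    divisor = nobs - 1                # sample (ddof=1) normalization
    return covxy / divisor if divisor > 0 else float("nan")

print(welford_cov([1.0, 2.0, 3.0], [2.0, 4.0, 6.0]))  # 2.0
```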

# ----------------------------------------------------------------------
# Pairwise Spearman correlation
8 changes: 7 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
@@ -3370,7 +3370,10 @@ cdef class _CustomBusinessMonth(BusinessMixin):
"""
Define default roll function to be called in apply method.
"""
cbday = CustomBusinessDay(n=self.n, normalize=False, **self.kwds)
cbday_kwds = self.kwds.copy()
cbday_kwds['offset'] = timedelta(0)

cbday = CustomBusinessDay(n=1, normalize=False, **cbday_kwds)

if self._prefix.endswith("S"):
# MonthBegin
@@ -3414,6 +3417,9 @@

new = cur_month_offset_date + n * self.m_offset
result = self.cbday_roll(new)

if self.offset:
result = result + self.offset
return result
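A minimal illustration of the fixed behavior (a sketch; the date is chosen so the month boundary is already a business day, the case where the extra ``offset`` used to be silently dropped):

```python
from datetime import timedelta

import pandas as pd
from pandas.tseries.offsets import CustomBusinessMonthBegin

# 2021-11-01 is a Monday, i.e. already a business day, so the cbday roll
# is a no-op; with the fix the extra `offset` is still applied afterwards.
off = CustomBusinessMonthBegin(offset=timedelta(days=1))
print(pd.Timestamp("2021-10-15") + off)  # month begin plus one day: 2021-11-02
```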


5 changes: 4 additions & 1 deletion pandas/_libs/window/aggregations.pyx
@@ -310,7 +310,10 @@ cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x,
t = y - mean_x[0]
compensation[0] = t + mean_x[0] - y
delta = t
mean_x[0] = mean_x[0] + delta / nobs[0]
if nobs[0]:
mean_x[0] = mean_x[0] + delta / nobs[0]
else:
mean_x[0] = 0
ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0])
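The guard avoids dividing by a zero ``nobs``. A pure-Python sketch of the patched Kahan-compensated Welford step (a list stands in for the C output pointers):

```python
def add_var(val, state):
    """One add_var step; `state` = [nobs, mean_x, ssqdm_x, compensation]."""
    nobs, mean_x, ssqdm_x, comp = state
    if val != val:                   # NaN: leave the accumulators untouched
        return state
    nobs += 1
    prev_mean = mean_x - comp
    y = val - comp
    t = y - mean_x
    comp = t + mean_x - y            # Kahan numerical-error compensation
    delta = t
    if nobs:                         # the new guard: no division when nobs == 0
        mean_x = mean_x + delta / nobs
    else:
        mean_x = 0
    ssqdm_x = ssqdm_x + (val - prev_mean) * (val - mean_x)
    return [nobs, mean_x, ssqdm_x, comp]

state = [0, 0.0, 0.0, 0.0]
for v in [1.0, 2.0, 3.0]:
    state = add_var(v, state)
print(state[2] / (state[0] - 1))  # sample variance of [1, 2, 3]: 1.0
```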


3 changes: 2 additions & 1 deletion pandas/core/computation/eval.py
@@ -43,9 +43,10 @@ def _check_engine(engine: str | None) -> str:
Engine name.
"""
from pandas.core.computation.check import NUMEXPR_INSTALLED
from pandas.core.computation.expressions import USE_NUMEXPR

if engine is None:
engine = "numexpr" if NUMEXPR_INSTALLED else "python"
engine = "numexpr" if USE_NUMEXPR else "python"

if engine not in ENGINES:
valid_engines = list(ENGINES.keys())
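The user-visible effect of this change can be sketched as follows (assumes a pandas build containing the fix; the option name is real):

```python
import pandas as pd

# After the fix, the default eval engine honors compute.use_numexpr
# instead of only checking whether numexpr is importable.
pd.set_option("compute.use_numexpr", False)
result = pd.eval("1 + 2")   # now evaluated with the "python" engine
pd.set_option("compute.use_numexpr", True)
print(result)  # 3
```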
83 changes: 44 additions & 39 deletions pandas/core/groupby/groupby.py
@@ -2897,16 +2897,15 @@ def _get_cythonized_result(

ids, _, ngroups = grouper.group_info
output: dict[base.OutputKey, np.ndarray] = {}
base_func = getattr(libgroupby, how)

error_msg = ""
for idx, obj in enumerate(self._iterate_slices()):
name = obj.name
values = obj._values

if numeric_only and not is_numeric_dtype(values.dtype):
continue
base_func = getattr(libgroupby, how)
base_func = partial(base_func, labels=ids)
if needs_ngroups:
base_func = partial(base_func, ngroups=ngroups)
if min_count is not None:
base_func = partial(base_func, min_count=min_count)

def blk_func(values: ArrayLike) -> ArrayLike:
if aggregate:
result_sz = ngroups
else:
@@ -2915,54 +2914,31 @@
result = np.zeros(result_sz, dtype=cython_dtype)
if needs_2d:
result = result.reshape((-1, 1))
func = partial(base_func, result)
func = partial(base_func, out=result)

inferences = None

if needs_counts:
counts = np.zeros(self.ngroups, dtype=np.int64)
func = partial(func, counts)
func = partial(func, counts=counts)

if needs_values:
vals = values
if pre_processing:
try:
vals, inferences = pre_processing(vals)
except TypeError as err:
error_msg = str(err)
howstr = how.replace("group_", "")
warnings.warn(
"Dropping invalid columns in "
f"{type(self).__name__}.{howstr} is deprecated. "
"In a future version, a TypeError will be raised. "
f"Before calling .{howstr}, select only columns which "
"should be valid for the function.",
FutureWarning,
stacklevel=3,
)
continue
vals, inferences = pre_processing(vals)

vals = vals.astype(cython_dtype, copy=False)
if needs_2d:
vals = vals.reshape((-1, 1))
func = partial(func, vals)

func = partial(func, ids)

if min_count is not None:
func = partial(func, min_count)
func = partial(func, values=vals)

if needs_mask:
mask = isna(values).view(np.uint8)
func = partial(func, mask)

if needs_ngroups:
func = partial(func, ngroups)
func = partial(func, mask=mask)

if needs_nullable:
is_nullable = isinstance(values, BaseMaskedArray)
func = partial(func, nullable=is_nullable)
if post_processing:
post_processing = partial(post_processing, nullable=is_nullable)

func(**kwargs) # Call func to modify indexer values in place

@@ -2973,9 +2949,38 @@
result = algorithms.take_nd(values, result)

if post_processing:
result = post_processing(result, inferences)
pp_kwargs = {}
if needs_nullable:
pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray)

key = base.OutputKey(label=name, position=idx)
result = post_processing(result, inferences, **pp_kwargs)

return result

error_msg = ""
for idx, obj in enumerate(self._iterate_slices()):
values = obj._values

if numeric_only and not is_numeric_dtype(values.dtype):
continue

try:
result = blk_func(values)
except TypeError as err:
error_msg = str(err)
howstr = how.replace("group_", "")
warnings.warn(
"Dropping invalid columns in "
f"{type(self).__name__}.{howstr} is deprecated. "
"In a future version, a TypeError will be raised. "
f"Before calling .{howstr}, select only columns which "
"should be valid for the function.",
FutureWarning,
stacklevel=3,
)
continue

key = base.OutputKey(label=obj.name, position=idx)
output[key] = result

# error_msg is "" on a frame/series with no rows or columns
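The refactor's key move is binding every kernel argument to ``functools.partial`` by keyword, so optional pieces (``counts``, ``mask``, ``ngroups``, …) can be attached in any order without corrupting a positional layout. A toy sketch (``group_op`` is a hypothetical stand-in for a ``libgroupby`` kernel):

```python
from functools import partial

def group_op(out, counts, values, labels):
    """Stand-in kernel: sum `values` into `out` by group label."""
    for i, lab in enumerate(labels):
        if lab >= 0:                 # label -1 marks missing / dropped rows
            out[lab] += values[i]
            counts[lab] += 1

# Keyword binding makes each partial() call order-independent, so the
# optional arguments can be layered on in whatever order is convenient.
func = partial(group_op, labels=[0, 1, 0, -1])
func = partial(func, counts=[0, 0])
out = [0.0, 0.0]
func(out=out, values=[1.0, 2.0, 3.0, 4.0])
print(out)  # [4.0, 2.0]
```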