Merge branch 'main' into cooolheater-issue59965

cooolheater · web-flow · commit 596c591f686c · 2024-12-16T13:51:13.000+09:00
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -81,7 +81,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.resolution PR02" \
         -i "pandas.Timestamp.tzinfo GL08" \
         -i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
-        -i "pandas.arrays.IntervalArray.length SA01" \
         -i "pandas.arrays.NumpyExtensionArray SA01" \
         -i "pandas.arrays.TimedeltaArray PR07,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -56,6 +56,7 @@ Other enhancements
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
 - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
+- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :meth:`str.get_dummies` now accepts a  ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -1306,6 +1306,20 @@ def length(self) -> Index:
         """
         Return an Index with entries denoting the length of each Interval.
 
+        The length of an interval is calculated as the difference between
+        its `right` and `left` bounds. This property is particularly useful
+        when working with intervals where the size of the interval is an important
+        attribute, such as in time-series analysis or spatial data analysis.
+
+        See Also
+        --------
+        arrays.IntervalArray.left : Return the left endpoints of each Interval in
+            the IntervalArray as an Index.
+        arrays.IntervalArray.right : Return the right endpoints of each Interval in
+            the IntervalArray as an Index.
+        arrays.IntervalArray.mid : Return the midpoint of each Interval in the
+            IntervalArray as an Index.
+
         Examples
         --------
 
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
@@ -490,7 +490,7 @@ def online(
         klass="Series/Dataframe",
         axis="",
     )
-    def aggregate(self, func, *args, **kwargs):
+    def aggregate(self, func=None, *args, **kwargs):
         return super().aggregate(func, *args, **kwargs)
 
     agg = aggregate
@@ -981,7 +981,7 @@ def reset(self) -> None:
         """
         self._mean.reset()
 
-    def aggregate(self, func, *args, **kwargs):
+    def aggregate(self, func=None, *args, **kwargs):
         raise NotImplementedError("aggregate is not implemented.")
 
     def std(self, bias: bool = False, *args, **kwargs):
diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py
@@ -167,7 +167,7 @@ def _get_window_indexer(self) -> BaseIndexer:
         klass="Series/Dataframe",
         axis="",
     )
-    def aggregate(self, func, *args, **kwargs):
+    def aggregate(self, func=None, *args, **kwargs):
         return super().aggregate(func, *args, **kwargs)
 
     agg = aggregate
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -44,7 +44,10 @@
 
 from pandas.core._numba import executor
 from pandas.core.algorithms import factorize
-from pandas.core.apply import ResamplerWindowApply
+from pandas.core.apply import (
+    ResamplerWindowApply,
+    reconstruct_func,
+)
 from pandas.core.arrays import ExtensionArray
 from pandas.core.base import SelectionMixin
 import pandas.core.common as com
@@ -646,8 +649,12 @@ def _numba_apply(
             out = obj._constructor(result, index=index, columns=columns)
             return self._resolve_output(out, obj)
 
-    def aggregate(self, func, *args, **kwargs):
+    def aggregate(self, func=None, *args, **kwargs):
+        relabeling, func, columns, order = reconstruct_func(func, **kwargs)
         result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
+        if isinstance(result, ABCDataFrame) and relabeling:
+            result = result.iloc[:, order]
+            result.columns = columns  # type: ignore[union-attr]
         if result is None:
             return self.apply(func, raw=False, args=args, kwargs=kwargs)
         return result
@@ -1239,7 +1246,7 @@ def calc(x):
         klass="Series/DataFrame",
         axis="",
     )
-    def aggregate(self, func, *args, **kwargs):
+    def aggregate(self, func=None, *args, **kwargs):
         result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
         if result is None:
             # these must apply directly
@@ -1951,7 +1958,7 @@ def _raise_monotonic_error(self, msg: str):
         klass="Series/Dataframe",
         axis="",
     )
-    def aggregate(self, func, *args, **kwargs):
+    def aggregate(self, func=None, *args, **kwargs):
         return super().aggregate(func, *args, **kwargs)
 
     agg = aggregate
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -241,7 +241,7 @@ def read_sql_table(  # pyright: ignore[reportOverlappingOverload]
     schema=...,
     index_col: str | list[str] | None = ...,
     coerce_float=...,
-    parse_dates: list[str] | dict[str, str] | None = ...,
+    parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = ...,
     columns: list[str] | None = ...,
     chunksize: None = ...,
     dtype_backend: DtypeBackend | lib.NoDefault = ...,
@@ -255,7 +255,7 @@ def read_sql_table(
     schema=...,
     index_col: str | list[str] | None = ...,
     coerce_float=...,
-    parse_dates: list[str] | dict[str, str] | None = ...,
+    parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = ...,
     columns: list[str] | None = ...,
     chunksize: int = ...,
     dtype_backend: DtypeBackend | lib.NoDefault = ...,
@@ -268,7 +268,7 @@ def read_sql_table(
     schema: str | None = None,
     index_col: str | list[str] | None = None,
     coerce_float: bool = True,
-    parse_dates: list[str] | dict[str, str] | None = None,
+    parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = None,
     columns: list[str] | None = None,
     chunksize: int | None = None,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
@@ -372,7 +372,7 @@ def read_sql_query(  # pyright: ignore[reportOverlappingOverload]
     index_col: str | list[str] | None = ...,
     coerce_float=...,
     params: list[Any] | Mapping[str, Any] | None = ...,
-    parse_dates: list[str] | dict[str, str] | None = ...,
+    parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = ...,
     chunksize: None = ...,
     dtype: DtypeArg | None = ...,
     dtype_backend: DtypeBackend | lib.NoDefault = ...,
@@ -386,7 +386,7 @@ def read_sql_query(
     index_col: str | list[str] | None = ...,
     coerce_float=...,
     params: list[Any] | Mapping[str, Any] | None = ...,
-    parse_dates: list[str] | dict[str, str] | None = ...,
+    parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = ...,
     chunksize: int = ...,
     dtype: DtypeArg | None = ...,
     dtype_backend: DtypeBackend | lib.NoDefault = ...,
@@ -399,7 +399,7 @@ def read_sql_query(
     index_col: str | list[str] | None = None,
     coerce_float: bool = True,
     params: list[Any] | Mapping[str, Any] | None = None,
-    parse_dates: list[str] | dict[str, str] | None = None,
+    parse_dates: list[str] | dict[str, str] | dict[str, dict[str, Any]] | None = None,
     chunksize: int | None = None,
     dtype: DtypeArg | None = None,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
diff --git a/pandas/tests/groupby/__init__.py b/pandas/tests/groupby/__init__.py
@@ -2,7 +2,7 @@ def get_groupby_method_args(name, obj):
     """
     Get required arguments for a groupby method.
 
-    When parametrizing a test over groupby methods (e.g. "sum", "mean", "fillna"),
+    When parametrizing a test over groupby methods (e.g. "sum", "mean"),
     it is often the case that arguments are required for certain methods.
 
     Parameters
@@ -16,7 +16,7 @@ def get_groupby_method_args(name, obj):
     -------
     A tuple of required arguments for the method.
     """
-    if name in ("nth", "fillna", "take"):
+    if name in ("nth", "take"):
         return (0,)
     if name == "quantile":
         return (0.5,)
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -1963,10 +1963,7 @@ def test_category_order_transformer(
         df = df.set_index(keys)
     args = get_groupby_method_args(transformation_func, df)
     gb = df.groupby(keys, as_index=as_index, sort=sort, observed=observed)
-    warn = FutureWarning if transformation_func == "fillna" else None
-    msg = "DataFrameGroupBy.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=msg):
-        op_result = getattr(gb, transformation_func)(*args)
+    op_result = getattr(gb, transformation_func)(*args)
     result = op_result.index.get_level_values("a").categories
     expected = Index([1, 4, 3, 2])
     tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -2098,36 +2098,14 @@ def test_group_on_empty_multiindex(transformation_func, request):
     df["col_3"] = df["col_3"].astype(int)
     df["col_4"] = df["col_4"].astype(int)
     df = df.set_index(["col_1", "col_2"])
-    if transformation_func == "fillna":
-        args = ("ffill",)
-    else:
-        args = ()
-    warn = FutureWarning if transformation_func == "fillna" else None
-    warn_msg = "DataFrameGroupBy.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=warn_msg):
-        result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
-    with tm.assert_produces_warning(warn, match=warn_msg):
-        expected = df.groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
+    result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func)
+    expected = df.groupby(["col_1"]).transform(transformation_func).iloc[:0]
     if transformation_func in ("diff", "shift"):
         expected = expected.astype(int)
     tm.assert_equal(result, expected)
 
-    warn_msg = "SeriesGroupBy.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=warn_msg):
-        result = (
-            df["col_3"]
-            .iloc[:0]
-            .groupby(["col_1"])
-            .transform(transformation_func, *args)
-        )
-    warn_msg = "SeriesGroupBy.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=warn_msg):
-        expected = (
-            df["col_3"]
-            .groupby(["col_1"])
-            .transform(transformation_func, *args)
-            .iloc[:0]
-        )
+    result = df["col_3"].iloc[:0].groupby(["col_1"]).transform(transformation_func)
+    expected = df["col_3"].groupby(["col_1"]).transform(transformation_func).iloc[:0]
     if transformation_func in ("diff", "shift"):
         expected = expected.astype(int)
     tm.assert_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py
@@ -36,11 +36,11 @@ def test_groupby_preserves_subclass(obj, groupby_func):
 
     args = get_groupby_method_args(groupby_func, obj)
 
-    warn = FutureWarning if groupby_func == "fillna" else None
-    msg = f"{type(grouped).__name__}.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=msg, raise_on_extra_warnings=False):
+    warn = FutureWarning if groupby_func == "corrwith" else None
+    msg = f"{type(grouped).__name__}.corrwith is deprecated"
+    with tm.assert_produces_warning(warn, match=msg):
         result1 = getattr(grouped, groupby_func)(*args)
-    with tm.assert_produces_warning(warn, match=msg, raise_on_extra_warnings=False):
+    with tm.assert_produces_warning(warn, match=msg):
         result2 = grouped.agg(groupby_func, *args)
 
     # Reduction or transformation kernels should preserve type
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
@@ -278,14 +278,11 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
         kernel in ("first", "last")
         or (
             # kernels that work on any dtype and don't have numeric_only arg
-            kernel in ("any", "all", "bfill", "ffill", "fillna", "nth", "nunique")
+            kernel in ("any", "all", "bfill", "ffill", "nth", "nunique")
             and numeric_only is lib.no_default
         )
     ):
-        warn = FutureWarning if kernel == "fillna" else None
-        msg = "DataFrameGroupBy.fillna is deprecated"
-        with tm.assert_produces_warning(warn, match=msg):
-            result = method(*args, **kwargs)
+        result = method(*args, **kwargs)
         assert "b" in result.columns
     elif has_arg:
         assert numeric_only is not True
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
@@ -144,7 +144,6 @@ def test_groupby_raises_string(
         ),
         "diff": (TypeError, "unsupported operand type"),
         "ffill": (None, ""),
-        "fillna": (None, ""),
         "first": (None, ""),
         "idxmax": (None, ""),
         "idxmin": (None, ""),
@@ -211,10 +210,7 @@ def test_groupby_raises_string(
         elif groupby_func == "corrwith":
             msg = "Cannot perform reduction 'mean' with string dtype"
 
-    if groupby_func == "fillna":
-        kind = "Series" if groupby_series else "DataFrame"
-        warn_msg = f"{kind}GroupBy.fillna is deprecated"
-    elif groupby_func == "corrwith":
+    if groupby_func == "corrwith":
         warn_msg = "DataFrameGroupBy.corrwith is deprecated"
     else:
         warn_msg = ""
@@ -301,7 +297,6 @@ def test_groupby_raises_datetime(
         "cumsum": (TypeError, "datetime64 type does not support operation 'cumsum'"),
         "diff": (None, ""),
         "ffill": (None, ""),
-        "fillna": (None, ""),
         "first": (None, ""),
         "idxmax": (None, ""),
         "idxmin": (None, ""),
@@ -333,10 +328,7 @@ def test_groupby_raises_datetime(
         "var": (TypeError, "datetime64 type does not support operation 'var'"),
     }[groupby_func]
 
-    if groupby_func == "fillna":
-        kind = "Series" if groupby_series else "DataFrame"
-        warn_msg = f"{kind}GroupBy.fillna is deprecated"
-    elif groupby_func == "corrwith":
+    if groupby_func == "corrwith":
         warn_msg = "DataFrameGroupBy.corrwith is deprecated"
     else:
         warn_msg = ""
@@ -457,7 +449,6 @@ def test_groupby_raises_category(
             r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'",
         ),
         "ffill": (None, ""),
-        "fillna": (None, ""),  # no-op with CoW
         "first": (None, ""),
         "idxmax": (None, ""),
         "idxmin": (None, ""),
@@ -532,10 +523,7 @@ def test_groupby_raises_category(
         ),
     }[groupby_func]
 
-    if groupby_func == "fillna":
-        kind = "Series" if groupby_series else "DataFrame"
-        warn_msg = f"{kind}GroupBy.fillna is deprecated"
-    elif groupby_func == "corrwith":
+    if groupby_func == "corrwith":
         warn_msg = "DataFrameGroupBy.corrwith is deprecated"
     else:
         warn_msg = ""
@@ -650,7 +638,6 @@ def test_groupby_raises_category_on_category(
         ),
         "diff": (TypeError, "unsupported operand type"),
         "ffill": (None, ""),
-        "fillna": (None, ""),  # no-op with CoW
         "first": (None, ""),
         "idxmax": (ValueError, "empty group due to unobserved categories")
         if empty_groups
@@ -710,10 +697,7 @@ def test_groupby_raises_category_on_category(
         ),
     }[groupby_func]
 
-    if groupby_func == "fillna":
-        kind = "Series" if groupby_series else "DataFrame"
-        warn_msg = f"{kind}GroupBy.fillna is deprecated"
-    elif groupby_func == "corrwith":
+    if groupby_func == "corrwith":
         warn_msg = "DataFrameGroupBy.corrwith is deprecated"
     else:
         warn_msg = ""
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -329,9 +329,6 @@ def test_transform_transformation_func(transformation_func):
     if transformation_func == "cumcount":
         test_op = lambda x: x.transform("cumcount")
         mock_op = lambda x: Series(range(len(x)), x.index)
-    elif transformation_func == "fillna":
-        test_op = lambda x: x.transform("fillna", value=0)
-        mock_op = lambda x: x.fillna(value=0)
     elif transformation_func == "ngroup":
         test_op = lambda x: x.transform("ngroup")
         counter = -1
@@ -1436,11 +1433,7 @@ def test_null_group_str_transformer_series(dropna, transformation_func):
         dtype = object if transformation_func in ("any", "all") else None
         buffer.append(Series([np.nan], index=[3], dtype=dtype))
     expected = concat(buffer)
-
-    warn = FutureWarning if transformation_func == "fillna" else None
-    msg = "SeriesGroupBy.fillna is deprecated"
-    with tm.assert_produces_warning(warn, match=msg):
-        result = gb.transform(transformation_func, *args)
+    result = gb.transform(transformation_func, *args)
 
     tm.assert_equal(result, expected)
 
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py

Original file line number	Diff line number	Diff line change
`@@ -167,7 +167,7 @@ def _get_window_indexer(self) -> BaseIndexer:`
`167`	`167`	`klass="Series/Dataframe",`
`168`	`168`	`axis="",`
`169`	`169`	`)`
`170`		`- def aggregate(self, func, *args, **kwargs):`
	`170`	`+ def aggregate(self, func=None, *args, **kwargs):`
`171`	`171`	`return super().aggregate(func, *args, **kwargs)`
`172`	`172`
`173`	`173`	`agg = aggregate`