Skip to content

Commit 7b7fafe

Browse files
authored
ENH: __array_ufunc__ handle np.minimum.reduce (#43923)
1 parent 9921168 commit 7b7fafe

File tree

8 files changed

+159
-9
lines changed

8 files changed

+159
-9
lines changed

doc/source/whatsnew/v1.4.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,7 @@ Datetimelike
602602
- Bug in calling ``np.isnan``, ``np.isfinite``, or ``np.isinf`` on a timezone-aware :class:`DatetimeIndex` incorrectly raising ``TypeError`` (:issue:`43917`)
603603
- Bug in constructing a :class:`Series` from datetime-like strings with mixed timezones incorrectly partially-inferring datetime values (:issue:`40111`)
604604
- Bug in addition with a :class:`Tick` object and a ``np.timedelta64`` object incorrectly raising instead of returning :class:`Timedelta` (:issue:`44474`)
605+
- ``np.maximum.reduce`` and ``np.minimum.reduce`` now correctly return :class:`Timestamp` and :class:`Timedelta` objects when operating on :class:`Series`, :class:`DataFrame`, or :class:`Index` with ``datetime64[ns]`` or ``timedelta64[ns]`` dtype (:issue:`43923`)
605606
- Bug in adding a ``np.timedelta64`` object to a :class:`BusinessDay` or :class:`CustomBusinessDay` object incorrectly raising (:issue:`44532`)
606607
- Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`)
607608
- Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`)
@@ -793,6 +794,7 @@ ExtensionArray
793794
^^^^^^^^^^^^^^
794795
- Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`)
795796
- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`)
797+
- NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`)
796798
- Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
797799
- Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
798800
-

pandas/core/arraylike.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,20 @@
1313
from pandas._libs import lib
1414
from pandas.util._exceptions import find_stack_level
1515

16+
from pandas.core.dtypes.generic import ABCNDFrame
17+
1618
from pandas.core.construction import extract_array
1719
from pandas.core.ops import (
1820
maybe_dispatch_ufunc_to_dunder_op,
1921
roperator,
2022
)
2123
from pandas.core.ops.common import unpack_zerodim_and_defer
2224

25+
REDUCTION_ALIASES = {
26+
"maximum": "max",
27+
"minimum": "min",
28+
}
29+
2330

2431
class OpsMixin:
2532
# -------------------------------------------------------------
@@ -344,7 +351,7 @@ def reconstruct(result):
344351
raise NotImplementedError
345352
return result
346353
if isinstance(result, BlockManager):
347-
# we went through BlockManager.apply
354+
# we went through BlockManager.apply e.g. np.sqrt
348355
result = self._constructor(result, **reconstruct_kwargs, copy=False)
349356
else:
350357
# we converted an array, lost our axes
@@ -363,6 +370,11 @@ def reconstruct(result):
363370
result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
364371
return reconstruct(result)
365372

373+
if method == "reduce":
374+
result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
375+
if result is not NotImplemented:
376+
return result
377+
366378
# We still get here with kwargs `axis` for e.g. np.maximum.accumulate
367379
# and `dtype` and `keepdims` for np.ptp
368380

@@ -373,6 +385,8 @@ def reconstruct(result):
373385
# returned a Tuple[BlockManager].
374386
# * len(inputs) > 1 is doable when we know that we have
375387
# aligned blocks / dtypes.
388+
389+
# e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
376390
inputs = tuple(np.asarray(x) for x in inputs)
377391
# Note: we can't use default_array_ufunc here bc reindexing means
378392
# that `self` may not be among `inputs`
@@ -393,6 +407,7 @@ def reconstruct(result):
393407
# otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
394408
# Those can have an axis keyword and thus can't be called block-by-block
395409
result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
410+
# e.g. np.negative (only one reached), with "where" and "out" in kwargs
396411

397412
result = reconstruct(result)
398413
return result
@@ -473,3 +488,39 @@ def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
473488
new_inputs = [x if x is not self else np.asarray(x) for x in inputs]
474489

475490
return getattr(ufunc, method)(*new_inputs, **kwargs)
491+
492+
493+
def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Route a ufunc ``reduce`` call to the matching reduction method of ``self``.

    Parameters
    ----------
    self : object
        The object whose reduction method (looked up by name through
        ``REDUCTION_ALIASES``) should be called.
    ufunc : np.ufunc
        The ufunc being reduced; only its ``__name__`` is inspected.
    method : str
        Must be ``"reduce"``.
    *inputs
        Positional ufunc inputs; dispatch only happens when this is exactly
        ``(self,)``.
    **kwargs
        Forwarded to the reduction method, possibly with ``numeric_only``
        and ``axis`` filled in for objects with more than one dimension.

    Returns
    -------
    object
        The result of the reduction method called with ``skipna=False``, or
        ``NotImplemented`` when the call cannot be dispatched.
    """
    assert method == "reduce"

    # Only a reduction over `self` alone can be dispatched.
    if not (len(inputs) == 1 and inputs[0] is self):
        return NotImplemented

    ufunc_name = ufunc.__name__
    if ufunc_name not in REDUCTION_ALIASES:
        return NotImplemented
    reducer_name = REDUCTION_ALIASES[ufunc_name]

    # NB: this assumes that an attribute called min/max is a
    # minimum/maximum reduction method, which would not be accurate
    # for e.g. Timestamp.min
    if not hasattr(self, reducer_name):
        return NotImplemented

    if self.ndim > 1:
        if isinstance(self, ABCNDFrame):
            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
            kwargs["numeric_only"] = False

        # For DataFrame reductions we don't want the default axis=0, so
        # fall back to axis=None unless the caller passed an axis.
        # FIXME: DataFrame.min ignores axis=None
        # FIXME: np.minimum.reduce(df) gets here bc axis is not in kwargs,
        # but np.minimum.reduce(df.values) behaves as if axis=0
        kwargs.setdefault("axis", None)

    # By default, numpy's reductions do not skip NaNs, so skipna=False
    # has to be passed explicitly.
    reducer = getattr(self, reducer_name)
    return reducer(skipna=False, **kwargs)

pandas/core/arrays/base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1547,6 +1547,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
15471547
self, ufunc, method, *inputs, **kwargs
15481548
)
15491549

1550+
if method == "reduce":
1551+
result = arraylike.dispatch_reduction_ufunc(
1552+
self, ufunc, method, *inputs, **kwargs
1553+
)
1554+
if result is not NotImplemented:
1555+
return result
1556+
15501557
return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
15511558

15521559

pandas/core/arrays/masked.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
)
5656

5757
from pandas.core import (
58+
arraylike,
5859
missing,
5960
nanops,
6061
ops,
@@ -415,7 +416,7 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
415416
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
416417
# For MaskedArray inputs, we apply the ufunc to ._data
417418
# and mask the result.
418-
if method == "reduce":
419+
if method == "reduce" and ufunc not in [np.maximum, np.minimum]:
419420
# Not clear how to handle missing values in reductions. Raise.
420421
raise NotImplementedError("The 'reduce' method is not supported.")
421422

@@ -432,6 +433,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
432433
if result is not NotImplemented:
433434
return result
434435

436+
if method == "reduce":
437+
result = arraylike.dispatch_reduction_ufunc(
438+
self, ufunc, method, *inputs, **kwargs
439+
)
440+
if result is not NotImplemented:
441+
return result
442+
435443
mask = np.zeros(len(self), dtype=bool)
436444
inputs2 = []
437445
for x in inputs:

pandas/core/arrays/numpy_.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import numbers
44

55
import numpy as np
6-
from numpy.lib.mixins import NDArrayOperatorsMixin
76

87
from pandas._libs import lib
98
from pandas._typing import (
@@ -31,7 +30,6 @@
3130
class PandasArray(
3231
OpsMixin,
3332
NDArrayBackedExtensionArray,
34-
NDArrayOperatorsMixin,
3533
ObjectStringArrayMixin,
3634
):
3735
"""

pandas/core/indexes/base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
876876
if result is not NotImplemented:
877877
return result
878878

879+
if method == "reduce":
880+
result = arraylike.dispatch_reduction_ufunc(
881+
self, ufunc, method, *inputs, **kwargs
882+
)
883+
if result is not NotImplemented:
884+
return result
885+
879886
new_inputs = [x if x is not self else x._values for x in inputs]
880887
result = getattr(ufunc, method)(*new_inputs, **kwargs)
881888
if ufunc.nout == 2:

pandas/tests/indexes/test_numpy_compat.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22
import pytest
33

44
from pandas import (
5+
CategoricalIndex,
56
DatetimeIndex,
67
Index,
78
NumericIndex,
89
PeriodIndex,
910
TimedeltaIndex,
11+
isna,
1012
)
1113
import pandas._testing as tm
1214
from pandas.core.api import Float64Index
@@ -98,3 +100,29 @@ def test_numpy_ufuncs_other(index, func, request):
98100
else:
99101
with tm.external_error_raised(TypeError):
100102
func(index)
103+
104+
105+
@pytest.mark.parametrize("func", [np.maximum, np.minimum])
def test_numpy_ufuncs_reductions(index, func):
    """Check ``func.reduce(index)`` against ``index.max``/``index.min``."""
    # TODO: overlap with tests.series.test_ufunc.test_reductions
    if len(index) == 0:
        # Nothing is checked for empty indexes.
        return

    if isinstance(index, CategoricalIndex) and index.dtype.ordered is False:
        # Unordered categoricals are expected to raise.
        with pytest.raises(TypeError, match="is not ordered for"):
            func.reduce(index)
        return

    result = func.reduce(index)

    # Compare against the method with skipna=False, which is how the
    # reduction is expected to treat missing values.
    reducer = index.max if func is np.maximum else index.min
    expected = reducer(skipna=False)
    # TODO: do we have cases both with and without NAs?

    assert type(result) is type(expected)
    if isna(result):
        assert isna(expected)
    else:
        assert result == expected

pandas/tests/series/test_ufunc.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -255,16 +255,65 @@ def __add__(self, other):
255255
@pytest.mark.parametrize(
256256
"values",
257257
[
258-
pd.array([1, 3, 2], dtype="int64"),
259-
pd.array([1, 10, 0], dtype="Sparse[int]"),
258+
pd.array([1, 3, 2], dtype=np.int64),
259+
pd.array([1, 3, 2], dtype="Int64"),
260+
pd.array([1, 3, 2], dtype="Float32"),
261+
pd.array([1, 10, 2], dtype="Sparse[int]"),
260262
pd.to_datetime(["2000", "2010", "2001"]),
261263
pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"),
262264
pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"),
265+
pd.to_timedelta(["1 Day", "3 Days", "2 Days"]),
266+
pd.IntervalIndex([pd.Interval(0, 1), pd.Interval(2, 3), pd.Interval(1, 2)]),
263267
],
268+
ids=lambda x: str(x.dtype),
264269
)
265-
def test_reduce(values):
266-
a = pd.Series(values)
267-
assert np.maximum.reduce(a) == values[1]
270+
@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series, pd.DataFrame])
271+
def test_reduce(values, box, request):
272+
# TODO: cases with NAs
273+
274+
same_type = True
275+
276+
if box is pd.Index:
277+
if values.dtype.kind in ["i", "f"]:
278+
# ATM Index casts to object, so we get python ints/floats
279+
same_type = False
280+
elif isinstance(values, pd.IntervalIndex):
281+
mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented")
282+
request.node.add_marker(mark)
283+
284+
elif box is pd.Series or box is pd.DataFrame:
285+
if isinstance(values, pd.IntervalIndex):
286+
mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented")
287+
request.node.add_marker(mark)
288+
289+
if values.dtype == "i8" and box is pd.array:
290+
# FIXME: pd.array casts to Int64
291+
obj = values
292+
else:
293+
obj = box(values)
294+
295+
result = np.maximum.reduce(obj)
296+
expected = values[1]
297+
if box is pd.DataFrame:
298+
# TODO: cases with axis kwarg
299+
expected = obj.max(numeric_only=False)
300+
tm.assert_series_equal(result, expected)
301+
else:
302+
assert result == expected
303+
if same_type:
304+
# check we have e.g. Timestamp instead of dt64
305+
assert type(result) == type(expected)
306+
307+
result = np.minimum.reduce(obj)
308+
expected = values[0]
309+
if box is pd.DataFrame:
310+
expected = obj.min(numeric_only=False)
311+
tm.assert_series_equal(result, expected)
312+
else:
313+
assert result == expected
314+
if same_type:
315+
# check we have e.g. Timestamp instead of dt64
316+
assert type(result) == type(expected)
268317

269318

270319
@pytest.mark.parametrize("type_", [list, deque, tuple])

0 commit comments

Comments
 (0)