Skip to content

REF: Implement EA._mode, de-special-case categorical/dtlike #45033

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 8 additions & 19 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,7 +935,7 @@ def duplicated(
return htable.duplicated(values, keep=keep)


def mode(values, dropna: bool = True) -> Series:
def mode(values: ArrayLike, dropna: bool = True) -> ArrayLike:
"""
Returns the mode(s) of an array.

Expand All @@ -948,27 +948,17 @@ def mode(values, dropna: bool = True) -> Series:

Returns
-------
mode : Series
np.ndarray or ExtensionArray
"""
from pandas import Series
from pandas.core.indexes.api import default_index

values = _ensure_arraylike(values)
original = values

# categorical is a fast-path
if is_categorical_dtype(values.dtype):
if isinstance(values, Series):
# TODO: should we be passing `name` below?
return Series(values._values.mode(dropna=dropna), name=values.name)
return values.mode(dropna=dropna)

if needs_i8_conversion(values.dtype):
if dropna:
mask = values.isna()
values = values[~mask]
modes = mode(values.view("i8"))
return modes.view(original.dtype)
# Got here with ndarray; dispatch to DatetimeArray/TimedeltaArray.
values = ensure_wrapped_if_datetimelike(values)
# error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
# ndarray[Any, Any]]" has no attribute "_mode"
return values._mode(dropna=dropna) # type: ignore[union-attr]

values = _ensure_data(values)

Expand All @@ -979,8 +969,7 @@ def mode(values, dropna: bool = True) -> Series:
warn(f"Unable to sort modes: {err}")

result = _reconstruct_data(npresult, original.dtype, original)
# Ensure index is type stable (should always use int index)
return Series(result, index=default_index(len(result)))
return result


def rank(
Expand Down
21 changes: 21 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
from pandas.core.algorithms import (
factorize_array,
isin,
mode,
rank,
unique,
)
Expand Down Expand Up @@ -1578,6 +1579,26 @@ def _quantile(

return result

def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT:
    """
    Compute the mode(s) of this ExtensionArray.

    The result is always an array, even when there is a single mode.

    Parameters
    ----------
    dropna : bool, default True
        If True, NA values are not counted.

    Returns
    -------
    same type as self
        Sorted, if possible.
    """
    # Delegate to the shared ``mode`` algorithm; it is annotated as returning
    # "Union[ExtensionArray, ndarray[Any, Any]]", which mypy cannot narrow to
    # "ExtensionArrayT", hence the ignore on the return below.
    modes = mode(self, dropna=dropna)
    return modes  # type: ignore[return-value]

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if any(
isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2229,7 +2229,7 @@ def max(self, *, skipna=True, **kwargs):
pointer = self._codes.max()
return self._wrap_reduction_result(None, pointer)

def mode(self, dropna=True):
def mode(self, dropna: bool = True) -> Categorical:
"""
Returns the mode(s) of the Categorical.

Expand All @@ -2244,6 +2244,15 @@ def mode(self, dropna=True):
-------
modes : `Categorical` (sorted)
"""
warn(
"Categorical.mode is deprecated and will be removed in a future version. "
"Use Series.mode instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return self._mode(dropna=dropna)

def _mode(self, dropna: bool = True) -> Categorical:
codes = self._codes
if dropna:
good = self._codes != -1
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
from pandas.core.algorithms import (
checked_add_with_arr,
isin,
mode,
unique1d,
)
from pandas.core.arraylike import OpsMixin
Expand Down Expand Up @@ -1531,6 +1532,17 @@ def median(self, *, axis: int | None = None, skipna: bool = True, **kwargs):
result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)

def _mode(self, dropna: bool = True):
    """
    Compute the mode(s) of this datetime-like array.

    Parameters
    ----------
    dropna : bool, default True
        If True, entries flagged by ``isna()`` are removed before counting.

    Returns
    -------
    Array built from the modal values via ``_from_backing_data``.
    """
    # Optionally filter out missing entries before counting.
    candidates = self[~self.isna()] if dropna else self

    # Count on the int64 view, then reinterpret the modes using the dtype
    # of the backing ndarray so they can be re-wrapped.
    as_i8 = candidates.view("i8")
    backing = cast(np.ndarray, mode(as_i8).view(self._ndarray.dtype))
    return self._from_backing_data(backing)


class DatelikeOps(DatetimeLikeArrayMixin):
"""
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1999,7 +1999,16 @@ def mode(self, dropna: bool = True) -> Series:
Modes of the Series in sorted order.
"""
# TODO: Add option for bins like value_counts()
return algorithms.mode(self, dropna=dropna)
values = self._values
if isinstance(values, np.ndarray):
res_values = algorithms.mode(values, dropna=dropna)
else:
res_values = values._mode(dropna=dropna)

# Ensure index is type stable (should always use int index)
return self._constructor(
res_values, index=range(len(res_values)), name=self.name
)

def unique(self) -> ArrayLike:
"""
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,9 @@ def test_numpy_min_max_axis_equals_none(self, method, expected):
)
def test_mode(self, values, categories, exp_mode):
s = Categorical(values, categories=categories, ordered=True)
res = s.mode()
msg = "Use Series.mode instead"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the next pass, rename this variable to `cat`.

with tm.assert_produces_warning(FutureWarning, match=msg):
res = s.mode()
exp = Categorical(exp_mode, categories=categories, ordered=True)
tm.assert_categorical_equal(res, exp)

Expand Down
7 changes: 0 additions & 7 deletions pandas/tests/series/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
Series,
)
import pandas._testing as tm
from pandas.core.algorithms import mode


@pytest.mark.parametrize("as_period", [True, False])
Expand All @@ -24,12 +23,6 @@ def test_mode_extension_dtype(as_period):
assert res.dtype == ser.dtype
tm.assert_series_equal(res, ser)

res = mode(ser._values)
tm.assert_series_equal(res, ser)

res = mode(pd.Index(ser))
tm.assert_series_equal(res, ser)


def test_reductions_td64_with_nat():
# GH#8617
Expand Down
98 changes: 58 additions & 40 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -2261,7 +2261,7 @@ def test_int64_add_overflow():
class TestMode:
def test_no_mode(self):
exp = Series([], dtype=np.float64, index=Index([], dtype=int))
tm.assert_series_equal(algos.mode([]), exp)
tm.assert_numpy_array_equal(algos.mode([]), exp.values)

@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
def test_mode_single(self, dt):
Expand All @@ -2272,20 +2272,22 @@ def test_mode_single(self, dt):
exp_multi = [1]
data_multi = [1, 1]

s = Series(data_single, dtype=dt)
ser = Series(data_single, dtype=dt)
exp = Series(exp_single, dtype=dt)
tm.assert_series_equal(algos.mode(s), exp)
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)

s = Series(data_multi, dtype=dt)
ser = Series(data_multi, dtype=dt)
exp = Series(exp_multi, dtype=dt)
tm.assert_series_equal(algos.mode(s), exp)
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)

def test_mode_obj_int(self):
exp = Series([1], dtype=int)
tm.assert_series_equal(algos.mode([1]), exp)
tm.assert_numpy_array_equal(algos.mode([1]), exp.values)

exp = Series(["a", "b", "c"], dtype=object)
tm.assert_series_equal(algos.mode(["a", "b", "c"]), exp)
tm.assert_numpy_array_equal(algos.mode(["a", "b", "c"]), exp.values)

@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
def test_number_mode(self, dt):
Expand All @@ -2295,104 +2297,120 @@ def test_number_mode(self, dt):
exp_multi = [1, 3]
data_multi = [1] * 5 + [2] * 3 + [3] * 5

s = Series(data_single, dtype=dt)
ser = Series(data_single, dtype=dt)
exp = Series(exp_single, dtype=dt)
tm.assert_series_equal(algos.mode(s), exp)
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)

s = Series(data_multi, dtype=dt)
ser = Series(data_multi, dtype=dt)
exp = Series(exp_multi, dtype=dt)
tm.assert_series_equal(algos.mode(s), exp)
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)

def test_strobj_mode(self):
exp = ["b"]
data = ["a"] * 2 + ["b"] * 3

s = Series(data, dtype="c")
ser = Series(data, dtype="c")
exp = Series(exp, dtype="c")
tm.assert_series_equal(algos.mode(s), exp)
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)

@pytest.mark.parametrize("dt", [str, object])
def test_strobj_multi_char(self, dt):
exp = ["bar"]
data = ["foo"] * 2 + ["bar"] * 3

s = Series(data, dtype=dt)
ser = Series(data, dtype=dt)
exp = Series(exp, dtype=dt)
tm.assert_series_equal(algos.mode(s), exp)
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)

def test_datelike_mode(self):
exp = Series(["1900-05-03", "2011-01-03", "2013-01-02"], dtype="M8[ns]")
s = Series(["2011-01-03", "2013-01-02", "1900-05-03"], dtype="M8[ns]")
tm.assert_series_equal(algos.mode(s), exp)
ser = Series(["2011-01-03", "2013-01-02", "1900-05-03"], dtype="M8[ns]")
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
tm.assert_series_equal(ser.mode(), exp)

exp = Series(["2011-01-03", "2013-01-02"], dtype="M8[ns]")
s = Series(
ser = Series(
["2011-01-03", "2013-01-02", "1900-05-03", "2011-01-03", "2013-01-02"],
dtype="M8[ns]",
)
tm.assert_series_equal(algos.mode(s), exp)
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
tm.assert_series_equal(ser.mode(), exp)

def test_timedelta_mode(self):
exp = Series(["-1 days", "0 days", "1 days"], dtype="timedelta64[ns]")
s = Series(["1 days", "-1 days", "0 days"], dtype="timedelta64[ns]")
tm.assert_series_equal(algos.mode(s), exp)
ser = Series(["1 days", "-1 days", "0 days"], dtype="timedelta64[ns]")
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
tm.assert_series_equal(ser.mode(), exp)

exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]")
s = Series(
ser = Series(
["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],
dtype="timedelta64[ns]",
)
tm.assert_series_equal(algos.mode(s), exp)
tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
tm.assert_series_equal(ser.mode(), exp)

def test_mixed_dtype(self):
exp = Series(["foo"])
s = Series([1, "foo", "foo"])
tm.assert_series_equal(algos.mode(s), exp)
ser = Series([1, "foo", "foo"])
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)

def test_uint64_overflow(self):
exp = Series([2 ** 63], dtype=np.uint64)
s = Series([1, 2 ** 63, 2 ** 63], dtype=np.uint64)
tm.assert_series_equal(algos.mode(s), exp)
ser = Series([1, 2 ** 63, 2 ** 63], dtype=np.uint64)
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)

exp = Series([1, 2 ** 63], dtype=np.uint64)
s = Series([1, 2 ** 63], dtype=np.uint64)
tm.assert_series_equal(algos.mode(s), exp)
ser = Series([1, 2 ** 63], dtype=np.uint64)
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)

def test_categorical(self):
c = Categorical([1, 2])
exp = c
tm.assert_categorical_equal(algos.mode(c), exp)
tm.assert_categorical_equal(c.mode(), exp)
msg = "Categorical.mode is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = c.mode()
tm.assert_categorical_equal(res, exp)

c = Categorical([1, "a", "a"])
exp = Categorical(["a"], categories=[1, "a"])
tm.assert_categorical_equal(algos.mode(c), exp)
tm.assert_categorical_equal(c.mode(), exp)
with tm.assert_produces_warning(FutureWarning, match=msg):
res = c.mode()
tm.assert_categorical_equal(res, exp)

c = Categorical([1, 1, 2, 3, 3])
exp = Categorical([1, 3], categories=[1, 2, 3])
tm.assert_categorical_equal(algos.mode(c), exp)
tm.assert_categorical_equal(c.mode(), exp)
with tm.assert_produces_warning(FutureWarning, match=msg):
res = c.mode()
tm.assert_categorical_equal(res, exp)

def test_index(self):
idx = Index([1, 2, 3])
exp = Series([1, 2, 3], dtype=np.int64)
tm.assert_series_equal(algos.mode(idx), exp)
tm.assert_numpy_array_equal(algos.mode(idx), exp.values)

idx = Index([1, "a", "a"])
exp = Series(["a"], dtype=object)
tm.assert_series_equal(algos.mode(idx), exp)
tm.assert_numpy_array_equal(algos.mode(idx), exp.values)

idx = Index([1, 1, 2, 3, 3])
exp = Series([1, 3], dtype=np.int64)
tm.assert_series_equal(algos.mode(idx), exp)
tm.assert_numpy_array_equal(algos.mode(idx), exp.values)

exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]")
idx = Index(
["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],
dtype="timedelta64[ns]",
)
tm.assert_series_equal(algos.mode(idx), exp)
with pytest.raises(AttributeError, match="TimedeltaIndex"):
# algos.mode expects Arraylike, does *not* unwrap TimedeltaIndex
algos.mode(idx)


class TestDiff:
Expand Down