Skip to content

REF: avoid special-casing in SelectN #45956

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 11 additions & 12 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import (
BaseMaskedDtype,
ExtensionDtype,
PandasDtype,
)
Expand Down Expand Up @@ -103,6 +104,7 @@
Series,
)
from pandas.core.arrays import (
BaseMaskedArray,
DatetimeArray,
ExtensionArray,
TimedeltaArray,
Expand Down Expand Up @@ -142,6 +144,15 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
if is_object_dtype(values.dtype):
return ensure_object(np.asarray(values))

elif isinstance(values.dtype, BaseMaskedDtype):
# i.e. BooleanArray, FloatingArray, IntegerArray
values = cast("BaseMaskedArray", values)
if not values._hasna:
# No pd.NAs -> We can avoid an object-dtype cast (and copy) GH#41816
# recurse to avoid re-implementing logic for e.g. bool->uint8
return _ensure_data(values._data)
return np.asarray(values)

elif is_bool_dtype(values.dtype):
if isinstance(values, np.ndarray):
# i.e. actually dtype == np.dtype("bool")
Expand Down Expand Up @@ -1188,18 +1199,6 @@ def compute(self, method: str) -> Series:
dropped = self.obj.dropna()
nan_index = self.obj.drop(dropped.index)

if is_extension_array_dtype(dropped.dtype):
# GH#41816 because we have dropped NAs above, MaskedArrays can use the
# numpy logic.
from pandas.core.arrays import BaseMaskedArray

arr = dropped._values
if isinstance(arr, BaseMaskedArray):
ser = type(dropped)(arr._data, index=dropped.index, name=dropped.name)

result = type(self)(ser, n=self.n, keep=self.keep).compute(method)
return result.astype(arr.dtype)

# slow method
if n >= len(self.obj):
ascending = method == "nsmallest"
Expand Down
44 changes: 2 additions & 42 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,9 @@
SequenceIndexer,
Shape,
npt,
type_t,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import (
cache_readonly,
doc,
)
from pandas.util._decorators import doc
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.astype import astype_nansafe
Expand All @@ -53,6 +49,7 @@
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import BaseMaskedDtype
from pandas.core.dtypes.inference import is_array_like
from pandas.core.dtypes.missing import (
array_equivalent,
Expand Down Expand Up @@ -93,43 +90,6 @@
BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")


class BaseMaskedDtype(ExtensionDtype):
    """
    Common parent for the dtypes that accompany BaseMaskedArray subclasses.

    The numpy-related properties below are all derived from ``type`` and are
    computed once per instance via ``cache_readonly``.
    """

    # Declared here without a value; only ``base`` and ``na_value`` are
    # actually assigned on this class.
    name: str
    base = None
    type: type

    # Scalar used to represent a missing entry.
    na_value = libmissing.NA

    @cache_readonly
    def numpy_dtype(self) -> np.dtype:
        """Return the ``np.dtype`` built from ``self.type``."""
        scalar_type = self.type
        return np.dtype(scalar_type)

    @cache_readonly
    def kind(self) -> str:
        """Return the ``kind`` attribute of ``self.numpy_dtype``."""
        np_dtype = self.numpy_dtype
        return np_dtype.kind

    @cache_readonly
    def itemsize(self) -> int:
        """Return the ``itemsize`` (bytes per element) of ``self.numpy_dtype``."""
        np_dtype = self.numpy_dtype
        return np_dtype.itemsize

    @classmethod
    def construct_array_type(cls) -> type_t[BaseMaskedArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type

        Raises
        ------
        NotImplementedError
            Always, on this base class; no array type is provided here.
        """
        raise NotImplementedError


class BaseMaskedArray(OpsMixin, ExtensionArray):
"""
Base class for masked arrays (which use _data and _mask to store the data).
Expand Down
39 changes: 39 additions & 0 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import numpy as np
import pytz

from pandas._libs import missing as libmissing
from pandas._libs.interval import Interval
from pandas._libs.properties import cache_readonly
from pandas._libs.tslibs import (
Expand Down Expand Up @@ -57,6 +58,7 @@
Index,
)
from pandas.core.arrays import (
BaseMaskedArray,
DatetimeArray,
IntervalArray,
PandasArray,
Expand Down Expand Up @@ -1376,3 +1378,40 @@ def itemsize(self) -> int:
The element size of this data-type object.
"""
return self._dtype.itemsize


class BaseMaskedDtype(ExtensionDtype):
    """
    Base class shared by the dtypes of BaseMaskedArray subclasses.

    Exposes the numpy dtype built from ``type`` together with that dtype's
    ``kind`` and ``itemsize``; each is cached after first access.
    """

    # ``name`` and ``type`` are annotated only; no value is bound here.
    name: str
    base = None
    type: type

    # Missing-value marker for this dtype.
    na_value = libmissing.NA

    @cache_readonly
    def numpy_dtype(self) -> np.dtype:
        """Return ``np.dtype(self.type)``."""
        result = np.dtype(self.type)
        return result

    @cache_readonly
    def kind(self) -> str:
        """Return the kind code reported by ``self.numpy_dtype``."""
        underlying = self.numpy_dtype
        return underlying.kind

    @cache_readonly
    def itemsize(self) -> int:
        """Return the number of bytes per element, per ``self.numpy_dtype``."""
        underlying = self.numpy_dtype
        return underlying.itemsize

    @classmethod
    def construct_array_type(cls) -> type_t[BaseMaskedArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type

        Raises
        ------
        NotImplementedError
            Unconditionally when called on this base class.
        """
        raise NotImplementedError