Skip to content

REF: EA quantile logic to EA._quantile #44412

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 3 additions & 99 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from pandas._typing import (
ArrayLike,
npt,
)

from pandas.core.dtypes.common import is_sparse
from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
)

from pandas.core.nanops import nanpercentile

if TYPE_CHECKING:
from pandas.core.arrays import ExtensionArray


def quantile_compat(
values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
Expand All @@ -40,23 +34,12 @@ def quantile_compat(
if isinstance(values, np.ndarray):
fill_value = na_value_for_dtype(values.dtype, compat=False)
mask = isna(values)
return _quantile_with_mask(values, mask, fill_value, qs, interpolation)
return quantile_with_mask(values, mask, fill_value, qs, interpolation)
else:
# In general we don't want to import from arrays here;
# this is temporary pending discussion in GH#41428
from pandas.core.arrays import BaseMaskedArray

if isinstance(values, BaseMaskedArray):
# e.g. IntegerArray, does not implement _from_factorized
out = _quantile_ea_fallback(values, qs, interpolation)

else:
out = _quantile_ea_compat(values, qs, interpolation)
return values._quantile(qs, interpolation)

return out


def _quantile_with_mask(
def quantile_with_mask(
values: np.ndarray,
mask: np.ndarray,
fill_value,
Expand Down Expand Up @@ -114,82 +97,3 @@ def _quantile_with_mask(
result = result.T

return result


def _quantile_ea_compat(
    values: ExtensionArray, qs: npt.NDArray[np.float64], interpolation: str
) -> ExtensionArray:
    """
    Run _quantile_with_mask on an ExtensionArray via its factorize values.

    The mask and the values returned by ``_values_for_factorize`` are each
    passed through ``np.atleast_2d`` before being handed to the ndarray
    implementation, so a 1D ExtensionArray is treated as a single row.

    Parameters
    ----------
    values : ExtensionArray
    qs : np.ndarray[float64]
    interpolation: str

    Returns
    -------
    ExtensionArray
        Rebuilt with ``_from_factorized``, except for sparse dtypes where
        the raw result of _quantile_with_mask is returned unchanged.
    """
    # TODO(EA2D): make-believe not needed with 2D EAs

    # asarray needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(values.isna()))

    factorize_values, na_fill = values._values_for_factorize()
    quantiles = _quantile_with_mask(
        np.atleast_2d(factorize_values), na_mask, na_fill, qs, interpolation
    )

    if is_sparse(values.dtype):
        # Sparse results are not wrapped back into the original type.
        # error: Incompatible return value type (got "ndarray", expected
        # "ExtensionArray")
        return quantiles  # type: ignore[return-value]

    array_type = type(values)
    if values.ndim == 2:
        # i.e. DatetimeArray
        return array_type._from_factorized(quantiles, values)

    # shape[0] should be 1 as long as EAs are 1D
    assert quantiles.shape == (1, len(qs)), quantiles.shape
    return array_type._from_factorized(quantiles[0], values)


def _quantile_ea_fallback(
    values: ExtensionArray, qs: npt.NDArray[np.float64], interpolation: str
) -> ExtensionArray:
    """
    Quantile path for ExtensionArray subclasses lacking `_from_factorized`,
    e.g. IntegerArray.

    The array and its NA mask are converted with ``np.asarray``, lifted to
    2D, and passed to _quantile_with_mask. The single result row is then
    rebuilt with ``_from_sequence`` using the original dtype. If that raises
    TypeError, a 2D float64 ndarray is returned instead.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    na_mask = np.atleast_2d(np.asarray(values.isna()))
    as_2d = np.atleast_2d(np.asarray(values))
    na_fill = values.dtype.na_value

    quantiles = _quantile_with_mask(
        as_2d,
        mask=na_mask,
        fill_value=na_fill,
        qs=qs,
        interpolation=interpolation,
    )
    assert quantiles.ndim == 2
    assert quantiles.shape[0] == 1
    row = quantiles[0]

    try:
        return type(values)._from_sequence(row, dtype=values.dtype)
    except TypeError:
        # GH#42626: not able to safely cast Int64
        # for floating point output
        return np.atleast_2d(np.asarray(row, dtype=np.float64))
25 changes: 25 additions & 0 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
unique,
value_counts,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays.base import ExtensionArray
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -463,6 +464,30 @@ def value_counts(self, dropna: bool = True):
index = Index(index_arr, name=result.index.name)
return Series(result._values, index=index, name=result.name)

def _quantile(
    self: NDArrayBackedExtensionArrayT,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> NDArrayBackedExtensionArrayT:
    """
    Compute the quantiles of self for each quantile in `qs`.

    Works on the backing ``_ndarray`` (lifted to 2D), delegates to
    quantile_with_mask, and rebuilds the result with ``_from_factorized``.

    Parameters
    ----------
    qs : np.ndarray[float64]
    interpolation: str

    Returns
    -------
    same type as self
    """
    # TODO: disable for Categorical if not ordered?

    # asarray needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(self.isna()))
    backing = np.atleast_2d(self._ndarray)

    # TODO: something NDArrayBacked-specific instead of _values_for_factorize[1]?
    na_fill = self._values_for_factorize()[1]

    quantiles = quantile_with_mask(backing, na_mask, na_fill, qs, interpolation)
    wrapped = type(self)._from_factorized(quantiles, self)

    if self.ndim != 1:
        return wrapped

    # A 1D input was lifted to one row; check the shape, then drop that row axis.
    assert wrapped.shape == (1, len(qs)), wrapped.shape
    return wrapped[0]

# ------------------------------------------------------------------------
# numpy-like methods

Expand Down
36 changes: 36 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
isin,
unique,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.sorting import (
nargminmax,
nargsort,
Expand Down Expand Up @@ -1494,6 +1495,41 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
)
return result

def _quantile(
    self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str
) -> ExtensionArrayT:
    """
    Compute the quantiles of self for each quantile in `qs`.

    The array and its NA mask are converted with ``np.asarray`` and lifted
    to 2D, quantile_with_mask is called with ``np.nan`` as the fill value,
    and the result is rebuilt with ``_from_sequence``.

    Parameters
    ----------
    qs : np.ndarray[float64]
    interpolation: str

    Returns
    -------
    same type as self
    """
    # asarray needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(self.isna()))
    as_2d = np.atleast_2d(np.asarray(self))

    quantiles = quantile_with_mask(as_2d, na_mask, np.nan, qs, interpolation)

    if self.ndim != 2:
        # shape[0] should be 1 as long as EAs are 1D
        assert quantiles.shape == (1, len(qs)), quantiles.shape
        quantiles = quantiles[0]
    # else: i.e. DatetimeArray, the 2D result is passed through as-is

    return type(self)._from_sequence(quantiles)

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if any(
isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
Expand Down
33 changes: 33 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
take,
)
from pandas.core.array_algos import masked_reductions
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
from pandas.core.indexers import check_array_indexer
Expand Down Expand Up @@ -692,6 +693,38 @@ def equals(self, other) -> bool:
right = other._data[~other._mask]
return array_equivalent(left, right, dtype_equal=True)

def _quantile(
    self: BaseMaskedArrayT, qs: npt.NDArray[np.float64], interpolation: str
) -> BaseMaskedArrayT:
    """
    Dispatch to quantile_with_mask, needed because we do not have
    _from_factorized.

    The single result row is rebuilt with ``_from_sequence`` using
    ``self.dtype``. If that raises TypeError, a float64 ndarray is
    returned instead.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    na_mask = np.atleast_2d(np.asarray(self.isna()))
    as_2d = np.atleast_2d(np.asarray(self))
    na_fill = self.dtype.na_value

    quantiles = quantile_with_mask(
        as_2d,
        mask=na_mask,
        fill_value=na_fill,
        qs=qs,
        interpolation=interpolation,
    )
    assert quantiles.ndim == 2
    assert quantiles.shape[0] == 1
    row = quantiles[0]

    try:
        return type(self)._from_sequence(row, dtype=self.dtype)
    except TypeError:
        # GH#42626: not able to safely cast Int64
        # for floating point output
        return np.asarray(row, dtype=np.float64)

def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
if name in {"any", "all"}:
return getattr(self, name)(skipna=skipna, **kwargs)
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,12 @@ def value_counts(self, dropna: bool = True) -> Series:
keys = Index(keys)
return Series(counts, index=keys)

def _quantile(self, qs: npt.NDArray[np.float64], interpolation: str):
    """
    Compute quantiles via the parent implementation and return them as a
    plain ndarray.

    Special case: the returned array isn't _really_ sparse, so we don't
    wrap it in a SparseArray.
    """
    return np.asarray(super()._quantile(qs, interpolation))

# --------
# Indexing
# --------
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1310,6 +1310,9 @@ def quantile(
assert is_list_like(qs) # caller is responsible for this

result = quantile_compat(self.values, np.asarray(qs._values), interpolation)
# ensure_block_shape needed for cases where we start with EA and result
# is ndarray, e.g. IntegerArray, SparseArray
result = ensure_block_shape(result, ndim=2)
return new_block_2d(result, placement=self._mgr_locs)


Expand Down