CLN: use numpy quantile in qcut (#43991)

fangchenli · web-flow · commit 1d74b3e30dc5 · 2021-10-12T15:27:23.000-04:00
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1117,89 +1117,6 @@ def checked_add_with_arr(
     return arr + b
 
 
-def quantile(x, q, interpolation_method="fraction"):
-    """
-    Compute sample quantile or quantiles of the input array. For example, q=0.5
-    computes the median.
-
-    The `interpolation_method` parameter supports three values, namely
-    `fraction` (default), `lower` and `higher`. Interpolation is done only,
-    if the desired quantile lies between two data points `i` and `j`. For
-    `fraction`, the result is an interpolated value between `i` and `j`;
-    for `lower`, the result is `i`, for `higher` the result is `j`.
-
-    Parameters
-    ----------
-    x : ndarray
-        Values from which to extract score.
-    q : scalar or array
-        Percentile at which to extract score.
-    interpolation_method : {'fraction', 'lower', 'higher'}, optional
-        This optional parameter specifies the interpolation method to use,
-        when the desired quantile lies between two data points `i` and `j`:
-
-        - fraction: `i + (j - i)*fraction`, where `fraction` is the
-                    fractional part of the index surrounded by `i` and `j`.
-        -lower: `i`.
-        - higher: `j`.
-
-    Returns
-    -------
-    score : float
-        Score at percentile.
-
-    Examples
-    --------
-    >>> from scipy import stats
-    >>> a = np.arange(100)
-    >>> stats.scoreatpercentile(a, 50)
-    49.5
-
-    """
-    x = np.asarray(x)
-    mask = isna(x)
-
-    x = x[~mask]
-
-    values = np.sort(x)
-
-    def _interpolate(a, b, fraction):
-        """
-        Returns the point at the given fraction between a and b, where
-        'fraction' must be between 0 and 1.
-        """
-        return a + (b - a) * fraction
-
-    def _get_score(at):
-        if len(values) == 0:
-            return np.nan
-
-        idx = at * (len(values) - 1)
-        if idx % 1 == 0:
-            score = values[int(idx)]
-        else:
-            if interpolation_method == "fraction":
-                score = _interpolate(values[int(idx)], values[int(idx) + 1], idx % 1)
-            elif interpolation_method == "lower":
-                score = values[np.floor(idx)]
-            elif interpolation_method == "higher":
-                score = values[np.ceil(idx)]
-            else:
-                raise ValueError(
-                    "interpolation_method can only be 'fraction' "
-                    ", 'lower' or 'higher'"
-                )
-
-        return score
-
-    if is_scalar(q):
-        return _get_score(q)
-
-    q = np.asarray(q, np.float64)
-    result = [_get_score(x) for x in q]
-    return np.array(result, dtype=np.float64)
-
-
 # --------------- #
 # select n        #
 # --------------- #
diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
@@ -367,11 +367,12 @@ def qcut(
     x = _preprocess_for_cut(x)
     x, dtype = _coerce_to_type(x)
 
-    if is_integer(q):
-        quantiles = np.linspace(0, 1, q + 1)
-    else:
-        quantiles = q
-    bins = algos.quantile(x, quantiles)
+    quantiles = np.linspace(0, 1, q + 1) if is_integer(q) else q
+
+    x_np = np.asarray(x)
+    x_np = x_np[~np.isnan(x_np)]
+    bins = np.quantile(x_np, quantiles)
+
     fac, bins = _bins_to_cuts(
         x,
         bins,
diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py
@@ -21,7 +21,6 @@
 )
 import pandas._testing as tm
 from pandas.api.types import CategoricalDtype as CDT
-from pandas.core.algorithms import quantile
 
 from pandas.tseries.offsets import (
     Day,
@@ -34,8 +33,8 @@ def test_qcut():
 
     # We store the bins as Index that have been
     # rounded to comparisons are a bit tricky.
-    labels, bins = qcut(arr, 4, retbins=True)
-    ex_bins = quantile(arr, [0, 0.25, 0.5, 0.75, 1.0])
+    labels, _ = qcut(arr, 4, retbins=True)
+    ex_bins = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0])
 
     result = labels.categories.left.values
     assert np.allclose(result, ex_bins[:-1], atol=1e-2)
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -1731,14 +1731,6 @@ def test_hashtable_large_sizehint(self, hashtable):
         tbl = hashtable(size_hint=size_hint)  # noqa
 
 
-def test_quantile():
-    s = Series(np.random.randn(100))
-
-    result = algos.quantile(s, [0, 0.25, 0.5, 0.75, 1.0])
-    expected = algos.quantile(s.values, [0, 0.25, 0.5, 0.75, 1.0])
-    tm.assert_almost_equal(result, expected)
-
-
 def test_unique_label_indices():
 
     a = np.random.randint(1, 1 << 10, 1 << 15).astype(np.intp)