Skip to content

CLN/TST remove compat.scipy #7296

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 1, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 0 additions & 159 deletions pandas/compat/scipy.py

This file was deleted.

89 changes: 88 additions & 1 deletion pandas/stats/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,94 @@ def correl_ts(frame1, frame2):
def correl_xs(frame1, frame2):
    """Return ``correl_ts`` applied to the transposes of both inputs."""
    first_transposed = frame1.T
    second_transposed = frame2.T
    return correl_ts(first_transposed, second_transposed)

def percentileofscore(a, score, kind='rank'):
    """The percentile rank of a score relative to a list of scores.

    A `percentileofscore` of, for example, 80% means that 80% of the
    scores in `a` are below the given score. In the case of gaps or
    ties, the exact definition depends on the optional keyword, `kind`.

    Parameters
    ----------
    a : array_like
        One-dimensional array of scores to which `score` is compared.
    score : int or float
        Score that is compared to the elements in `a`.
    kind : {'rank', 'weak', 'strict', 'mean'}, optional
        This optional parameter specifies the interpretation of the
        resulting score:

        - "rank": Average percentage ranking of score. In case of
          multiple matches, average the percentage rankings of
          all matching scores.
        - "weak": This kind corresponds to the definition of a cumulative
          distribution function. A percentileofscore of 80%
          means that 80% of values are less than or equal
          to the provided score.
        - "strict": Similar to "weak", except that only values that are
          strictly less than the given score are counted.
        - "mean": The average of the "weak" and "strict" scores, often
          used in testing. See

          http://en.wikipedia.org/wiki/Percentile_rank

    Returns
    -------
    pcos : float
        Percentile-position of score (0-100) relative to `a`.

    Raises
    ------
    ValueError
        If `kind` is not one of 'rank', 'strict', 'weak' or 'mean'.

    Examples
    --------
    Three-quarters of the given values lie below a given score:

    >>> percentileofscore([1, 2, 3, 4], 3)
    75.0

    With multiple matches, note how the scores of the two matches, 0.6
    and 0.8 respectively, are averaged:

    >>> percentileofscore([1, 2, 3, 3, 4], 3)
    70.0

    Only 2/5 values are strictly less than 3:

    >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='strict')
    40.0

    But 4/5 values are less than or equal to 3:

    >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='weak')
    80.0

    The average between the weak and the strict scores is

    >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='mean')
    60.0

    """
    a = np.array(a)
    # The denominator is the ORIGINAL number of scores, even when `score`
    # is appended below for the 'rank' computation.
    n = len(a)

    if kind == 'rank':
        if not np.any(a == score):
            # `score` is absent: insert it and use 0-based ranks, so its
            # rank equals the number of values strictly below it.
            a = np.append(a, score)
            ranks = np.arange(len(a))
        else:
            # `score` is present: 1-based ranks (float) of the sorted values.
            ranks = np.arange(len(a)) + 1.0

        a = np.sort(a)
        # Index with the boolean mask itself. Wrapping it in a list
        # (``[a == score]``) is read by current NumPy as a 2-D boolean
        # index and raises IndexError on the 1-D `ranks` array.
        matches = a == score
        return (np.mean(ranks[matches]) / n) * 100.0

    elif kind == 'strict':
        return np.count_nonzero(a < score) / float(n) * 100
    elif kind == 'weak':
        return np.count_nonzero(a <= score) / float(n) * 100
    elif kind == 'mean':
        below = np.count_nonzero(a < score)
        at_or_below = np.count_nonzero(a <= score)
        return (below + at_or_below) * 50 / float(n)
    else:
        raise ValueError("kind can only be 'rank', 'strict', 'weak' or 'mean'")

def percentileRank(frame, column=None, kind='mean'):
"""
Expand Down Expand Up @@ -76,7 +164,6 @@ def percentileRank(frame, column=None, kind='mean'):
-------
TimeSeries or DataFrame, depending on input
"""
from pandas.compat.scipy import percentileofscore
fun = lambda xs, score: percentileofscore(remove_na(xs),
score, kind=kind)

Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11142,7 +11142,8 @@ def test_cumprod(self):
df.cumprod(1)

def test_rank(self):
from pandas.compat.scipy import rankdata
_skip_if_no_scipy()
from scipy.stats import rankdata

self.frame['A'][::2] = np.nan
self.frame['B'][::3] = np.nan
Expand Down Expand Up @@ -11235,7 +11236,8 @@ def test_rank2(self):


def test_rank_na_option(self):
from pandas.compat.scipy import rankdata
_skip_if_no_scipy()
from scipy.stats import rankdata

self.frame['A'][::2] = np.nan
self.frame['B'][::3] = np.nan
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4063,7 +4063,8 @@ def test_nsmallest_nlargest(self):
assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]])

def test_rank(self):
from pandas.compat.scipy import rankdata
_skip_if_no_scipy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these last two also call `_skip_if_no_scipy()`?

from scipy.stats import rankdata

self.ts[::2] = np.nan
self.ts[:10][::3] = 4.
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/test_tseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
import pandas.algos as algos
from datetime import datetime

def _skip_if_no_scipy():
try:
import scipy.stats
except ImportError:
raise nose.SkipTest("scipy not installed")

class TestTseriesUtil(tm.TestCase):
_multiprocess_can_split_ = True

Expand Down Expand Up @@ -335,7 +341,8 @@ def test_convert_objects_complex_number():


def test_rank():
from pandas.compat.scipy import rankdata
_skip_if_no_scipy()
from scipy.stats import rankdata

def _check(arr):
mask = ~np.isfinite(arr)
Expand Down