Skip to content

Commit 19e15ce

Browse files
committed
CLN/TST Remove compat.scipy
1 parent 09723ca commit 19e15ce

File tree

5 files changed

+102
-164
lines changed

5 files changed

+102
-164
lines changed

pandas/compat/scipy.py

-159
This file was deleted.

pandas/stats/misc.py

+88-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,94 @@ def correl_ts(frame1, frame2):
4242
def correl_xs(frame1, frame2):
    """Apply ``correl_ts`` to the transposes of ``frame1`` and ``frame2``."""
    left_transposed = frame1.T
    right_transposed = frame2.T
    return correl_ts(left_transposed, right_transposed)
4444

45+
def percentileofscore(a, score, kind='rank'):
    """Return the percentile rank of a score relative to a list of scores.

    A `percentileofscore` of, for example, 80% means that 80% of the
    scores in `a` are below the given score. In the case of gaps or
    ties, the exact definition depends on the optional keyword, `kind`.

    Parameters
    ----------
    a : array_like
        One-dimensional array of scores to which `score` is compared.
    score : int or float
        Score that is compared to the elements in `a`.
    kind : {'rank', 'weak', 'strict', 'mean'}, optional
        This optional parameter specifies the interpretation of the
        resulting score:

        - "rank": Average percentage ranking of score. In case of
          multiple matches, average the percentage rankings of
          all matching scores.
        - "weak": This kind corresponds to the definition of a cumulative
          distribution function. A percentileofscore of 80%
          means that 80% of values are less than or equal
          to the provided score.
        - "strict": Similar to "weak", except that only values that are
          strictly less than the given score are counted.
        - "mean": The average of the "weak" and "strict" scores, often
          used in testing. See
          http://en.wikipedia.org/wiki/Percentile_rank

    Returns
    -------
    pcos : float
        Percentile-position of score (0-100) relative to `a`. NaN is
        returned when `a` is empty, since no percentile is defined.

    Raises
    ------
    ValueError
        If `kind` is not one of 'rank', 'strict', 'weak' or 'mean'.

    Examples
    --------
    Three-quarters of the given values lie below a given score:

    >>> percentileofscore([1, 2, 3, 4], 3)
    75.0

    With multiple matches, note how the scores of the two matches, 0.6
    and 0.8 respectively, are averaged:

    >>> percentileofscore([1, 2, 3, 3, 4], 3)
    70.0

    Only 2/5 values are strictly less than 3:

    >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='strict')
    40.0

    But 4/5 values are less than or equal to 3:

    >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='weak')
    80.0

    The average between the weak and the strict scores is

    >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='mean')
    60.0

    """
    # Reject an unknown `kind` before doing any work.
    if kind not in ('rank', 'strict', 'weak', 'mean'):
        raise ValueError("kind can only be 'rank', 'strict', 'weak' or 'mean'")

    a = np.asarray(a)
    n = len(a)

    if n == 0:
        # Nothing to compare against: the percentile is undefined. Return NaN
        # for every kind instead of dividing by zero.
        return np.nan

    if kind == 'rank':
        if not np.any(a == score):
            # The score is absent: insert it and use 0-based positions so the
            # result is the share of the original values lying below it.
            a = np.append(a, score)
            ranks = np.arange(len(a), dtype=float)
        else:
            # The score is present: average the 1-based ranks of all matches.
            ranks = np.arange(1, len(a) + 1, dtype=float)

        a = np.sort(a)
        # Index with the boolean mask itself. Wrapping it in a list is read
        # by NumPy as a 2-D index and fails on the 1-D `ranks` array.
        matches = a == score
        # `n` is the original length, taken before any append above.
        return float(np.mean(ranks[matches]) / n * 100.0)

    below = np.count_nonzero(a < score)
    if kind == 'strict':
        return below / float(n) * 100

    at_or_below = np.count_nonzero(a <= score)
    if kind == 'weak':
        return at_or_below / float(n) * 100

    # kind == 'mean': average of the strict and weak percentages.
    return (below + at_or_below) * 50 / float(n)
45133

46134
def percentileRank(frame, column=None, kind='mean'):
47135
"""
@@ -76,7 +164,6 @@ def percentileRank(frame, column=None, kind='mean'):
76164
-------
77165
TimeSeries or DataFrame, depending on input
78166
"""
79-
from pandas.compat.scipy import percentileofscore
80167
fun = lambda xs, score: percentileofscore(remove_na(xs),
81168
score, kind=kind)
82169

pandas/tests/test_frame.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -11142,7 +11142,8 @@ def test_cumprod(self):
1114211142
df.cumprod(1)
1114311143

1114411144
def test_rank(self):
11145-
from pandas.compat.scipy import rankdata
11145+
_skip_if_no_scipy()
11146+
from scipy.stats import rankdata
1114611147

1114711148
self.frame['A'][::2] = np.nan
1114811149
self.frame['B'][::3] = np.nan
@@ -11235,7 +11236,8 @@ def test_rank2(self):
1123511236

1123611237

1123711238
def test_rank_na_option(self):
11238-
from pandas.compat.scipy import rankdata
11239+
_skip_if_no_scipy()
11240+
from scipy.stats import rankdata
1123911241

1124011242
self.frame['A'][::2] = np.nan
1124111243
self.frame['B'][::3] = np.nan

pandas/tests/test_series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4063,7 +4063,8 @@ def test_nsmallest_nlargest(self):
40634063
assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]])
40644064

40654065
def test_rank(self):
4066-
from pandas.compat.scipy import rankdata
4066+
_skip_if_no_scipy()
4067+
from scipy.stats import rankdata
40674068

40684069
self.ts[::2] = np.nan
40694070
self.ts[:10][::3] = 4.

pandas/tests/test_tseries.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
import pandas.algos as algos
1010
from datetime import datetime
1111

12+
def _skip_if_no_scipy():
13+
try:
14+
import scipy.stats
15+
except ImportError:
16+
raise nose.SkipTest("scipy not installed")
17+
1218
class TestTseriesUtil(tm.TestCase):
1319
_multiprocess_can_split_ = True
1420

@@ -335,7 +341,8 @@ def test_convert_objects_complex_number():
335341

336342

337343
def test_rank():
338-
from pandas.compat.scipy import rankdata
344+
_skip_if_no_scipy()
345+
from scipy.stats import rankdata
339346

340347
def _check(arr):
341348
mask = ~np.isfinite(arr)

0 commit comments

Comments
 (0)