Skip to content

Commit 1cdd08b

Browse files
committed
ENH: stats.scoreatpercentile
stats.scoreatpercentile now accepts sequences of percentiles and axis parameter. Function adapted from NumPy's percentile function.
1 parent d47bc96 commit 1cdd08b

File tree

2 files changed

+104
-53
lines changed

2 files changed

+104
-53
lines changed

scipy/stats/stats.py

+59-26
Original file line numberDiff line numberDiff line change
@@ -1404,13 +1404,8 @@ def itemfreq(a):
14041404
return array(_support.abut(scores, freq))
14051405

14061406

1407-
def _interpolate(a, b, fraction):
1408-
"""Returns the point at the given fraction between a and b, where
1409-
'fraction' must be between 0 and 1.
1410-
"""
1411-
return a + (b - a)*fraction;
1412-
1413-
def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'):
1407+
def scoreatpercentile(a, per, limit=(), interpolation_method='fraction',
1408+
axis=None):
14141409
"""
14151410
Calculate the score at the given `per` percentile of the sequence `a`.
14161411
@@ -1428,9 +1423,9 @@ def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'):
14281423
14291424
Parameters
14301425
----------
1431-
a : ndarray
1426+
a : array_like
14321427
Values from which to extract score.
1433-
per : scalar
1428+
per : scalar in range [0,100] (or sequence of floats)
14341429
Percentile at which to extract score.
14351430
limit : tuple, optional
14361431
Tuple of two scalars, the lower and upper limits within which to
@@ -1443,10 +1438,13 @@ def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'):
14431438
fractional part of the index surrounded by `i` and `j`.
14441439
-lower: `i`.
14451440
- higher: `j`.
1441+
axis : int, optional
1442+
Axis along which the percentiles are computed. The default (None)
1443+
is to compute the median along a flattened version of the array.
14461444
14471445
Returns
14481446
-------
1449-
score : float
1447+
score : float (or sequence of floats)
14501448
Score at percentile.
14511449
14521450
See Also
@@ -1461,29 +1459,64 @@ def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'):
14611459
49.5
14621460
14631461
"""
1464-
# TODO: this should be a simple wrapper around a well-written quantile
1465-
# function. GNU R provides 9 quantile algorithms (!), with differing
1466-
# behaviour at, for example, discontinuities.
1467-
values = np.sort(a, axis=0)
1462+
# adapted from NumPy's percentile function
1463+
a = np.asarray(a)
1464+
14681465
if limit:
1469-
values = values[(limit[0] <= values) & (values <= limit[1])]
1466+
a = a[(limit[0] <= a) & (a <= limit[1])]
14701467

1471-
idx = per /100. * (values.shape[0] - 1)
1472-
if (idx % 1 == 0):
1473-
score = values[idx]
1474-
else:
1475-
if interpolation_method == 'fraction':
1476-
score = _interpolate(values[int(idx)], values[int(idx) + 1],
1477-
idx % 1)
1478-
elif interpolation_method == 'lower':
1479-
score = values[np.floor(idx)]
1468+
if per == 0:
1469+
return a.min(axis=axis)
1470+
elif per == 100:
1471+
return a.max(axis=axis)
1472+
1473+
sorted = np.sort(a, axis=axis)
1474+
if axis is None:
1475+
axis = 0
1476+
1477+
return _compute_qth_percentile(sorted, per, interpolation_method, axis)
1478+
1479+
1480+
# handle sequence of per's without calling sort multiple times
1481+
def _compute_qth_percentile(sorted, per, interpolation_method, axis):
1482+
if not np.isscalar(per):
1483+
return [_compute_qth_percentile(sorted, i, interpolation_method, axis)
1484+
for i in per]
1485+
1486+
if (per < 0) or (per > 100):
1487+
raise ValueError("percentile must be in the range [0, 100]")
1488+
1489+
indexer = [slice(None)] * sorted.ndim
1490+
idx = per / 100. * (sorted.shape[axis] - 1)
1491+
1492+
if int(idx) != idx:
1493+
# round fractional indices according to interpolation method
1494+
if interpolation_method == 'lower':
1495+
idx = np.floor(idx)
14801496
elif interpolation_method == 'higher':
1481-
score = values[np.ceil(idx)]
1497+
idx = np.ceil(idx)
1498+
elif interpolation_method == 'fraction':
1499+
pass # keep idx as fraction and interpolate
14821500
else:
14831501
raise ValueError("interpolation_method can only be 'fraction', " \
14841502
"'lower' or 'higher'")
14851503

1486-
return score
1504+
i = int(idx)
1505+
if i == idx:
1506+
indexer[axis] = slice(i, i + 1)
1507+
weights = array(1)
1508+
sumval = 1.0
1509+
else:
1510+
indexer[axis] = slice(i, i + 2)
1511+
j = i + 1
1512+
weights = array([(j - idx), (idx - i)], float)
1513+
wshape = [1] * sorted.ndim
1514+
wshape[axis] = 2
1515+
weights.shape = wshape
1516+
sumval = weights.sum()
1517+
1518+
# Use np.add.reduce to coerce data type
1519+
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
14871520

14881521

14891522
def percentileofscore(a, score, kind='rank'):

scipy/stats/tests/test_stats.py

+45-27
Original file line numberDiff line numberDiff line change
@@ -993,9 +993,9 @@ def test_2D_array_dim1(self):
993993

994994
class TestScoreatpercentile(TestCase):
995995
def setUp(self):
996-
self.a1 = [3,4,5,10,-3,-5,6]
997-
self.a2 = [3,-6,-2,8,7,4,2,1]
998-
self.a3 = [3.,4,5,10,-3,-5,-6,7.0]
996+
self.a1 = [3, 4, 5, 10, -3, -5, 6]
997+
self.a2 = [3, -6, -2, 8, 7, 4, 2, 1]
998+
self.a3 = [3., 4, 5, 10, -3, -5, -6, 7.0]
999999

10001000
def test_basic(self):
10011001
x = arange(8) * 0.5
@@ -1009,33 +1009,29 @@ def test_2D(self):
10091009
[4, 4, 3],
10101010
[1, 1, 1],
10111011
[1, 1, 1]])
1012-
assert_array_equal(stats.scoreatpercentile(x, 50), [1,1,1])
1012+
assert_array_equal(stats.scoreatpercentile(x, 50), [1, 1, 1])
10131013

10141014
def test_fraction(self):
10151015
scoreatperc = stats.scoreatpercentile
10161016

10171017
# Test defaults
10181018
assert_equal(scoreatperc(range(10), 50), 4.5)
1019-
assert_equal(scoreatperc(range(10), 50, (2,7)), 4.5)
1019+
assert_equal(scoreatperc(range(10), 50, (2, 7)), 4.5)
10201020
assert_equal(scoreatperc(range(100), 50, limit=(1, 8)), 4.5)
1021-
assert_equal(scoreatperc(np.array([1, 10 ,100]), 50, (10,100)), 55)
1022-
assert_equal(scoreatperc(np.array([1, 10 ,100]), 50, (1,10)), 5.5)
1021+
assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (10, 100)), 55)
1022+
assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (1, 10)), 5.5)
10231023

10241024
# explicitly specify interpolation_method 'fraction' (the default)
1025-
assert_equal(scoreatperc(range(10), 50, interpolation_method='fraction'),
1026-
4.5)
1025+
assert_equal(scoreatperc(range(10), 50,
1026+
interpolation_method='fraction'), 4.5)
10271027
assert_equal(scoreatperc(range(10), 50, limit=(2, 7),
1028-
interpolation_method='fraction'),
1029-
4.5)
1028+
interpolation_method='fraction'), 4.5)
10301029
assert_equal(scoreatperc(range(100), 50, limit=(1, 8),
1031-
interpolation_method='fraction'),
1032-
4.5)
1033-
assert_equal(scoreatperc(np.array([1, 10 ,100]), 50, (10, 100),
1034-
interpolation_method='fraction'),
1035-
55)
1036-
assert_equal(scoreatperc(np.array([1, 10 ,100]), 50, (1,10),
1037-
interpolation_method='fraction'),
1038-
5.5)
1030+
interpolation_method='fraction'), 4.5)
1031+
assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (10, 100),
1032+
interpolation_method='fraction'), 55)
1033+
assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (1, 10),
1034+
interpolation_method='fraction'), 5.5)
10391035

10401036
def test_lower_higher(self):
10411037
scoreatperc = stats.scoreatpercentile
@@ -1045,23 +1041,45 @@ def test_lower_higher(self):
10451041
interpolation_method='lower'), 4)
10461042
assert_equal(scoreatperc(range(10), 50,
10471043
interpolation_method='higher'), 5)
1048-
assert_equal(scoreatperc(range(10), 50, (2,7),
1044+
assert_equal(scoreatperc(range(10), 50, (2, 7),
10491045
interpolation_method='lower'), 4)
1050-
assert_equal(scoreatperc(range(10), 50, limit=(2,7),
1046+
assert_equal(scoreatperc(range(10), 50, limit=(2, 7),
10511047
interpolation_method='higher'), 5)
1052-
assert_equal(scoreatperc(range(100), 50, (1,8),
1048+
assert_equal(scoreatperc(range(100), 50, (1, 8),
10531049
interpolation_method='lower'), 4)
1054-
assert_equal(scoreatperc(range(100), 50, (1,8),
1050+
assert_equal(scoreatperc(range(100), 50, (1, 8),
10551051
interpolation_method='higher'), 5)
1056-
assert_equal(scoreatperc(np.array([1, 10 ,100]), 50, (10,100),
1052+
assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (10, 100),
10571053
interpolation_method='lower'), 10)
1058-
assert_equal(scoreatperc(np.array([1, 10 ,100]), 50, limit=(10, 100),
1054+
assert_equal(scoreatperc(np.array([1, 10, 100]), 50, limit=(10, 100),
10591055
interpolation_method='higher'), 100)
1060-
assert_equal(scoreatperc(np.array([1, 10 ,100]), 50, (1,10),
1056+
assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (1, 10),
10611057
interpolation_method='lower'), 1)
1062-
assert_equal(scoreatperc(np.array([1, 10 ,100]), 50, limit=(1,10),
1058+
assert_equal(scoreatperc(np.array([1, 10, 100]), 50, limit=(1, 10),
10631059
interpolation_method='higher'), 10)
10641060

1061+
def test_sequence(self):
1062+
x = arange(8) * 0.5
1063+
assert_equal(stats.scoreatpercentile(x, [0, 100, 50]), [0, 3.5, 1.75])
1064+
1065+
def test_axis(self):
1066+
scoreatperc = stats.scoreatpercentile
1067+
x = arange(12).reshape(3, 4)
1068+
1069+
assert_equal(scoreatperc(x, (25, 50, 100)), [2.75, 5.5, 11.0])
1070+
1071+
r0 = [[2, 3, 4, 5], [4, 5, 6, 7], [8, 9, 10, 11]]
1072+
assert_equal(scoreatperc(x, (25, 50, 100), axis=0), r0)
1073+
1074+
r1 = [[0.75, 4.75, 8.75], [1.5, 5.5, 9.5], [3, 7, 11]]
1075+
assert_equal(scoreatperc(x, (25, 50, 100), axis=1), r1)
1076+
1077+
def test_exception(self):
1078+
assert_raises(ValueError, stats.scoreatpercentile, [1, 2], 56,
1079+
interpolation_method='foobar')
1080+
assert_raises(ValueError, stats.scoreatpercentile, [1], 101)
1081+
assert_raises(ValueError, stats.scoreatpercentile, [1], -1)
1082+
10651083

10661084
class TestCMedian(TestCase):
10671085
def test_basic(self):

0 commit comments

Comments
 (0)