@@ -42,6 +42,94 @@ def correl_ts(frame1, frame2):
42
42
def correl_xs(frame1, frame2):
    """Apply `correl_ts` to the transposes of `frame1` and `frame2`.

    Both inputs are transposed via their `.T` attribute and the result of
    `correl_ts` on the transposed pair is returned unchanged.
    """
    transposed_first = frame1.T
    transposed_second = frame2.T
    return correl_ts(transposed_first, transposed_second)
44
44
45
def percentileofscore(a, score, kind='rank'):
    """The percentile rank of a score relative to a list of scores.

    A `percentileofscore` of, for example, 80% means that 80% of the
    scores in `a` are below the given score. In the case of gaps or
    ties, the exact definition depends on the optional keyword, `kind`.

    Parameters
    ----------
    a : array_like
        Array of scores to which `score` is compared.
    score : int or float
        Score that is compared to the elements in `a`.
    kind : {'rank', 'weak', 'strict', 'mean'}, optional
        This optional parameter specifies the interpretation of the
        resulting score:

        - "rank": Average percentage ranking of score.  In case of
                  multiple matches, average the percentage rankings of
                  all matching scores.
        - "weak": This kind corresponds to the definition of a cumulative
                  distribution function.  A percentileofscore of 80%
                  means that 80% of values are less than or equal
                  to the provided score.
        - "strict": Similar to "weak", except that only values that are
                    strictly less than the given score are counted.
        - "mean": The average of the "weak" and "strict" scores, often
                  used in testing.  See

                  http://en.wikipedia.org/wiki/Percentile_rank

    Returns
    -------
    pcos : float
        Percentile-position of score (0-100) relative to `a`.  NaN is
        returned when `a` is empty, because no percentile is defined.

    Raises
    ------
    ValueError
        If `kind` is not one of 'rank', 'strict', 'weak' or 'mean'.

    Examples
    --------
    Three-quarters of the given values lie below a given score:

    >>> percentileofscore([1, 2, 3, 4], 3)
    75.0

    With multiple matches, note how the scores of the two matches, 0.6
    and 0.8 respectively, are averaged:

    >>> percentileofscore([1, 2, 3, 3, 4], 3)
    70.0

    Only 2/5 values are strictly less than 3:

    >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='strict')
    40.0

    But 4/5 values are less than or equal to 3:

    >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='weak')
    80.0

    The average between the weak and the strict scores is

    >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='mean')
    60.0

    """
    # Reject an unknown `kind` up front so the error does not depend on
    # the data that was passed in.
    if kind not in ('rank', 'strict', 'weak', 'mean'):
        raise ValueError("kind can only be 'rank', 'strict', 'weak' or 'mean'")

    a = np.asarray(a)
    n = len(a)
    if n == 0:
        # Nothing to compare against: avoid dividing by zero and report
        # an undefined percentile consistently for every `kind`.
        return np.nan

    # Two vectorised counts are enough for every kind; no sort is needed.
    below = np.count_nonzero(a < score)
    at_or_below = np.count_nonzero(a <= score)

    if kind == 'strict':
        return below / float(n) * 100
    if kind == 'weak':
        return at_or_below / float(n) * 100
    if kind == 'mean':
        return (below + at_or_below) * 50 / float(n)

    # kind == 'rank'
    if at_or_below == below:
        # `score` does not occur in `a`: its position is simply the number
        # of smaller values.
        return below / float(n) * 100.0

    # `score` occurs at the 1-based sorted ranks below + 1 .. at_or_below;
    # the mean of that consecutive run is the midpoint of its endpoints.
    mean_rank = (below + 1 + at_or_below) / 2.0
    return mean_rank / n * 100.0
45
133
46
134
def percentileRank (frame , column = None , kind = 'mean' ):
47
135
"""
@@ -76,7 +164,6 @@ def percentileRank(frame, column=None, kind='mean'):
76
164
-------
77
165
TimeSeries or DataFrame, depending on input
78
166
"""
79
- from pandas .compat .scipy import percentileofscore
80
167
fun = lambda xs , score : percentileofscore (remove_na (xs ),
81
168
score , kind = kind )
82
169
0 commit comments