Skip to content

Commit d09268b

Browse files
committed
Created enum for rank tiebreakers
1 parent 7c5e481 commit d09268b

File tree

5 files changed

+47
-56
lines changed

5 files changed

+47
-56
lines changed

pandas/_libs/algos.pxd

+7 -7
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,10 @@ cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
1212
b[0] = t
1313
return 0
1414

15-
cdef:
16-
int TIEBREAK_AVERAGE = 0
17-
int TIEBREAK_MIN = 1
18-
int TIEBREAK_MAX = 2
19-
int TIEBREAK_FIRST = 3
20-
int TIEBREAK_FIRST_DESCENDING = 4
21-
int TIEBREAK_DENSE = 5
15+
cdef enum TiebreakEnumType:
16+
TIEBREAK_AVERAGE
17+
TIEBREAK_MIN,
18+
TIEBREAK_MAX
19+
TIEBREAK_FIRST
20+
TIEBREAK_FIRST_DESCENDING
21+
TIEBREAK_DENSE

pandas/_libs/algos.pyx

+5 -13
Original file line number | Diff line number | Diff line change
@@ -31,20 +31,12 @@ cdef double nan = NaN
3131

3232
cdef int64_t iNaT = get_nat()
3333

34-
cdef:
35-
int TIEBREAK_AVERAGE = 0
36-
int TIEBREAK_MIN = 1
37-
int TIEBREAK_MAX = 2
38-
int TIEBREAK_FIRST = 3
39-
int TIEBREAK_FIRST_DESCENDING = 4
40-
int TIEBREAK_DENSE = 5
41-
4234
tiebreakers = {
43-
'average': TIEBREAK_AVERAGE,
44-
'min': TIEBREAK_MIN,
45-
'max': TIEBREAK_MAX,
46-
'first': TIEBREAK_FIRST,
47-
'dense': TIEBREAK_DENSE,
35+
'average': TiebreakEnumType.TIEBREAK_AVERAGE,
36+
'min': TiebreakEnumType.TIEBREAK_MIN,
37+
'max': TiebreakEnumType.TIEBREAK_MAX,
38+
'first': TiebreakEnumType.TIEBREAK_FIRST,
39+
'dense': TiebreakEnumType.TIEBREAK_DENSE,
4840
}
4941

5042

pandas/_libs/algos_rank_helper.pxi.in

+22 -22
Original file line number | Diff line number | Diff line change
@@ -121,11 +121,11 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
121121
_values = np.asarray(list(zip(order[0], order[1])), dtype=_dt)
122122
_as = np.argsort(_values, kind='mergesort', order=('mask', 'values'))
123123
{{else}}
124-
if tiebreak == TIEBREAK_FIRST:
124+
if tiebreak == TiebreakEnumType.TIEBREAK_FIRST:
125125
# need to use a stable sort here
126126
_as = np.lexsort(keys=order)
127127
if not ascending:
128-
tiebreak = TIEBREAK_FIRST_DESCENDING
128+
tiebreak = TiebreakEnumType.TIEBREAK_FIRST_DESCENDING
129129
else:
130130
_as = np.lexsort(keys=order)
131131
{{endif}}
@@ -154,21 +154,21 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
154154
if (i == n - 1 or
155155
are_diff(util.get_value_at(sorted_data, i + 1), val) or
156156
i == non_na_idx - 1):
157-
if tiebreak == TIEBREAK_AVERAGE:
157+
if tiebreak == TiebreakEnumType.TIEBREAK_AVERAGE:
158158
for j in range(i - dups + 1, i + 1):
159159
ranks[argsorted[j]] = sum_ranks / dups
160-
elif tiebreak == TIEBREAK_MIN:
160+
elif tiebreak == TiebreakEnumType.TIEBREAK_MIN:
161161
for j in range(i - dups + 1, i + 1):
162162
ranks[argsorted[j]] = i - dups + 2
163-
elif tiebreak == TIEBREAK_MAX:
163+
elif tiebreak == TiebreakEnumType.TIEBREAK_MAX:
164164
for j in range(i - dups + 1, i + 1):
165165
ranks[argsorted[j]] = i + 1
166-
elif tiebreak == TIEBREAK_FIRST:
166+
elif tiebreak == TiebreakEnumType.TIEBREAK_FIRST:
167167
raise ValueError('first not supported for non-numeric data')
168-
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
168+
elif tiebreak == TiebreakEnumType.TIEBREAK_FIRST_DESCENDING:
169169
for j in range(i - dups + 1, i + 1):
170170
ranks[argsorted[j]] = 2 * i - j - dups + 2
171-
elif tiebreak == TIEBREAK_DENSE:
171+
elif tiebreak == TiebreakEnumType.TIEBREAK_DENSE:
172172
total_tie_count += 1
173173
for j in range(i - dups + 1, i + 1):
174174
ranks[argsorted[j]] = total_tie_count
@@ -191,22 +191,22 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
191191

192192
if (i == n - 1 or sorted_data[i + 1] != val or
193193
i == non_na_idx - 1):
194-
if tiebreak == TIEBREAK_AVERAGE:
194+
if tiebreak == TiebreakEnumType.TIEBREAK_AVERAGE:
195195
for j in range(i - dups + 1, i + 1):
196196
ranks[argsorted[j]] = sum_ranks / dups
197-
elif tiebreak == TIEBREAK_MIN:
197+
elif tiebreak == TiebreakEnumType.TIEBREAK_MIN:
198198
for j in range(i - dups + 1, i + 1):
199199
ranks[argsorted[j]] = i - dups + 2
200-
elif tiebreak == TIEBREAK_MAX:
200+
elif tiebreak == TiebreakEnumType.TIEBREAK_MAX:
201201
for j in range(i - dups + 1, i + 1):
202202
ranks[argsorted[j]] = i + 1
203-
elif tiebreak == TIEBREAK_FIRST:
203+
elif tiebreak == TiebreakEnumType.TIEBREAK_FIRST:
204204
for j in range(i - dups + 1, i + 1):
205205
ranks[argsorted[j]] = j + 1
206-
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
206+
elif tiebreak == TiebreakEnumType.TIEBREAK_FIRST_DESCENDING:
207207
for j in range(i - dups + 1, i + 1):
208208
ranks[argsorted[j]] = 2 * i - j - dups + 2
209-
elif tiebreak == TIEBREAK_DENSE:
209+
elif tiebreak == TiebreakEnumType.TIEBREAK_DENSE:
210210
total_tie_count += 1
211211
for j in range(i - dups + 1, i + 1):
212212
ranks[argsorted[j]] = total_tie_count
@@ -300,11 +300,11 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
300300
else:
301301
return ranks
302302
{{else}}
303-
if tiebreak == TIEBREAK_FIRST:
303+
if tiebreak == TiebreakEnumType.TIEBREAK_FIRST:
304304
# need to use a stable sort here
305305
_as = values.argsort(axis=1, kind='mergesort')
306306
if not ascending:
307-
tiebreak = TIEBREAK_FIRST_DESCENDING
307+
tiebreak = TiebreakEnumType.TIEBREAK_FIRST_DESCENDING
308308
else:
309309
_as = values.argsort(1)
310310
{{endif}}
@@ -359,27 +359,27 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
359359
{{else}}
360360
if j == k - 1 or values[i, j + 1] != val:
361361
{{endif}}
362-
if tiebreak == TIEBREAK_AVERAGE:
362+
if tiebreak == TiebreakEnumType.TIEBREAK_AVERAGE:
363363
for z in range(j - dups + 1, j + 1):
364364
ranks[i, argsorted[i, z]] = sum_ranks / dups
365-
elif tiebreak == TIEBREAK_MIN:
365+
elif tiebreak == TiebreakEnumType.TIEBREAK_MIN:
366366
for z in range(j - dups + 1, j + 1):
367367
ranks[i, argsorted[i, z]] = j - dups + 2
368-
elif tiebreak == TIEBREAK_MAX:
368+
elif tiebreak == TiebreakEnumType.TIEBREAK_MAX:
369369
for z in range(j - dups + 1, j + 1):
370370
ranks[i, argsorted[i, z]] = j + 1
371-
elif tiebreak == TIEBREAK_FIRST:
371+
elif tiebreak == TiebreakEnumType.TIEBREAK_FIRST:
372372
{{if dtype == 'object'}}
373373
raise ValueError('first not supported '
374374
'for non-numeric data')
375375
{{else}}
376376
for z in range(j - dups + 1, j + 1):
377377
ranks[i, argsorted[i, z]] = z + 1
378378
{{endif}}
379-
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
379+
elif tiebreak == TiebreakEnumType.TIEBREAK_FIRST_DESCENDING:
380380
for z in range(j - dups + 1, j + 1):
381381
ranks[i, argsorted[i, z]] = 2 * j - z - dups + 2
382-
elif tiebreak == TIEBREAK_DENSE:
382+
elif tiebreak == TiebreakEnumType.TIEBREAK_DENSE:
383383
total_tie_count += 1
384384
for z in range(j - dups + 1, j + 1):
385385
ranks[i, argsorted[i, z]] = total_tie_count

pandas/_libs/groupby.pyx

+7 -8
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,7 @@ from libc.math cimport isnan
1717
from libc.stdlib cimport malloc, free
1818

1919
from util cimport numeric, get_nat
20-
from algos cimport (swap, TIEBREAK_AVERAGE, TIEBREAK_MIN, TIEBREAK_MAX,
21-
TIEBREAK_FIRST, TIEBREAK_DENSE)
20+
from algos cimport swap, TiebreakEnumType
2221
from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers
2322

2423
cdef int64_t iNaT = get_nat()
@@ -132,7 +131,7 @@ def group_rank_object(ndarray[float64_t, ndim=2] out,
132131
Only transforms on axis=0
133132
"""
134133
cdef:
135-
int tiebreak
134+
TiebreakEnumType tiebreak
136135
Py_ssize_t i, j, N, K
137136
int64_t val_start=0, grp_start=0, dups=0, sum_ranks=0, grp_vals_seen=1
138137
int64_t grp_na_count=0
@@ -176,22 +175,22 @@ def group_rank_object(ndarray[float64_t, ndim=2] out,
176175
grp_na_count += 1
177176
out[_as[i], 0] = np.nan
178177
else:
179-
if tiebreak == TIEBREAK_AVERAGE:
178+
if tiebreak == TiebreakEnumType.TIEBREAK_AVERAGE:
180179
for j in range(i - dups + 1, i + 1):
181180
out[_as[j], 0] = sum_ranks / dups
182-
elif tiebreak == TIEBREAK_MIN:
181+
elif tiebreak == TiebreakEnumType.TIEBREAK_MIN:
183182
for j in range(i - dups + 1, i + 1):
184183
out[_as[j], 0] = i - grp_start - dups + 2
185-
elif tiebreak == TIEBREAK_MAX:
184+
elif tiebreak == TiebreakEnumType.TIEBREAK_MAX:
186185
for j in range(i - dups + 1, i + 1):
187186
out[_as[j], 0] = i - grp_start + 1
188-
elif tiebreak == TIEBREAK_FIRST:
187+
elif tiebreak == TiebreakEnumType.TIEBREAK_FIRST:
189188
for j in range(i - dups + 1, i + 1):
190189
if ascending:
191190
out[_as[j], 0] = j + 1 - grp_start
192191
else:
193192
out[_as[j], 0] = 2 * i - j - dups + 2 - grp_start
194-
elif tiebreak == TIEBREAK_DENSE:
193+
elif tiebreak == TiebreakEnumType.TIEBREAK_DENSE:
195194
for j in range(i - dups + 1, i + 1):
196195
out[_as[j], 0] = grp_vals_seen
197196

pandas/_libs/groupby_helper.pxi.in

+6 -6
Original file line number | Diff line number | Diff line change
@@ -456,7 +456,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
456456
Only transforms on axis=0
457457
"""
458458
cdef:
459-
int tiebreak
459+
TiebreakEnumType tiebreak
460460
Py_ssize_t i, j, N, K
461461
int64_t val_start=0, grp_start=0, dups=0, sum_ranks=0, grp_vals_seen=1
462462
int64_t grp_na_count=0
@@ -507,22 +507,22 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
507507
grp_na_count += 1
508508
out[_as[i], 0] = nan
509509
else:
510-
if tiebreak == TIEBREAK_AVERAGE:
510+
if tiebreak == TiebreakEnumType.TIEBREAK_AVERAGE:
511511
for j in range(i - dups + 1, i + 1):
512512
out[_as[j], 0] = sum_ranks / dups
513-
elif tiebreak == TIEBREAK_MIN:
513+
elif tiebreak == TiebreakEnumType.TIEBREAK_MIN:
514514
for j in range(i - dups + 1, i + 1):
515515
out[_as[j], 0] = i - grp_start - dups + 2
516-
elif tiebreak == TIEBREAK_MAX:
516+
elif tiebreak == TiebreakEnumType.TIEBREAK_MAX:
517517
for j in range(i - dups + 1, i + 1):
518518
out[_as[j], 0] = i - grp_start + 1
519-
elif tiebreak == TIEBREAK_FIRST:
519+
elif tiebreak == TiebreakEnumType.TIEBREAK_FIRST:
520520
for j in range(i - dups + 1, i + 1):
521521
if ascending:
522522
out[_as[j], 0] = j + 1 - grp_start
523523
else:
524524
out[_as[j], 0] = 2 * i - j - dups + 2 - grp_start
525-
elif tiebreak == TIEBREAK_DENSE:
525+
elif tiebreak == TiebreakEnumType.TIEBREAK_DENSE:
526526
for j in range(i - dups + 1, i + 1):
527527
out[_as[j], 0] = grp_vals_seen
528528

0 commit comments

Comments
 (0)