@@ -68,12 +68,14 @@ cdef:
68
68
int TIEBREAK_MAX = 2
69
69
int TIEBREAK_FIRST = 3
70
70
int TIEBREAK_FIRST_DESCENDING = 4
71
+ int TIEBREAK_DENSE = 5
71
72
72
73
# Maps the user-facing ``ties_method`` name to its integer TIEBREAK_* code.
# The rank_* routines in this module compare a ``tiebreak`` value against
# these codes (presumably looked up from this dict -- the lookup itself is
# not visible in this view).
# NOTE(review): TIEBREAK_FIRST_DESCENDING has no key here; it appears to be
# selected internally rather than by name -- confirm against the callers.
tiebreakers = {
    'average': TIEBREAK_AVERAGE,
    'min': TIEBREAK_MIN,
    'max': TIEBREAK_MAX,
    'first': TIEBREAK_FIRST,
    'dense': TIEBREAK_DENSE,
}
78
80
79
81
@@ -137,7 +139,7 @@ def rank_1d_float64(object in_arr, ties_method='average', ascending=True,
137
139
"""
138
140
139
141
cdef:
140
- Py_ssize_t i, j, n, dups = 0
142
+ Py_ssize_t i, j, n, dups = 0 , total_tie_count = 0
141
143
ndarray[float64_t] sorted_data, ranks, values
142
144
ndarray[int64_t] argsorted
143
145
float64_t val, nan_value
@@ -200,6 +202,10 @@ def rank_1d_float64(object in_arr, ties_method='average', ascending=True,
200
202
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
201
203
for j in range (i - dups + 1 , i + 1 ):
202
204
ranks[argsorted[j]] = 2 * i - j - dups + 2
205
+ elif tiebreak == TIEBREAK_DENSE:
206
+ total_tie_count += 1
207
+ for j in range (i - dups + 1 , i + 1 ):
208
+ ranks[argsorted[j]] = total_tie_count
203
209
sum_ranks = dups = 0
204
210
if pct:
205
211
return ranks / count
@@ -214,7 +220,7 @@ def rank_1d_int64(object in_arr, ties_method='average', ascending=True,
214
220
"""
215
221
216
222
cdef:
217
- Py_ssize_t i, j, n, dups = 0
223
+ Py_ssize_t i, j, n, dups = 0 , total_tie_count = 0
218
224
ndarray[int64_t] sorted_data, values
219
225
ndarray[float64_t] ranks
220
226
ndarray[int64_t] argsorted
@@ -265,6 +271,10 @@ def rank_1d_int64(object in_arr, ties_method='average', ascending=True,
265
271
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
266
272
for j in range (i - dups + 1 , i + 1 ):
267
273
ranks[argsorted[j]] = 2 * i - j - dups + 2
274
+ elif tiebreak == TIEBREAK_DENSE:
275
+ total_tie_count += 1
276
+ for j in range (i - dups + 1 , i + 1 ):
277
+ ranks[argsorted[j]] = total_tie_count
268
278
sum_ranks = dups = 0
269
279
if pct:
270
280
return ranks / count
@@ -279,7 +289,7 @@ def rank_2d_float64(object in_arr, axis=0, ties_method='average',
279
289
"""
280
290
281
291
cdef:
282
- Py_ssize_t i, j, z, k, n, dups = 0
292
+ Py_ssize_t i, j, z, k, n, dups = 0 , total_tie_count = 0
283
293
ndarray[float64_t, ndim= 2 ] ranks, values
284
294
ndarray[int64_t, ndim= 2 ] argsorted
285
295
float64_t val, nan_value
@@ -324,6 +334,7 @@ def rank_2d_float64(object in_arr, axis=0, ties_method='average',
324
334
325
335
for i in range (n):
326
336
dups = sum_ranks = 0
337
+ total_tie_count = 0
327
338
for j in range (k):
328
339
sum_ranks += j + 1
329
340
dups += 1
@@ -347,6 +358,10 @@ def rank_2d_float64(object in_arr, axis=0, ties_method='average',
347
358
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
348
359
for z in range (j - dups + 1 , j + 1 ):
349
360
ranks[i, argsorted[i, z]] = 2 * j - z - dups + 2
361
+ elif tiebreak == TIEBREAK_DENSE:
362
+ total_tie_count += 1
363
+ for z in range (j - dups + 1 , j + 1 ):
364
+ ranks[i, argsorted[i, z]] = total_tie_count
350
365
sum_ranks = dups = 0
351
366
352
367
if axis == 0 :
@@ -362,7 +377,7 @@ def rank_2d_int64(object in_arr, axis=0, ties_method='average',
362
377
"""
363
378
364
379
cdef:
365
- Py_ssize_t i, j, z, k, n, dups = 0
380
+ Py_ssize_t i, j, z, k, n, dups = 0 , total_tie_count = 0
366
381
ndarray[float64_t, ndim= 2 ] ranks
367
382
ndarray[int64_t, ndim= 2 ] argsorted
368
383
ndarray[int64_t, ndim= 2 , cast= True ] values
@@ -395,6 +410,7 @@ def rank_2d_int64(object in_arr, axis=0, ties_method='average',
395
410
396
411
for i in range (n):
397
412
dups = sum_ranks = 0
413
+ total_tie_count = 0
398
414
for j in range (k):
399
415
sum_ranks += j + 1
400
416
dups += 1
@@ -415,6 +431,10 @@ def rank_2d_int64(object in_arr, axis=0, ties_method='average',
415
431
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
416
432
for z in range (j - dups + 1 , j + 1 ):
417
433
ranks[i, argsorted[i, z]] = 2 * j - z - dups + 2
434
+ elif tiebreak == TIEBREAK_DENSE:
435
+ total_tie_count += 1
436
+ for z in range (j - dups + 1 , j + 1 ):
437
+ ranks[i, argsorted[i, z]] = total_tie_count
418
438
sum_ranks = dups = 0
419
439
420
440
if axis == 0 :
@@ -430,7 +450,7 @@ def rank_1d_generic(object in_arr, bint retry=1, ties_method='average',
430
450
"""
431
451
432
452
cdef:
433
- Py_ssize_t i, j, n, dups = 0
453
+ Py_ssize_t i, j, n, dups = 0 , total_tie_count = 0
434
454
ndarray[float64_t] ranks
435
455
ndarray sorted_data, values
436
456
ndarray[int64_t] argsorted
@@ -502,6 +522,10 @@ def rank_1d_generic(object in_arr, bint retry=1, ties_method='average',
502
522
ranks[argsorted[j]] = i + 1
503
523
elif tiebreak == TIEBREAK_FIRST:
504
524
raise ValueError (' first not supported for non-numeric data' )
525
+ elif tiebreak == TIEBREAK_DENSE:
526
+ total_tie_count += 1
527
+ for j in range (i - dups + 1 , i + 1 ):
528
+ ranks[argsorted[j]] = total_tie_count
505
529
sum_ranks = dups = 0
506
530
if pct:
507
531
ranks / count
@@ -545,6 +569,7 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average',
545
569
546
570
cdef:
547
571
Py_ssize_t i, j, z, k, n, infs, dups = 0
572
+ Py_ssize_t total_tie_count = 0
548
573
ndarray[float64_t, ndim= 2 ] ranks
549
574
ndarray[object , ndim= 2 ] values
550
575
ndarray[int64_t, ndim= 2 ] argsorted
@@ -600,6 +625,7 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average',
600
625
601
626
for i in range (n):
602
627
dups = sum_ranks = infs = 0
628
+ total_tie_count = 0
603
629
for j in range (k):
604
630
val = values[i, j]
605
631
if val is nan_value and keep_na:
@@ -621,6 +647,10 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average',
621
647
elif tiebreak == TIEBREAK_FIRST:
622
648
raise ValueError (' first not supported for '
623
649
' non-numeric data' )
650
+ elif tiebreak == TIEBREAK_DENSE:
651
+ total_tie_count += 1
652
+ for z in range (j - dups + 1 , j + 1 ):
653
+ ranks[i, argsorted[i, z]] = total_tie_count
624
654
sum_ranks = dups = 0
625
655
626
656
if axis == 0 :
0 commit comments