Skip to content

Commit 5abf6d8

Browse files
committed
Working tests (excl missing data)
1 parent 7de2824 commit 5abf6d8

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

pandas/_libs/groupby.pyx

+7 -6
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ def group_rank_object(ndarray[float64_t, ndim=2] out,
131 | 131 |   cdef:
132 | 132 |   int tiebreak
133 | 133 |   Py_ssize_t i, j, N, K
134 |     | - int64_t val_start=0, grp_start=0, dups=0, sum_ranks=0
    | 134 | + int64_t val_start=0, grp_start=0, dups=0, sum_ranks=0, vals_seen=1
135 | 135 |   ndarray[int64_t] _as
136 | 136 |   bint pct, ascending
137 | 137 |

@@ -160,20 +160,21 @@ def group_rank_object(ndarray[float64_t, ndim=2] out,
160 | 160 |   out[_as[j], 0] = i - grp_start + 1
161 | 161 |   elif tiebreak == TIEBREAK_FIRST:
162 | 162 |   for j in range(i - dups + 1, i + 1):
163 |     | - out[_as[j], 0] = j + 1
164 |     | - elif tiebreak == TIEBREAK_FIRST_DESCENDING:
165 |     | - for j in range(i - dups + 1, i + 1):
166 |     | - out[_as[j], 0] = 2 * (i - grp_start) - j - dups + 2
    | 163 | + if ascending:
    | 164 | + out[_as[j], 0] = j + 1
    | 165 | + else:
    | 166 | + out[_as[j], 0] = 2 * i - j - dups + 2
167 | 167 |   elif tiebreak == TIEBREAK_DENSE:
168 | 168 |   for j in range(i - dups + 1, i + 1):
169 |     | - out[_as[j], 0] = val_start - grp_start
    | 169 | + out[_as[j], 0] = vals_seen
170 | 170 |
171 | 171 |   if (i == N - 1 or (
172 | 172 |   (values[_as[i], 0] != values[_as[i+1], 0]) and not
173 | 173 |   (values[_as[i], 0] is np.nan and values[_as[i+1], 0] is np.nan)
174 | 174 |   )):
175 | 175 |   dups = sum_ranks = 0
176 | 176 |   val_start = i
    | 177 | + vals_seen += 1
177 | 178 |
178 | 179 |   if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
179 | 180 |   if pct:

pandas/_libs/groupby_helper.pxi.in

+5 -2
Original file line number | Diff line number | Diff line change
@@ -457,7 +457,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
457 | 457 |   cdef:
458 | 458 |   int tiebreak
459 | 459 |   Py_ssize_t i, j, N, K
460 |     | - int64_t val_start=0, grp_start=0, dups=0, sum_ranks=0
    | 460 | + int64_t val_start=0, grp_start=0, dups=0, sum_ranks=0, vals_seen=1
461 | 461 |   ndarray[int64_t] _as
462 | 462 |   bint pct, ascending
463 | 463 |

@@ -493,20 +493,23 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
493 | 493 |   out[_as[j], 0] = 2 * i - j - dups + 2
494 | 494 |   elif tiebreak == TIEBREAK_DENSE:
495 | 495 |   for j in range(i - dups + 1, i + 1):
496 |     | - out[_as[j], 0] = val_start - grp_start
    | 496 | + out[_as[j], 0] = vals_seen
    | 497 | +
497 | 498 |   if (i == N - 1 or (
498 | 499 |   (values[_as[i], 0] != values[_as[i+1], 0]) and not
499 | 500 |   (isnan(values[_as[i], 0]) and
500 | 501 |   isnan(values[_as[i+1], 0])
501 | 502 |   ))):
502 | 503 |   dups = sum_ranks = 0
503 | 504 |   val_start = i
    | 505 | + vals_seen += 1
504 | 506 |
505 | 507 |   if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
506 | 508 |   if pct:
507 | 509 |   for j in range(grp_start, i + 1):
508 | 510 |   out[_as[j], 0] = out[_as[j], 0] / (i - grp_start + 1)
509 | 511 |   grp_start = i + 1
    | 512 | + vals_seen = 1
510 | 513 |
511 | 514 |   {{endfor}}
512 | 515 |

0 commit comments

Comments (0)