@@ -134,29 +134,37 @@ def group_rank_object(ndarray[float64_t, ndim=2] out,
134
134
int tiebreak
135
135
Py_ssize_t i, j, N, K
136
136
int64_t val_start= 0 , grp_start= 0 , dups= 0 , sum_ranks= 0 , vals_seen= 1
137
+ int64_t grp_na_count= 0
137
138
ndarray[int64_t] _as
138
- bint pct, ascending
139
+ ndarray[object ] _values
140
+ bint pct, ascending, keep_na
139
141
140
142
tiebreak = tiebreakers[kwargs[' ties_method' ]]
141
143
ascending = kwargs[' ascending' ]
142
144
pct = kwargs[' pct' ]
143
145
keep_na = kwargs[' na_option' ] == ' keep'
144
146
N, K = (< object > values).shape
145
147
146
- vals = np.array(values[:, 0 ], copy = True )
147
- mask = missing.isnaobj(vals )
148
+ _values = np.array(values[:, 0 ], copy = True )
149
+ mask = missing.isnaobj(_values )
148
150
151
+ if ascending ^ (kwargs[' na_option' ] == ' top' ):
152
+ nan_value = np.inf
153
+ order = (_values, mask, labels)
154
+ else :
155
+ nan_value = - np.inf
156
+ order = (_values, ~ mask, labels)
157
+ np.putmask(_values, mask, nan_value)
149
158
try :
150
- _as = np.lexsort((vals, labels) )
159
+ _as = np.lexsort(order )
151
160
except TypeError :
152
161
# lexsort fails when missing data and objects are mixed
153
162
# fallback to argsort
154
- order = (vals, mask, labels)
155
- _values = np.asarray(list (zip (order[0 ], order[1 ], order[2 ])),
156
- dtype = [(' values' , ' O' ), (' mask' , ' ?' ),
157
- (' labels' , ' i8' )])
158
- _as = np.argsort(_values, kind = ' mergesort' , order = (' labels' ,
159
- ' mask' , ' values' ))
163
+ _arr = np.asarray(list (zip (order[0 ], order[1 ], order[2 ])),
164
+ dtype = [(' values' , ' O' ), (' mask' , ' ?' ),
165
+ (' labels' , ' i8' )])
166
+ _as = np.argsort(_arr, kind = ' mergesort' , order = (' labels' ,
167
+ ' mask' , ' values' ))
160
168
161
169
if not ascending:
162
170
_as = _as[::- 1 ]
@@ -165,7 +173,8 @@ def group_rank_object(ndarray[float64_t, ndim=2] out,
165
173
dups += 1
166
174
sum_ranks += i - grp_start + 1
167
175
168
- if keep_na and mask[_as[i]]:
176
+ if keep_na and (values[_as[i], 0 ] != values[_as[i], 0 ]):
177
+ grp_na_count += 1
169
178
out[_as[i], 0 ] = np.nan
170
179
else :
171
180
if tiebreak == TIEBREAK_AVERAGE:
@@ -198,8 +207,11 @@ def group_rank_object(ndarray[float64_t, ndim=2] out,
198
207
if i == N - 1 or labels[_as[i]] != labels[_as[i+ 1 ]]:
199
208
if pct:
200
209
for j in range (grp_start, i + 1 ):
201
- out[_as[j], 0 ] = out[_as[j], 0 ] / (i - grp_start + 1 )
210
+ out[_as[j], 0 ] = out[_as[j], 0 ] / (i - grp_start + 1
211
+ - grp_na_count)
212
+ grp_na_count = 0
202
213
grp_start = i + 1
214
+ vals_seen = 1
203
215
204
216
205
217
cdef inline float64_t median_linear(float64_t* a, int n) nogil:
0 commit comments