@@ -130,96 +130,12 @@ def group_last_object(ndarray[object, ndim=2] out,
130
130
out[i, j] = resx[i, j]
131
131
132
132
133
def group_rank_object(ndarray[float64_t, ndim=2] out,
                      ndarray[object, ndim=2] values,
                      ndarray[int64_t] labels,
                      bint is_datetimelike, object ties_method,
                      bint ascending, bint pct, object na_option):
    """
    Reject group-wise ranking of object-dtype values.

    Every argument is accepted and ignored: the function raises
    unconditionally and never reads ``values``/``labels`` or writes to
    ``out``.

    Raises
    ------
    ValueError
        Always, with the message "rank not supported for object dtypes".
    """
    # NOTE(review): the full parameter list is presumably retained so this
    # stub stays call-compatible with the typed rank kernels that a caller
    # dispatches to by dtype -- confirm against the dispatching code.
    raise ValueError("rank not supported for object dtypes")
223
139
224
140
225
141
cdef inline float64_t median_linear(float64_t* a, int n) nogil:
0 commit comments