@@ -124,96 +124,12 @@ def group_last_object(ndarray[object, ndim=2] out,
124
124
out[i, j] = resx[i, j]
125
125
126
126
127
def group_rank_object(ndarray[float64_t, ndim=2] out,
                      ndarray[object, ndim=2] values,
                      ndarray[int64_t] labels,
                      bint is_datetimelike, object ties_method,
                      bint ascending, bint pct, object na_option):
    """
    Reject group-wise ranking of object-dtype values.

    None of the arguments are read; the function raises unconditionally.

    Parameters
    ----------
    out : ndarray[float64_t, ndim=2]
        Unused.
    values : ndarray[object, ndim=2]
        Unused.
    labels : ndarray[int64_t]
        Unused.
    is_datetimelike : bint
        Unused.
    ties_method : object
        Unused.
    ascending : bint
        Unused.
    pct : bint
        Unused.
    na_option : object
        Unused.

    Raises
    ------
    ValueError
        Always, with the message "rank not supported for object dtypes".
    """
    # NOTE(review): the full signature is presumably kept so that callers
    # selecting a rank routine by dtype still find this name -- confirm
    # against the call sites before trimming the parameter list.
    raise ValueError("rank not supported for object dtypes")
217
133
218
134
219
135
cdef inline float64_t median_linear(float64_t* a, int n) nogil:
0 commit comments