Skip to content

Commit f6ae88a

Browse files
committed
Raise ValueError in group_rank_object
1 parent 178654d commit f6ae88a

File tree

1 file changed

+1
-85
lines changed

1 file changed

+1
-85
lines changed

pandas/_libs/groupby.pyx

Lines changed: 1 addition & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -130,96 +130,12 @@ def group_last_object(ndarray[object, ndim=2] out,
130130
out[i, j] = resx[i, j]
131131

132132

133-
@cython.boundscheck(False)
134-
@cython.wraparound(False)
135133
def group_rank_object(ndarray[float64_t, ndim=2] out,
136134
ndarray[object, ndim=2] values,
137135
ndarray[int64_t] labels,
138136
bint is_datetimelike, object ties_method,
139137
bint ascending, bint pct, object na_option):
140-
"""
141-
Only transforms on axis=0
142-
"""
143-
cdef:
144-
TiebreakEnumType tiebreak
145-
Py_ssize_t i, j, N, K
146-
int64_t val_start=0, grp_start=0, dups=0, sum_ranks=0, grp_vals_seen=1
147-
int64_t grp_na_count=0
148-
ndarray[int64_t] _as
149-
ndarray[object] _values
150-
bint keep_na
151-
152-
tiebreak = tiebreakers[ties_method]
153-
keep_na = na_option == 'keep'
154-
N, K = (<object> values).shape
155-
156-
masked_vals = np.array(values[:, 0], copy=True)
157-
mask = missing.isnaobj(masked_vals)
158-
159-
if ascending ^ (na_option == 'top'):
160-
nan_fill_val = np.inf
161-
order = (masked_vals, mask, labels)
162-
else:
163-
nan_fill_val = -np.inf
164-
order = (masked_vals, ~mask, labels)
165-
np.putmask(masked_vals, mask, nan_fill_val)
166-
try:
167-
_as = np.lexsort(order)
168-
except TypeError:
169-
# lexsort fails when missing data and objects are mixed
170-
# fallback to argsort
171-
_arr = np.asarray(list(zip(order[0], order[1], order[2])),
172-
dtype=[('values', 'O'), ('mask', '?'),
173-
('labels', 'i8')])
174-
_as = np.argsort(_arr, kind='mergesort', order=('labels',
175-
'mask', 'values'))
176-
177-
if not ascending:
178-
_as = _as[::-1]
179-
180-
for i in range(N):
181-
dups += 1
182-
sum_ranks += i - grp_start + 1
183-
184-
if keep_na and (values[_as[i], 0] != values[_as[i], 0]):
185-
grp_na_count += 1
186-
out[_as[i], 0] = np.nan
187-
else:
188-
if tiebreak == TIEBREAK_AVERAGE:
189-
for j in range(i - dups + 1, i + 1):
190-
out[_as[j], 0] = sum_ranks / dups
191-
elif tiebreak == TIEBREAK_MIN:
192-
for j in range(i - dups + 1, i + 1):
193-
out[_as[j], 0] = i - grp_start - dups + 2
194-
elif tiebreak == TIEBREAK_MAX:
195-
for j in range(i - dups + 1, i + 1):
196-
out[_as[j], 0] = i - grp_start + 1
197-
elif tiebreak == TIEBREAK_FIRST:
198-
for j in range(i - dups + 1, i + 1):
199-
if ascending:
200-
out[_as[j], 0] = j + 1 - grp_start
201-
else:
202-
out[_as[j], 0] = 2 * i - j - dups + 2 - grp_start
203-
elif tiebreak == TIEBREAK_DENSE:
204-
for j in range(i - dups + 1, i + 1):
205-
out[_as[j], 0] = grp_vals_seen
206-
207-
if i == N - 1 or (
208-
(values[_as[i], 0] != values[_as[i+1], 0]) and not
209-
(values[_as[i], 0] is np.nan and values[_as[i+1], 0] is np.nan)
210-
):
211-
dups = sum_ranks = 0
212-
val_start = i
213-
grp_vals_seen += 1
214-
215-
if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
216-
if pct:
217-
for j in range(grp_start, i + 1):
218-
out[_as[j], 0] = out[_as[j], 0] / (i - grp_start + 1
219-
- grp_na_count)
220-
grp_na_count = 0
221-
grp_start = i + 1
222-
grp_vals_seen = 1
138+
raise ValueError("rank not supported for object dtypes")
223139

224140

225141
cdef inline float64_t median_linear(float64_t* a, int n) nogil:

0 commit comments

Comments
 (0)