Skip to content

Commit 05579af

Browse files
committed
Raise ValueError in group_rank_object
1 parent deb24c9 commit 05579af

File tree

1 file changed

+1
-85
lines changed

1 file changed

+1
-85
lines changed

pandas/_libs/groupby.pyx

+1 -85
Original file line number | Diff line number | Diff line change
@@ -124,96 +124,12 @@ def group_last_object(ndarray[object, ndim=2] out,
124124
out[i, j] = resx[i, j]
125125

126126

127-
@cython.boundscheck(False)
128-
@cython.wraparound(False)
129127
def group_rank_object(ndarray[float64_t, ndim=2] out,
130128
ndarray[object, ndim=2] values,
131129
ndarray[int64_t] labels,
132130
bint is_datetimelike, object ties_method,
133131
bint ascending, bint pct, object na_option):
134-
"""
135-
Only transforms on axis=0
136-
"""
137-
cdef:
138-
TiebreakEnumType tiebreak
139-
Py_ssize_t i, j, N, K
140-
int64_t val_start=0, grp_start=0, dups=0, sum_ranks=0, grp_vals_seen=1
141-
int64_t grp_na_count=0
142-
ndarray[int64_t] _as
143-
ndarray[object] _values
144-
bint keep_na
145-
146-
tiebreak = tiebreakers[ties_method]
147-
keep_na = na_option == 'keep'
148-
N, K = (<object> values).shape
149-
150-
masked_vals = np.array(values[:, 0], copy=True)
151-
mask = missing.isnaobj(masked_vals)
152-
153-
if ascending ^ (na_option == 'top'):
154-
nan_fill_val = np.inf
155-
order = (masked_vals, mask, labels)
156-
else:
157-
nan_fill_val = -np.inf
158-
order = (masked_vals, ~mask, labels)
159-
np.putmask(masked_vals, mask, nan_fill_val)
160-
try:
161-
_as = np.lexsort(order)
162-
except TypeError:
163-
# lexsort fails when missing data and objects are mixed
164-
# fallback to argsort
165-
_arr = np.asarray(list(zip(order[0], order[1], order[2])),
166-
dtype=[('values', 'O'), ('mask', '?'),
167-
('labels', 'i8')])
168-
_as = np.argsort(_arr, kind='mergesort', order=('labels',
169-
'mask', 'values'))
170-
171-
if not ascending:
172-
_as = _as[::-1]
173-
174-
for i in range(N):
175-
dups += 1
176-
sum_ranks += i - grp_start + 1
177-
178-
if keep_na and (values[_as[i], 0] != values[_as[i], 0]):
179-
grp_na_count += 1
180-
out[_as[i], 0] = np.nan
181-
else:
182-
if tiebreak == TIEBREAK_AVERAGE:
183-
for j in range(i - dups + 1, i + 1):
184-
out[_as[j], 0] = sum_ranks / dups
185-
elif tiebreak == TIEBREAK_MIN:
186-
for j in range(i - dups + 1, i + 1):
187-
out[_as[j], 0] = i - grp_start - dups + 2
188-
elif tiebreak == TIEBREAK_MAX:
189-
for j in range(i - dups + 1, i + 1):
190-
out[_as[j], 0] = i - grp_start + 1
191-
elif tiebreak == TIEBREAK_FIRST:
192-
for j in range(i - dups + 1, i + 1):
193-
if ascending:
194-
out[_as[j], 0] = j + 1 - grp_start
195-
else:
196-
out[_as[j], 0] = 2 * i - j - dups + 2 - grp_start
197-
elif tiebreak == TIEBREAK_DENSE:
198-
for j in range(i - dups + 1, i + 1):
199-
out[_as[j], 0] = grp_vals_seen
200-
201-
if i == N - 1 or (
202-
(values[_as[i], 0] != values[_as[i+1], 0]) and not
203-
(values[_as[i], 0] is np.nan and values[_as[i+1], 0] is np.nan)
204-
):
205-
dups = sum_ranks = 0
206-
val_start = i
207-
grp_vals_seen += 1
208-
209-
if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]:
210-
if pct:
211-
for j in range(grp_start, i + 1):
212-
out[_as[j], 0] = out[_as[j], 0] / (i - grp_start + 1
213-
- grp_na_count)
214-
grp_na_count = 0
215-
grp_start = i + 1
216-
grp_vals_seen = 1
132+
raise ValueError("rank not supported for object dtypes")
217133

218134

219135
cdef inline float64_t median_linear(float64_t* a, int n) nogil:

0 commit comments

Comments
 (0)