Skip to content

Commit d650212

Browse files
authored
Fix Cython 3.0 regression with time_loc_dups (#55915)
* fix performance regression with time_loc_dups and Cython 3
* replace all index calls with PySequence_GetItem
1 parent 09ed69e commit d650212

File tree

1 file changed

+15
-14
lines changed

1 file changed

+15
-14
lines changed

pandas/_libs/index.pyx

+15 -14
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
cimport cython
2+
from cpython.sequence cimport PySequence_GetItem
23

34
import numpy as np
45

@@ -77,7 +78,7 @@ cdef ndarray _get_bool_indexer(ndarray values, object val, ndarray mask = None):
7778
indexer = np.empty(len(values), dtype=np.uint8)
7879

7980
for i in range(len(values)):
80-
item = values[i]
81+
item = PySequence_GetItem(values, i)
8182
indexer[i] = is_matching_na(item, val)
8283

8384
else:
@@ -405,7 +406,7 @@ cdef class IndexEngine:
405406
found_nas = set()
406407

407408
for i in range(n):
408-
val = values[i]
409+
val = PySequence_GetItem(values, i)
409410

410411
# GH#43870
411412
# handle lookup for nas
@@ -437,7 +438,7 @@ cdef class IndexEngine:
437438
d[val].append(i)
438439

439440
for i in range(n_t):
440-
val = targets[i]
441+
val = PySequence_GetItem(targets, i)
441442

442443
# ensure there are nas in values before looking for a matching na
443444
if check_na_values and checknull(val):
@@ -488,22 +489,22 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
488489
Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1
489490
object pval
490491

491-
if hi == 0 or (hi > 0 and val > values[hi]):
492+
if hi == 0 or (hi > 0 and val > PySequence_GetItem(values, hi)):
492493
return len(values)
493494

494495
while lo < hi:
495496
mid = (lo + hi) // 2
496-
pval = values[mid]
497+
pval = PySequence_GetItem(values, mid)
497498
if val < pval:
498499
hi = mid
499500
elif val > pval:
500501
lo = mid + 1
501502
else:
502-
while mid > 0 and val == values[mid - 1]:
503+
while mid > 0 and val == PySequence_GetItem(values, mid - 1):
503504
mid -= 1
504505
return mid
505506

506-
if val <= values[mid]:
507+
if val <= PySequence_GetItem(values, mid):
507508
return mid
508509
else:
509510
return mid + 1
@@ -591,7 +592,7 @@ cdef class DatetimeEngine(Int64Engine):
591592

592593
loc = values.searchsorted(conv, side="left")
593594

594-
if loc == len(values) or values[loc] != conv:
595+
if loc == len(values) or PySequence_GetItem(values, loc) != conv:
595596
raise KeyError(val)
596597
return loc
597598

@@ -962,7 +963,7 @@ cdef class SharedEngine:
962963
res = np.empty(N, dtype=np.intp)
963964

964965
for i in range(N):
965-
val = values[i]
966+
val = PySequence_GetItem(values, i)
966967
try:
967968
loc = self.get_loc(val)
968969
# Because we are unique, loc should always be an integer
@@ -996,7 +997,7 @@ cdef class SharedEngine:
996997

997998
# See also IntervalIndex.get_indexer_pointwise
998999
for i in range(N):
999-
val = targets[i]
1000+
val = PySequence_GetItem(targets, i)
10001001

10011002
try:
10021003
locs = self.get_loc(val)
@@ -1176,9 +1177,9 @@ cdef class MaskedIndexEngine(IndexEngine):
11761177
na_pos = []
11771178

11781179
for i in range(n):
1179-
val = values[i]
1180+
val = PySequence_GetItem(values, i)
11801181

1181-
if mask[i]:
1182+
if PySequence_GetItem(mask, i):
11821183
na_pos.append(i)
11831184

11841185
else:
@@ -1188,9 +1189,9 @@ cdef class MaskedIndexEngine(IndexEngine):
11881189
d[val].append(i)
11891190

11901191
for i in range(n_t):
1191-
val = target_vals[i]
1192+
val = PySequence_GetItem(target_vals, i)
11921193

1193-
if target_mask[i]:
1194+
if PySequence_GetItem(target_mask, i):
11941195
if na_pos:
11951196
for na_idx in na_pos:
11961197
# realloc if needed

0 commit comments

Comments
 (0)