Skip to content

Commit a936863

Browse files
authored
CLN: Cython 3 cleanups (#54482)
* CLN: Update code for Cython 3
* clean more
* fix tests
1 parent f935543 commit a936863

16 files changed

+105
-362
lines changed

pandas/_libs/algos.pyx

+8-109
Original file line numberDiff line numberDiff line change
@@ -998,8 +998,7 @@ def rank_1d(
998998

999999
N = len(values)
10001000
if labels is not None:
1001-
# TODO(cython3): cast won't be necessary (#2992)
1002-
assert <Py_ssize_t>len(labels) == N
1001+
assert len(labels) == N
10031002
out = np.empty(N)
10041003
grp_sizes = np.ones(N, dtype=np.int64)
10051004

@@ -1088,8 +1087,7 @@ cdef void rank_sorted_1d(
10881087
float64_t[::1] out,
10891088
int64_t[::1] grp_sizes,
10901089
const intp_t[:] sort_indexer,
1091-
# TODO(cython3): make const (https://github.com/cython/cython/issues/3222)
1092-
numeric_object_t[:] masked_vals,
1090+
const numeric_object_t[:] masked_vals,
10931091
const uint8_t[:] mask,
10941092
bint check_mask,
10951093
Py_ssize_t N,
@@ -1144,108 +1142,7 @@ cdef void rank_sorted_1d(
11441142
# array that we sorted previously, which gives us the location of
11451143
# that sorted value for retrieval back from the original
11461144
# values / masked_vals arrays
1147-
# TODO(cython3): de-duplicate once cython supports conditional nogil
1148-
if numeric_object_t is object:
1149-
with gil:
1150-
for i in range(N):
1151-
at_end = i == N - 1
1152-
1153-
# dups and sum_ranks will be incremented each loop where
1154-
# the value / group remains the same, and should be reset
1155-
# when either of those change. Used to calculate tiebreakers
1156-
dups += 1
1157-
sum_ranks += i - grp_start + 1
1158-
1159-
next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
1160-
masked_vals[sort_indexer[i+1]])
1161-
1162-
# We'll need this check later anyway to determine group size, so just
1163-
# compute it here since shortcircuiting won't help
1164-
group_changed = at_end or (check_labels and
1165-
(labels[sort_indexer[i]]
1166-
!= labels[sort_indexer[i+1]]))
1167-
1168-
# Update out only when there is a transition of values or labels.
1169-
# When a new value or group is encountered, go back #dups steps(
1170-
# the number of occurrence of current value) and assign the ranks
1171-
# based on the starting index of the current group (grp_start)
1172-
# and the current index
1173-
if (next_val_diff or group_changed or (check_mask and
1174-
(mask[sort_indexer[i]]
1175-
^ mask[sort_indexer[i+1]]))):
1176-
1177-
# If keep_na, check for missing values and assign back
1178-
# to the result where appropriate
1179-
if keep_na and check_mask and mask[sort_indexer[i]]:
1180-
grp_na_count = dups
1181-
for j in range(i - dups + 1, i + 1):
1182-
out[sort_indexer[j]] = NaN
1183-
elif tiebreak == TIEBREAK_AVERAGE:
1184-
for j in range(i - dups + 1, i + 1):
1185-
out[sort_indexer[j]] = sum_ranks / <float64_t>dups
1186-
elif tiebreak == TIEBREAK_MIN:
1187-
for j in range(i - dups + 1, i + 1):
1188-
out[sort_indexer[j]] = i - grp_start - dups + 2
1189-
elif tiebreak == TIEBREAK_MAX:
1190-
for j in range(i - dups + 1, i + 1):
1191-
out[sort_indexer[j]] = i - grp_start + 1
1192-
1193-
# With n as the previous rank in the group and m as the number
1194-
# of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
1195-
# then rankings should be n + 1, n + 2 ... n + m
1196-
elif tiebreak == TIEBREAK_FIRST:
1197-
for j in range(i - dups + 1, i + 1):
1198-
out[sort_indexer[j]] = j + 1 - grp_start
1199-
1200-
# If TIEBREAK_FIRST and descending, the ranking should be
1201-
# n + m, n + (m - 1) ... n + 1. This is equivalent to
1202-
# (i - dups + 1) + (i - j + 1) - grp_start
1203-
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
1204-
for j in range(i - dups + 1, i + 1):
1205-
out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
1206-
elif tiebreak == TIEBREAK_DENSE:
1207-
for j in range(i - dups + 1, i + 1):
1208-
out[sort_indexer[j]] = grp_vals_seen
1209-
1210-
# Look forward to the next value (using the sorting in
1211-
# lexsort_indexer). If the value does not equal the current
1212-
# value then we need to reset the dups and sum_ranks, knowing
1213-
# that a new value is coming up. The conditional also needs
1214-
# to handle nan equality and the end of iteration. If group
1215-
# changes we do not record seeing a new value in the group
1216-
if not group_changed and (next_val_diff or (check_mask and
1217-
(mask[sort_indexer[i]]
1218-
^ mask[sort_indexer[i+1]]))):
1219-
dups = sum_ranks = 0
1220-
grp_vals_seen += 1
1221-
1222-
# Similar to the previous conditional, check now if we are
1223-
# moving to a new group. If so, keep track of the index where
1224-
# the new group occurs, so the tiebreaker calculations can
1225-
# decrement that from their position. Fill in the size of each
1226-
# group encountered (used by pct calculations later). Also be
1227-
# sure to reset any of the items helping to calculate dups
1228-
if group_changed:
1229-
1230-
# If not dense tiebreak, group size used to compute
1231-
# percentile will be # of non-null elements in group
1232-
if tiebreak != TIEBREAK_DENSE:
1233-
grp_size = i - grp_start + 1 - grp_na_count
1234-
1235-
# Otherwise, it will be the number of distinct values
1236-
# in the group, subtracting 1 if NaNs are present
1237-
# since that is a distinct value we shouldn't count
1238-
else:
1239-
grp_size = grp_vals_seen - (grp_na_count > 0)
1240-
1241-
for j in range(grp_start, i + 1):
1242-
grp_sizes[sort_indexer[j]] = grp_size
1243-
1244-
dups = sum_ranks = 0
1245-
grp_na_count = 0
1246-
grp_start = i + 1
1247-
grp_vals_seen = 1
1248-
else:
1145+
with gil(numeric_object_t is object):
12491146
for i in range(N):
12501147
at_end = i == N - 1
12511148

@@ -1474,16 +1371,18 @@ ctypedef fused out_t:
14741371
@cython.boundscheck(False)
14751372
@cython.wraparound(False)
14761373
def diff_2d(
1477-
ndarray[diff_t, ndim=2] arr, # TODO(cython3) update to "const diff_t[:, :] arr"
1478-
ndarray[out_t, ndim=2] out,
1374+
# TODO: cython bug (post Cython 3) prevents update to "const diff_t[:, :] arr"
1375+
ndarray[diff_t, ndim=2] arr,
1376+
out_t[:, :] out,
14791377
Py_ssize_t periods,
14801378
int axis,
14811379
bint datetimelike=False,
14821380
):
14831381
cdef:
14841382
Py_ssize_t i, j, sx, sy, start, stop
14851383
bint f_contig = arr.flags.f_contiguous
1486-
# bint f_contig = arr.is_f_contig() # TODO(cython3)
1384+
# TODO: change to this when arr becomes a memoryview
1385+
# bint f_contig = arr.is_f_contig()
14871386
diff_t left, right
14881387

14891388
# Disable for unsupported dtype combinations,

pandas/_libs/arrays.pyx

+1-2
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,7 @@ cdef class NDArrayBacked:
126126

127127
@property
128128
def size(self) -> int:
129-
# TODO(cython3): use self._ndarray.size
130-
return cnp.PyArray_SIZE(self._ndarray)
129+
return self._ndarray.size
131130

132131
@property
133132
def nbytes(self) -> int:

0 commit comments

Comments
 (0)