Skip to content

Commit fc0164c

Browse files
authored
REF: use conditional-nogil in libalgos (#56025)
1 parent 97828e4 commit fc0164c

File tree

1 file changed

+10
-107
lines changed

1 file changed

+10
-107
lines changed

pandas/_libs/algos.pyx

+10-107
Original file line numberDiff line numberDiff line change
@@ -1145,107 +1145,7 @@ cdef void rank_sorted_1d(
11451145
# that sorted value for retrieval back from the original
11461146
# values / masked_vals arrays
11471147
# TODO(cython3): de-duplicate once cython supports conditional nogil
1148-
if numeric_object_t is object:
1149-
with gil:
1150-
for i in range(N):
1151-
at_end = i == N - 1
1152-
1153-
# dups and sum_ranks will be incremented each loop where
1154-
# the value / group remains the same, and should be reset
1155-
# when either of those change. Used to calculate tiebreakers
1156-
dups += 1
1157-
sum_ranks += i - grp_start + 1
1158-
1159-
next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
1160-
masked_vals[sort_indexer[i+1]])
1161-
1162-
# We'll need this check later anyway to determine group size, so just
1163-
# compute it here since short-circuiting won't help
1164-
group_changed = at_end or (check_labels and
1165-
(labels[sort_indexer[i]]
1166-
!= labels[sort_indexer[i+1]]))
1167-
1168-
# Update out only when there is a transition of values or labels.
1169-
# When a new value or group is encountered, go back #dups steps(
1170-
# the number of occurrences of the current value) and assign the ranks
1171-
# based on the starting index of the current group (grp_start)
1172-
# and the current index
1173-
if (next_val_diff or group_changed or (check_mask and
1174-
(mask[sort_indexer[i]]
1175-
^ mask[sort_indexer[i+1]]))):
1176-
1177-
# If keep_na, check for missing values and assign back
1178-
# to the result where appropriate
1179-
if keep_na and check_mask and mask[sort_indexer[i]]:
1180-
grp_na_count = dups
1181-
for j in range(i - dups + 1, i + 1):
1182-
out[sort_indexer[j]] = NaN
1183-
elif tiebreak == TIEBREAK_AVERAGE:
1184-
for j in range(i - dups + 1, i + 1):
1185-
out[sort_indexer[j]] = sum_ranks / <float64_t>dups
1186-
elif tiebreak == TIEBREAK_MIN:
1187-
for j in range(i - dups + 1, i + 1):
1188-
out[sort_indexer[j]] = i - grp_start - dups + 2
1189-
elif tiebreak == TIEBREAK_MAX:
1190-
for j in range(i - dups + 1, i + 1):
1191-
out[sort_indexer[j]] = i - grp_start + 1
1192-
1193-
# With n as the previous rank in the group and m as the number
1194-
# of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
1195-
# then rankings should be n + 1, n + 2 ... n + m
1196-
elif tiebreak == TIEBREAK_FIRST:
1197-
for j in range(i - dups + 1, i + 1):
1198-
out[sort_indexer[j]] = j + 1 - grp_start
1199-
1200-
# If TIEBREAK_FIRST and descending, the ranking should be
1201-
# n + m, n + (m - 1) ... n + 1. This is equivalent to
1202-
# (i - dups + 1) + (i - j + 1) - grp_start
1203-
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
1204-
for j in range(i - dups + 1, i + 1):
1205-
out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
1206-
elif tiebreak == TIEBREAK_DENSE:
1207-
for j in range(i - dups + 1, i + 1):
1208-
out[sort_indexer[j]] = grp_vals_seen
1209-
1210-
# Look forward to the next value (using the sorting in
1211-
# sort_indexer). If the value does not equal the current
1212-
# value then we need to reset the dups and sum_ranks, knowing
1213-
# that a new value is coming up. The conditional also needs
1214-
# to handle nan equality and the end of iteration. If group
1215-
# changes we do not record seeing a new value in the group
1216-
if not group_changed and (next_val_diff or (check_mask and
1217-
(mask[sort_indexer[i]]
1218-
^ mask[sort_indexer[i+1]]))):
1219-
dups = sum_ranks = 0
1220-
grp_vals_seen += 1
1221-
1222-
# Similar to the previous conditional, check now if we are
1223-
# moving to a new group. If so, keep track of the index where
1224-
# the new group occurs, so the tiebreaker calculations can
1225-
# decrement that from their position. Fill in the size of each
1226-
# group encountered (used by pct calculations later). Also be
1227-
# sure to reset any of the items helping to calculate dups
1228-
if group_changed:
1229-
1230-
# If not dense tiebreak, group size used to compute
1231-
# percentile will be # of non-null elements in group
1232-
if tiebreak != TIEBREAK_DENSE:
1233-
grp_size = i - grp_start + 1 - grp_na_count
1234-
1235-
# Otherwise, it will be the number of distinct values
1236-
# in the group, subtracting 1 if NaNs are present
1237-
# since that is a distinct value we shouldn't count
1238-
else:
1239-
grp_size = grp_vals_seen - (grp_na_count > 0)
1240-
1241-
for j in range(grp_start, i + 1):
1242-
grp_sizes[sort_indexer[j]] = grp_size
1243-
1244-
dups = sum_ranks = 0
1245-
grp_na_count = 0
1246-
grp_start = i + 1
1247-
grp_vals_seen = 1
1248-
else:
1148+
with gil(numeric_object_t is object):
12491149
for i in range(N):
12501150
at_end = i == N - 1
12511151

@@ -1255,8 +1155,12 @@ cdef void rank_sorted_1d(
12551155
dups += 1
12561156
sum_ranks += i - grp_start + 1
12571157

1258-
next_val_diff = at_end or (masked_vals[sort_indexer[i]]
1259-
!= masked_vals[sort_indexer[i+1]])
1158+
if numeric_object_t is object:
1159+
next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
1160+
masked_vals[sort_indexer[i+1]])
1161+
else:
1162+
next_val_diff = at_end or (masked_vals[sort_indexer[i]]
1163+
!= masked_vals[sort_indexer[i+1]])
12601164

12611165
# We'll need this check later anyway to determine group size, so just
12621166
# compute it here since short-circuiting won't help
@@ -1269,10 +1173,9 @@ cdef void rank_sorted_1d(
12691173
# the number of occurrences of the current value) and assign the ranks
12701174
# based on the starting index of the current group (grp_start)
12711175
# and the current index
1272-
if (next_val_diff or group_changed
1273-
or (check_mask and
1274-
(mask[sort_indexer[i]] ^ mask[sort_indexer[i+1]]))):
1275-
1176+
if (next_val_diff or group_changed or (check_mask and
1177+
(mask[sort_indexer[i]]
1178+
^ mask[sort_indexer[i+1]]))):
12761179
# If keep_na, check for missing values and assign back
12771180
# to the result where appropriate
12781181
if keep_na and check_mask and mask[sort_indexer[i]]:

0 commit comments

Comments
 (0)