@@ -1145,107 +1145,7 @@ cdef void rank_sorted_1d(
1145
1145
# that sorted value for retrieval back from the original
1146
1146
# values / masked_vals arrays
1147
1147
# TODO(cython3): de-duplicate once cython supports conditional nogil
1148
- if numeric_object_t is object :
1149
- with gil:
1150
- for i in range (N):
1151
- at_end = i == N - 1
1152
-
1153
- # dups and sum_ranks will be incremented each loop where
1154
- # the value / group remains the same, and should be reset
1155
- # when either of those change. Used to calculate tiebreakers
1156
- dups += 1
1157
- sum_ranks += i - grp_start + 1
1158
-
1159
- next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
1160
- masked_vals[sort_indexer[i+ 1 ]])
1161
-
1162
- # We'll need this check later anyway to determine group size, so just
1163
- # compute it here since shortcircuiting won't help
1164
- group_changed = at_end or (check_labels and
1165
- (labels[sort_indexer[i]]
1166
- != labels[sort_indexer[i+ 1 ]]))
1167
-
1168
- # Update out only when there is a transition of values or labels.
1169
- # When a new value or group is encountered, go back #dups steps(
1170
- # the number of occurrence of current value) and assign the ranks
1171
- # based on the starting index of the current group (grp_start)
1172
- # and the current index
1173
- if (next_val_diff or group_changed or (check_mask and
1174
- (mask[sort_indexer[i]]
1175
- ^ mask[sort_indexer[i+ 1 ]]))):
1176
-
1177
- # If keep_na, check for missing values and assign back
1178
- # to the result where appropriate
1179
- if keep_na and check_mask and mask[sort_indexer[i]]:
1180
- grp_na_count = dups
1181
- for j in range (i - dups + 1 , i + 1 ):
1182
- out[sort_indexer[j]] = NaN
1183
- elif tiebreak == TIEBREAK_AVERAGE:
1184
- for j in range (i - dups + 1 , i + 1 ):
1185
- out[sort_indexer[j]] = sum_ranks / < float64_t> dups
1186
- elif tiebreak == TIEBREAK_MIN:
1187
- for j in range (i - dups + 1 , i + 1 ):
1188
- out[sort_indexer[j]] = i - grp_start - dups + 2
1189
- elif tiebreak == TIEBREAK_MAX:
1190
- for j in range (i - dups + 1 , i + 1 ):
1191
- out[sort_indexer[j]] = i - grp_start + 1
1192
-
1193
- # With n as the previous rank in the group and m as the number
1194
- # of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
1195
- # then rankings should be n + 1, n + 2 ... n + m
1196
- elif tiebreak == TIEBREAK_FIRST:
1197
- for j in range (i - dups + 1 , i + 1 ):
1198
- out[sort_indexer[j]] = j + 1 - grp_start
1199
-
1200
- # If TIEBREAK_FIRST and descending, the ranking should be
1201
- # n + m, n + (m - 1) ... n + 1. This is equivalent to
1202
- # (i - dups + 1) + (i - j + 1) - grp_start
1203
- elif tiebreak == TIEBREAK_FIRST_DESCENDING:
1204
- for j in range (i - dups + 1 , i + 1 ):
1205
- out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
1206
- elif tiebreak == TIEBREAK_DENSE:
1207
- for j in range (i - dups + 1 , i + 1 ):
1208
- out[sort_indexer[j]] = grp_vals_seen
1209
-
1210
- # Look forward to the next value (using the sorting in
1211
- # lexsort_indexer). If the value does not equal the current
1212
- # value then we need to reset the dups and sum_ranks, knowing
1213
- # that a new value is coming up. The conditional also needs
1214
- # to handle nan equality and the end of iteration. If group
1215
- # changes we do not record seeing a new value in the group
1216
- if not group_changed and (next_val_diff or (check_mask and
1217
- (mask[sort_indexer[i]]
1218
- ^ mask[sort_indexer[i+ 1 ]]))):
1219
- dups = sum_ranks = 0
1220
- grp_vals_seen += 1
1221
-
1222
- # Similar to the previous conditional, check now if we are
1223
- # moving to a new group. If so, keep track of the index where
1224
- # the new group occurs, so the tiebreaker calculations can
1225
- # decrement that from their position. Fill in the size of each
1226
- # group encountered (used by pct calculations later). Also be
1227
- # sure to reset any of the items helping to calculate dups
1228
- if group_changed:
1229
-
1230
- # If not dense tiebreak, group size used to compute
1231
- # percentile will be # of non-null elements in group
1232
- if tiebreak != TIEBREAK_DENSE:
1233
- grp_size = i - grp_start + 1 - grp_na_count
1234
-
1235
- # Otherwise, it will be the number of distinct values
1236
- # in the group, subtracting 1 if NaNs are present
1237
- # since that is a distinct value we shouldn't count
1238
- else :
1239
- grp_size = grp_vals_seen - (grp_na_count > 0 )
1240
-
1241
- for j in range (grp_start, i + 1 ):
1242
- grp_sizes[sort_indexer[j]] = grp_size
1243
-
1244
- dups = sum_ranks = 0
1245
- grp_na_count = 0
1246
- grp_start = i + 1
1247
- grp_vals_seen = 1
1248
- else :
1148
+ with gil(numeric_object_t is object ):
1249
1149
for i in range (N):
1250
1150
at_end = i == N - 1
1251
1151
@@ -1255,8 +1155,12 @@ cdef void rank_sorted_1d(
1255
1155
dups += 1
1256
1156
sum_ranks += i - grp_start + 1
1257
1157
1258
- next_val_diff = at_end or (masked_vals[sort_indexer[i]]
1259
- != masked_vals[sort_indexer[i+ 1 ]])
1158
+ if numeric_object_t is object :
1159
+ next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
1160
+ masked_vals[sort_indexer[i+ 1 ]])
1161
+ else :
1162
+ next_val_diff = at_end or (masked_vals[sort_indexer[i]]
1163
+ != masked_vals[sort_indexer[i+ 1 ]])
1260
1164
1261
1165
# We'll need this check later anyway to determine group size, so just
1262
1166
# compute it here since short-circuiting won't help
@@ -1269,10 +1173,9 @@ cdef void rank_sorted_1d(
1269
1173
# the number of occurrences of the current value) and assign the ranks
1270
1174
# based on the starting index of the current group (grp_start)
1271
1175
# and the current index
1272
- if (next_val_diff or group_changed
1273
- or (check_mask and
1274
- (mask[sort_indexer[i]] ^ mask[sort_indexer[i+ 1 ]]))):
1275
-
1176
+ if (next_val_diff or group_changed or (check_mask and
1177
+ (mask[sort_indexer[i]]
1178
+ ^ mask[sort_indexer[i+ 1 ]]))):
1276
1179
# If keep_na, check for missing values and assign back
1277
1180
# to the result where appropriate
1278
1181
if keep_na and check_mask and mask[sort_indexer[i]]:
0 commit comments