@@ -998,7 +998,8 @@ def rank_1d(
998
998
999
999
N = len (values)
1000
1000
if labels is not None :
1001
- assert len (labels) == N
1001
+ # TODO(cython3): cast won't be necessary (#2992)
1002
+ assert < Py_ssize_t> len (labels) == N
1002
1003
out = np.empty(N)
1003
1004
grp_sizes = np.ones(N, dtype = np.int64)
1004
1005
@@ -1087,7 +1088,8 @@ cdef void rank_sorted_1d(
1087
1088
float64_t[::1 ] out,
1088
1089
int64_t[::1 ] grp_sizes,
1089
1090
const intp_t[:] sort_indexer,
1090
- const numeric_object_t[:] masked_vals,
1091
+ # TODO(cython3): make const (https://github.com/cython/cython/issues/3222)
1092
+ numeric_object_t[:] masked_vals,
1091
1093
const uint8_t[:] mask,
1092
1094
bint check_mask,
1093
1095
Py_ssize_t N,
@@ -1142,7 +1144,108 @@ cdef void rank_sorted_1d(
1142
1144
# array that we sorted previously, which gives us the location of
1143
1145
# that sorted value for retrieval back from the original
1144
1146
# values / masked_vals arrays
1145
- with gil(numeric_object_t is object ):
1147
+ # TODO(cython3): de-duplicate once cython supports conditional nogil
1148
+ if numeric_object_t is object :
1149
+ with gil:
1150
+ for i in range (N):
1151
+ at_end = i == N - 1
1152
+
1153
+ # dups and sum_ranks will be incremented each loop where
1154
+ # the value / group remains the same, and should be reset
1155
+ # when either of those change. Used to calculate tiebreakers
1156
+ dups += 1
1157
+ sum_ranks += i - grp_start + 1
1158
+
1159
+ next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
1160
+ masked_vals[sort_indexer[i+ 1 ]])
1161
+
1162
+ # We'll need this check later anyway to determine group size, so just
1163
+ # compute it here since shortcircuiting won't help
1164
+ group_changed = at_end or (check_labels and
1165
+ (labels[sort_indexer[i]]
1166
+ != labels[sort_indexer[i+ 1 ]]))
1167
+
1168
+ # Update out only when there is a transition of values or labels.
1169
+ # When a new value or group is encountered, go back #dups steps (
1170
+ # the number of occurrences of the current value) and assign the ranks
1171
+ # based on the starting index of the current group (grp_start)
1172
+ # and the current index
1173
+ if (next_val_diff or group_changed or (check_mask and
1174
+ (mask[sort_indexer[i]]
1175
+ ^ mask[sort_indexer[i+ 1 ]]))):
1176
+
1177
+ # If keep_na, check for missing values and assign back
1178
+ # to the result where appropriate
1179
+ if keep_na and check_mask and mask[sort_indexer[i]]:
1180
+ grp_na_count = dups
1181
+ for j in range (i - dups + 1 , i + 1 ):
1182
+ out[sort_indexer[j]] = NaN
1183
+ elif tiebreak == TIEBREAK_AVERAGE:
1184
+ for j in range (i - dups + 1 , i + 1 ):
1185
+ out[sort_indexer[j]] = sum_ranks / < float64_t> dups
1186
+ elif tiebreak == TIEBREAK_MIN:
1187
+ for j in range (i - dups + 1 , i + 1 ):
1188
+ out[sort_indexer[j]] = i - grp_start - dups + 2
1189
+ elif tiebreak == TIEBREAK_MAX:
1190
+ for j in range (i - dups + 1 , i + 1 ):
1191
+ out[sort_indexer[j]] = i - grp_start + 1
1192
+
1193
+ # With n as the previous rank in the group and m as the number
1194
+ # of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
1195
+ # then rankings should be n + 1, n + 2 ... n + m
1196
+ elif tiebreak == TIEBREAK_FIRST:
1197
+ for j in range (i - dups + 1 , i + 1 ):
1198
+ out[sort_indexer[j]] = j + 1 - grp_start
1199
+
1200
+ # If TIEBREAK_FIRST and descending, the ranking should be
1201
+ # n + m, n + (m - 1) ... n + 1. This is equivalent to
1202
+ # (i - dups + 1) + (i - j + 1) - grp_start
1203
+ elif tiebreak == TIEBREAK_FIRST_DESCENDING:
1204
+ for j in range (i - dups + 1 , i + 1 ):
1205
+ out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
1206
+ elif tiebreak == TIEBREAK_DENSE:
1207
+ for j in range (i - dups + 1 , i + 1 ):
1208
+ out[sort_indexer[j]] = grp_vals_seen
1209
+
1210
+ # Look forward to the next value (using the sorting in
1211
+ # lexsort_indexer). If the value does not equal the current
1212
+ # value then we need to reset the dups and sum_ranks, knowing
1213
+ # that a new value is coming up. The conditional also needs
1214
+ # to handle nan equality and the end of iteration. If group
1215
+ # changes we do not record seeing a new value in the group
1216
+ if not group_changed and (next_val_diff or (check_mask and
1217
+ (mask[sort_indexer[i]]
1218
+ ^ mask[sort_indexer[i+ 1 ]]))):
1219
+ dups = sum_ranks = 0
1220
+ grp_vals_seen += 1
1221
+
1222
+ # Similar to the previous conditional, check now if we are
1223
+ # moving to a new group. If so, keep track of the index where
1224
+ # the new group occurs, so the tiebreaker calculations can
1225
+ # decrement that from their position. Fill in the size of each
1226
+ # group encountered (used by pct calculations later). Also be
1227
+ # sure to reset any of the items helping to calculate dups
1228
+ if group_changed:
1229
+
1230
+ # If not dense tiebreak, group size used to compute
1231
+ # percentile will be # of non-null elements in group
1232
+ if tiebreak != TIEBREAK_DENSE:
1233
+ grp_size = i - grp_start + 1 - grp_na_count
1234
+
1235
+ # Otherwise, it will be the number of distinct values
1236
+ # in the group, subtracting 1 if NaNs are present
1237
+ # since that is a distinct value we shouldn't count
1238
+ else :
1239
+ grp_size = grp_vals_seen - (grp_na_count > 0 )
1240
+
1241
+ for j in range (grp_start, i + 1 ):
1242
+ grp_sizes[sort_indexer[j]] = grp_size
1243
+
1244
+ dups = sum_ranks = 0
1245
+ grp_na_count = 0
1246
+ grp_start = i + 1
1247
+ grp_vals_seen = 1
1248
+ else :
1146
1249
for i in range (N):
1147
1250
at_end = i == N - 1
1148
1251
@@ -1371,18 +1474,16 @@ ctypedef fused out_t:
1371
1474
@ cython.boundscheck (False )
1372
1475
@ cython.wraparound (False )
1373
1476
def diff_2d (
1374
- # TODO: cython bug (post Cython 3) prevents update to "const diff_t[:, :] arr"
1375
- ndarray[diff_t , ndim = 2 ] arr,
1376
- out_t[:, :] out ,
1477
+ ndarray[diff_t , ndim = 2 ] arr, # TODO(cython3 ) update to "const diff_t[:, :] arr"
1478
+ ndarray[out_t , ndim = 2 ] out,
1377
1479
Py_ssize_t periods ,
1378
1480
int axis ,
1379
1481
bint datetimelike = False ,
1380
1482
):
1381
1483
cdef:
1382
1484
Py_ssize_t i, j, sx, sy, start, stop
1383
1485
bint f_contig = arr.flags.f_contiguous
1384
- # TODO: change to this when arr becomes a memoryview
1385
- # bint f_contig = arr.is_f_contig()
1486
+ # bint f_contig = arr.is_f_contig() # TODO(cython3)
1386
1487
diff_t left, right
1387
1488
1388
1489
# Disable for unsupported dtype combinations,
0 commit comments