@@ -1047,6 +1047,7 @@ def group_last(
1047
1047
const uint8_t[:, :] mask ,
1048
1048
uint8_t[:, ::1] result_mask = None ,
1049
1049
Py_ssize_t min_count = - 1 ,
1050
+ bint is_datetimelike = False ,
1050
1051
) -> None:
1051
1052
"""
1052
1053
Only aggregates on axis = 0
@@ -1056,7 +1057,6 @@ def group_last(
1056
1057
iu_64_floating_obj_t val
1057
1058
ndarray[iu_64_floating_obj_t , ndim = 2 ] resx
1058
1059
ndarray[int64_t , ndim = 2 ] nobs
1059
- bint runtime_error = False
1060
1060
bint uses_mask = mask is not None
1061
1061
bint isna_entry
1062
1062
@@ -1116,35 +1116,38 @@ def group_last(
1116
1116
if uses_mask:
1117
1117
isna_entry = mask[i, j]
1118
1118
else :
1119
- isna_entry = _treat_as_na(val, True )
1120
- # TODO: Sure we always want is_datetimelike=True?
1119
+ isna_entry = _treat_as_na(val, is_datetimelike)
1121
1120
1122
1121
if not isna_entry:
1123
1122
nobs[lab, j] += 1
1124
1123
resx[lab, j] = val
1125
1124
1126
1125
for i in range (ncounts):
1127
1126
for j in range (K):
1127
+ # TODO(cython3): the entire next block can be shared
1128
+ # across 3 places once conditional-nogil is available
1128
1129
if nobs[i, j] < min_count:
1130
+ # if we are integer dtype, not is_datetimelike, and
1131
+ # not uses_mask, then getting here implies that
1132
+ # counts[i] < min_count, which means we will
1133
+ # be cast to float64 and masked at the end
1134
+ # of WrappedCythonOp._call_cython_op. So we can safely
1135
+ # set a placeholder value in out[i, j].
1129
1136
if uses_mask:
1130
1137
result_mask[i, j] = True
1131
1138
elif iu_64_floating_obj_t is int64_t:
1132
- # TODO: only if datetimelike?
1139
+ # Per above, this is a placeholder in
1140
+ # non-is_datetimelike cases.
1133
1141
out[i, j] = NPY_NAT
1134
1142
elif iu_64_floating_obj_t is uint64_t:
1135
- runtime_error = True
1136
- break
1143
+ # placeholder, see above
1144
+ out[i, j] = 0
1137
1145
else :
1138
1146
out[i, j] = NAN
1139
1147
1140
1148
else :
1141
1149
out[i, j] = resx[i, j]
1142
1150
1143
- if runtime_error:
1144
- # We cannot raise directly above because that is within a nogil
1145
- # block.
1146
- raise RuntimeError (" empty group with uint64_t" )
1147
-
1148
1151
1149
1152
# TODO(cython3): GH#31710 use memoryviews once cython 0.30 is released so we can
1150
1153
# use `const iu_64_floating_obj_t[:, :] values`
@@ -1159,6 +1162,7 @@ def group_nth(
1159
1162
uint8_t[:, ::1] result_mask = None ,
1160
1163
int64_t min_count = - 1 ,
1161
1164
int64_t rank = 1 ,
1165
+ bint is_datetimelike = False ,
1162
1166
) -> None:
1163
1167
"""
1164
1168
Only aggregates on axis = 0
@@ -1168,7 +1172,6 @@ def group_nth(
1168
1172
iu_64_floating_obj_t val
1169
1173
ndarray[iu_64_floating_obj_t , ndim = 2 ] resx
1170
1174
ndarray[int64_t , ndim = 2 ] nobs
1171
- bint runtime_error = False
1172
1175
bint uses_mask = mask is not None
1173
1176
bint isna_entry
1174
1177
@@ -1230,8 +1233,7 @@ def group_nth(
1230
1233
if uses_mask:
1231
1234
isna_entry = mask[i, j]
1232
1235
else :
1233
- isna_entry = _treat_as_na(val, True )
1234
- # TODO: Sure we always want is_datetimelike=True?
1236
+ isna_entry = _treat_as_na(val, is_datetimelike)
1235
1237
1236
1238
if not isna_entry:
1237
1239
nobs[lab, j] += 1
@@ -1241,25 +1243,27 @@ def group_nth(
1241
1243
for i in range (ncounts):
1242
1244
for j in range (K):
1243
1245
if nobs[i, j] < min_count:
1246
+ # if we are integer dtype, not is_datetimelike, and
1247
+ # not uses_mask, then getting here implies that
1248
+ # counts[i] < min_count, which means we will
1249
+ # be cast to float64 and masked at the end
1250
+ # of WrappedCythonOp._call_cython_op. So we can safely
1251
+ # set a placeholder value in out[i, j].
1244
1252
if uses_mask:
1245
1253
result_mask[i, j] = True
1246
1254
out[i, j] = 0
1247
1255
elif iu_64_floating_obj_t is int64_t:
1248
- # TODO: only if datetimelike?
1256
+ # Per above, this is a placeholder in
1257
+ # non-is_datetimelike cases.
1249
1258
out[i, j] = NPY_NAT
1250
1259
elif iu_64_floating_obj_t is uint64_t:
1251
- runtime_error = True
1252
- break
1260
+ # placeholder, see above
1261
+ out[i, j] = 0
1253
1262
else :
1254
1263
out[i, j] = NAN
1255
1264
else :
1256
1265
out[i, j] = resx[i, j]
1257
1266
1258
- if runtime_error:
1259
- # We cannot raise directly above because that is within a nogil
1260
- # block.
1261
- raise RuntimeError (" empty group with uint64_t" )
1262
-
1263
1267
1264
1268
@ cython.boundscheck (False )
1265
1269
@ cython.wraparound (False )
@@ -1386,7 +1390,6 @@ cdef group_min_max(
1386
1390
Py_ssize_t i, j, N, K, lab, ngroups = len (counts)
1387
1391
iu_64_floating_t val, nan_val
1388
1392
ndarray[iu_64_floating_t, ndim= 2 ] group_min_or_max
1389
- bint runtime_error = False
1390
1393
int64_t[:, ::1 ] nobs
1391
1394
bint uses_mask = mask is not None
1392
1395
bint isna_entry
@@ -1403,7 +1406,6 @@ cdef group_min_max(
1403
1406
group_min_or_max[:] = _get_min_or_max(< iu_64_floating_t> 0 , compute_max, is_datetimelike)
1404
1407
1405
1408
if iu_64_floating_t is int64_t:
1406
- # TODO: only if is_datetimelike?
1407
1409
nan_val = NPY_NAT
1408
1410
elif iu_64_floating_t is uint64_t:
1409
1411
# NB: We do not define nan_val because there is no such thing
@@ -1442,25 +1444,30 @@ cdef group_min_max(
1442
1444
for i in range (ngroups):
1443
1445
for j in range (K):
1444
1446
if nobs[i, j] < min_count:
1447
+ # if we are integer dtype, not is_datetimelike, and
1448
+ # not uses_mask, then getting here implies that
1449
+ # counts[i] < min_count, which means we will
1450
+ # be cast to float64 and masked at the end
1451
+ # of WrappedCythonOp._call_cython_op. So we can safely
1452
+ # set a placeholder value in out[i, j].
1445
1453
if uses_mask:
1446
1454
result_mask[i, j] = True
1447
1455
# set out[i, j] to 0 to be deterministic, as
1448
1456
# it was initialized with np.empty. Also ensures
1449
1457
# we can downcast out if appropriate.
1450
1458
out[i, j] = 0
1459
+ elif iu_64_floating_t is int64_t:
1460
+ # Per above, this is a placeholder in
1461
+ # non-is_datetimelike cases.
1462
+ out[i, j] = nan_val
1451
1463
elif iu_64_floating_t is uint64_t:
1452
- runtime_error = True
1453
- break
1464
+ # placeholder, see above
1465
+ out[i, j] = 0
1454
1466
else :
1455
1467
out[i, j] = nan_val
1456
1468
else :
1457
1469
out[i, j] = group_min_or_max[i, j]
1458
1470
1459
- if runtime_error:
1460
- # We cannot raise directly above because that is within a nogil
1461
- # block.
1462
- raise RuntimeError (" empty group with uint64_t" )
1463
-
1464
1471
1465
1472
@ cython.wraparound (False )
1466
1473
@ cython.boundscheck (False )
0 commit comments