@@ -44,8 +44,6 @@ from pandas._libs.algos import (
44
44
)
45
45
46
46
from pandas._libs.dtypes cimport (
47
- iu_64_floating_obj_t,
48
- iu_64_floating_t,
49
47
numeric_object_t,
50
48
numeric_t,
51
49
)
@@ -1019,13 +1017,13 @@ cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike):
1019
1017
1020
1018
1021
1019
# TODO(cython3): GH#31710 use memoryviews once cython 0.30 is released so we can
1022
- # use `const iu_64_floating_obj_t [:, :] values`
1020
+ # use `const numeric_object_t [:, :] values`
1023
1021
@ cython.wraparound (False )
1024
1022
@ cython.boundscheck (False )
1025
1023
def group_last (
1026
- iu_64_floating_obj_t [:, ::1] out ,
1024
+ numeric_object_t [:, ::1] out ,
1027
1025
int64_t[::1] counts ,
1028
- ndarray[iu_64_floating_obj_t , ndim = 2 ] values,
1026
+ ndarray[numeric_object_t , ndim = 2 ] values,
1029
1027
const intp_t[::1] labels ,
1030
1028
const uint8_t[:, :] mask ,
1031
1029
uint8_t[:, ::1] result_mask = None ,
@@ -1037,8 +1035,8 @@ def group_last(
1037
1035
"""
1038
1036
cdef:
1039
1037
Py_ssize_t i , j , N , K , lab , ncounts = len (counts)
1040
- iu_64_floating_obj_t val
1041
- ndarray[iu_64_floating_obj_t , ndim = 2 ] resx
1038
+ numeric_object_t val
1039
+ ndarray[numeric_object_t , ndim = 2 ] resx
1042
1040
ndarray[int64_t , ndim = 2 ] nobs
1043
1041
bint uses_mask = mask is not None
1044
1042
bint isna_entry
@@ -1050,14 +1048,14 @@ def group_last(
1050
1048
1051
1049
min_count = max (min_count, 1 )
1052
1050
nobs = np.zeros((< object > out).shape, dtype = np.int64)
1053
- if iu_64_floating_obj_t is object :
1051
+ if numeric_object_t is object :
1054
1052
resx = np.empty((< object > out).shape, dtype = object )
1055
1053
else :
1056
1054
resx = np.empty_like(out)
1057
1055
1058
1056
N, K = (< object > values).shape
1059
1057
1060
- if iu_64_floating_obj_t is object :
1058
+ if numeric_object_t is object :
1061
1059
# TODO(cython3): De-duplicate once conditional-nogil is available
1062
1060
for i in range (N):
1063
1061
lab = labels[i]
@@ -1118,28 +1116,27 @@ def group_last(
1118
1116
# set a placeholder value in out[i, j].
1119
1117
if uses_mask:
1120
1118
result_mask[i, j] = True
1121
- elif iu_64_floating_obj_t is int64_t:
1119
+ elif numeric_object_t is float32_t or numeric_object_t is float64_t:
1120
+ out[i, j] = NAN
1121
+ elif numeric_object_t is int64_t:
1122
1122
# Per above, this is a placeholder in
1123
1123
# non-is_datetimelike cases.
1124
1124
out[i, j] = NPY_NAT
1125
- elif iu_64_floating_obj_t is uint64_t :
1125
+ else :
1126
1126
# placeholder, see above
1127
1127
out[i, j] = 0
1128
- else :
1129
- out[i, j] = NAN
1130
-
1131
1128
else :
1132
1129
out[i, j] = resx[i, j]
1133
1130
1134
1131
1135
1132
# TODO(cython3): GH#31710 use memoryviews once cython 0.30 is released so we can
1136
- # use `const iu_64_floating_obj_t [:, :] values`
1133
+ # use `const numeric_object_t [:, :] values`
1137
1134
@ cython.wraparound (False )
1138
1135
@ cython.boundscheck (False )
1139
1136
def group_nth (
1140
- iu_64_floating_obj_t [:, ::1] out ,
1137
+ numeric_object_t [:, ::1] out ,
1141
1138
int64_t[::1] counts ,
1142
- ndarray[iu_64_floating_obj_t , ndim = 2 ] values,
1139
+ ndarray[numeric_object_t , ndim = 2 ] values,
1143
1140
const intp_t[::1] labels ,
1144
1141
const uint8_t[:, :] mask ,
1145
1142
uint8_t[:, ::1] result_mask = None ,
@@ -1152,8 +1149,8 @@ def group_nth(
1152
1149
"""
1153
1150
cdef:
1154
1151
Py_ssize_t i , j , N , K , lab , ncounts = len (counts)
1155
- iu_64_floating_obj_t val
1156
- ndarray[iu_64_floating_obj_t , ndim = 2 ] resx
1152
+ numeric_object_t val
1153
+ ndarray[numeric_object_t , ndim = 2 ] resx
1157
1154
ndarray[int64_t , ndim = 2 ] nobs
1158
1155
bint uses_mask = mask is not None
1159
1156
bint isna_entry
@@ -1165,14 +1162,14 @@ def group_nth(
1165
1162
1166
1163
min_count = max (min_count, 1 )
1167
1164
nobs = np.zeros((< object > out).shape, dtype = np.int64)
1168
- if iu_64_floating_obj_t is object :
1165
+ if numeric_object_t is object :
1169
1166
resx = np.empty((< object > out).shape, dtype = object )
1170
1167
else :
1171
1168
resx = np.empty_like(out)
1172
1169
1173
1170
N, K = (< object > values).shape
1174
1171
1175
- if iu_64_floating_obj_t is object :
1172
+ if numeric_object_t is object :
1176
1173
# TODO(cython3): De-duplicate once conditional-nogil is available
1177
1174
for i in range (N):
1178
1175
lab = labels[i]
@@ -1223,6 +1220,7 @@ def group_nth(
1223
1220
if nobs[lab, j] == rank:
1224
1221
resx[lab, j] = val
1225
1222
1223
+ # TODO: de-dup this whole block with group_last?
1226
1224
for i in range (ncounts):
1227
1225
for j in range (K):
1228
1226
if nobs[i, j] < min_count:
@@ -1235,15 +1233,16 @@ def group_nth(
1235
1233
if uses_mask:
1236
1234
result_mask[i, j] = True
1237
1235
out[i, j] = 0
1238
- elif iu_64_floating_obj_t is int64_t:
1236
+ elif numeric_object_t is float32_t or numeric_object_t is float64_t:
1237
+ out[i, j] = NAN
1238
+ elif numeric_object_t is int64_t:
1239
1239
# Per above, this is a placeholder in
1240
1240
# non-is_datetimelike cases.
1241
1241
out[i, j] = NPY_NAT
1242
- elif iu_64_floating_obj_t is uint64_t :
1242
+ else :
1243
1243
# placeholder, see above
1244
1244
out[i, j] = 0
1245
- else :
1246
- out[i, j] = NAN
1245
+
1247
1246
else :
1248
1247
out[i, j] = resx[i, j]
1249
1248
@@ -1252,7 +1251,7 @@ def group_nth(
1252
1251
@ cython.wraparound (False )
1253
1252
def group_rank (
1254
1253
float64_t[:, ::1] out ,
1255
- ndarray[iu_64_floating_obj_t , ndim = 2 ] values,
1254
+ ndarray[numeric_object_t , ndim = 2 ] values,
1256
1255
const intp_t[::1] labels ,
1257
1256
int ngroups ,
1258
1257
bint is_datetimelike ,
@@ -1268,7 +1267,7 @@ def group_rank(
1268
1267
----------
1269
1268
out : np.ndarray[np.float64 , ndim = 2 ]
1270
1269
Values to which this method will write its results.
1271
- values : np.ndarray of iu_64_floating_obj_t values to be ranked
1270
+ values : np.ndarray of numeric_object_t values to be ranked
1272
1271
labels : np.ndarray[np.intp]
1273
1272
Array containing unique label for each group , with its ordering
1274
1273
matching up to the corresponding record in `values`
@@ -1322,14 +1321,13 @@ def group_rank(
1322
1321
# group_min, group_max
1323
1322
# ----------------------------------------------------------------------
1324
1323
1325
- # TODO: consider implementing for more dtypes
1326
1324
1327
1325
@ cython.wraparound (False )
1328
1326
@ cython.boundscheck (False )
1329
1327
cdef group_min_max(
1330
- iu_64_floating_t [:, ::1 ] out,
1328
+ numeric_t [:, ::1 ] out,
1331
1329
int64_t[::1 ] counts,
1332
- ndarray[iu_64_floating_t , ndim= 2 ] values,
1330
+ ndarray[numeric_t , ndim= 2 ] values,
1333
1331
const intp_t[::1 ] labels,
1334
1332
Py_ssize_t min_count = - 1 ,
1335
1333
bint is_datetimelike = False ,
@@ -1342,7 +1340,7 @@ cdef group_min_max(
1342
1340
1343
1341
Parameters
1344
1342
----------
1345
- out : np.ndarray[iu_64_floating_t , ndim=2]
1343
+ out : np.ndarray[numeric_t , ndim=2]
1346
1344
Array to store result in.
1347
1345
counts : np.ndarray[int64]
1348
1346
Input as a zeroed array, populated by group sizes during algorithm
@@ -1371,8 +1369,8 @@ cdef group_min_max(
1371
1369
"""
1372
1370
cdef:
1373
1371
Py_ssize_t i, j, N, K, lab, ngroups = len (counts)
1374
- iu_64_floating_t val, nan_val
1375
- ndarray[iu_64_floating_t , ndim= 2 ] group_min_or_max
1372
+ numeric_t val, nan_val
1373
+ ndarray[numeric_t , ndim= 2 ] group_min_or_max
1376
1374
int64_t[:, ::1 ] nobs
1377
1375
bint uses_mask = mask is not None
1378
1376
bint isna_entry
@@ -1386,16 +1384,20 @@ cdef group_min_max(
1386
1384
nobs = np.zeros((< object > out).shape, dtype = np.int64)
1387
1385
1388
1386
group_min_or_max = np.empty_like(out)
1389
- group_min_or_max[:] = _get_min_or_max(< iu_64_floating_t > 0 , compute_max, is_datetimelike)
1387
+ group_min_or_max[:] = _get_min_or_max(< numeric_t > 0 , compute_max, is_datetimelike)
1390
1388
1391
- if iu_64_floating_t is int64_t:
1389
+ # NB: We do not define nan_val because there is no such thing
1390
+ # for uint64_t. We carefully avoid having to reference it in this
1391
+ # case.
1392
+ if numeric_t is int64_t:
1392
1393
nan_val = NPY_NAT
1393
- elif iu_64_floating_t is uint64_t:
1394
- # NB: We do not define nan_val because there is no such thing
1395
- # for uint64_t. We carefully avoid having to reference it in this
1396
- # case.
1397
- pass
1398
- else :
1394
+ elif numeric_t is int32_t:
1395
+ nan_val = util.INT32_MIN
1396
+ elif numeric_t is int16_t:
1397
+ nan_val = util.INT16_MIN
1398
+ elif numeric_t is int8_t:
1399
+ nan_val = util.INT8_MIN
1400
+ elif numeric_t is float64_t or numeric_t is float32_t:
1399
1401
nan_val = NAN
1400
1402
1401
1403
N, K = (< object > values).shape
@@ -1439,25 +1441,25 @@ cdef group_min_max(
1439
1441
# it was initialized with np.empty. Also ensures
1440
1442
# we can downcast out if appropriate.
1441
1443
out[i, j] = 0
1442
- elif iu_64_floating_t is int64_t:
1444
+ elif numeric_t is float32_t or numeric_t is float64_t:
1445
+ out[i, j] = nan_val
1446
+ elif numeric_t is int64_t:
1443
1447
# Per above, this is a placeholder in
1444
1448
# non-is_datetimelike cases.
1445
1449
out[i, j] = nan_val
1446
- elif iu_64_floating_t is uint64_t :
1450
+ else :
1447
1451
# placeholder, see above
1448
1452
out[i, j] = 0
1449
- else :
1450
- out[i, j] = nan_val
1451
1453
else :
1452
1454
out[i, j] = group_min_or_max[i, j]
1453
1455
1454
1456
1455
1457
@ cython.wraparound (False )
1456
1458
@ cython.boundscheck (False )
1457
1459
def group_max (
1458
- iu_64_floating_t [:, ::1] out ,
1460
+ numeric_t [:, ::1] out ,
1459
1461
int64_t[::1] counts ,
1460
- ndarray[iu_64_floating_t , ndim = 2 ] values,
1462
+ ndarray[numeric_t , ndim = 2 ] values,
1461
1463
const intp_t[::1] labels ,
1462
1464
Py_ssize_t min_count = - 1 ,
1463
1465
bint is_datetimelike = False ,
@@ -1481,9 +1483,9 @@ def group_max(
1481
1483
@cython.wraparound(False )
1482
1484
@cython.boundscheck(False )
1483
1485
def group_min(
1484
- iu_64_floating_t [:, ::1] out ,
1486
+ numeric_t [:, ::1] out ,
1485
1487
int64_t[::1] counts ,
1486
- ndarray[iu_64_floating_t , ndim = 2 ] values,
1488
+ ndarray[numeric_t , ndim = 2 ] values,
1487
1489
const intp_t[::1] labels ,
1488
1490
Py_ssize_t min_count = - 1 ,
1489
1491
bint is_datetimelike = False ,
@@ -1507,8 +1509,8 @@ def group_min(
1507
1509
@cython.boundscheck(False )
1508
1510
@cython.wraparound(False )
1509
1511
cdef group_cummin_max(
1510
- iu_64_floating_t [:, ::1] out ,
1511
- ndarray[iu_64_floating_t , ndim = 2 ] values,
1512
+ numeric_t [:, ::1] out ,
1513
+ ndarray[numeric_t , ndim = 2 ] values,
1512
1514
const uint8_t[:, ::1] mask ,
1513
1515
uint8_t[:, ::1] result_mask ,
1514
1516
const intp_t[::1] labels ,
@@ -1522,9 +1524,9 @@ cdef group_cummin_max(
1522
1524
1523
1525
Parameters
1524
1526
----------
1525
- out : np.ndarray[iu_64_floating_t , ndim=2]
1527
+ out : np.ndarray[numeric_t , ndim=2]
1526
1528
Array to store cummin/max in.
1527
- values : np.ndarray[iu_64_floating_t , ndim=2]
1529
+ values : np.ndarray[numeric_t , ndim=2]
1528
1530
Values to take cummin/max of.
1529
1531
mask : np.ndarray[bool] or None
1530
1532
If not None, indices represent missing values,
@@ -1549,25 +1551,25 @@ cdef group_cummin_max(
1549
1551
This method modifies the `out` parameter, rather than returning an object.
1550
1552
"""
1551
1553
cdef:
1552
- iu_64_floating_t [:, ::1 ] accum
1554
+ numeric_t [:, ::1 ] accum
1553
1555
Py_ssize_t i, j, N, K
1554
- iu_64_floating_t val, mval, na_val
1556
+ numeric_t val, mval, na_val
1555
1557
uint8_t[:, ::1 ] seen_na
1556
1558
intp_t lab
1557
1559
bint na_possible
1558
1560
bint uses_mask = mask is not None
1559
1561
bint isna_entry
1560
1562
1561
1563
accum = np.empty((ngroups, (< object > values).shape[1 ]), dtype = values.dtype)
1562
- accum[:] = _get_min_or_max(< iu_64_floating_t > 0 , compute_max, is_datetimelike)
1564
+ accum[:] = _get_min_or_max(< numeric_t > 0 , compute_max, is_datetimelike)
1563
1565
1564
- na_val = _get_na_val(< iu_64_floating_t > 0 , is_datetimelike)
1566
+ na_val = _get_na_val(< numeric_t > 0 , is_datetimelike)
1565
1567
1566
1568
if uses_mask:
1567
1569
na_possible = True
1568
1570
# Will never be used, just to avoid uninitialized warning
1569
1571
na_val = 0
1570
- elif iu_64_floating_t is float64_t or iu_64_floating_t is float32_t:
1572
+ elif numeric_t is float64_t or numeric_t is float32_t:
1571
1573
na_possible = True
1572
1574
elif is_datetimelike:
1573
1575
na_possible = True
@@ -1620,8 +1622,8 @@ cdef group_cummin_max(
1620
1622
@ cython.boundscheck (False )
1621
1623
@ cython.wraparound (False )
1622
1624
def group_cummin (
1623
- iu_64_floating_t [:, ::1] out ,
1624
- ndarray[iu_64_floating_t , ndim = 2 ] values,
1625
+ numeric_t [:, ::1] out ,
1626
+ ndarray[numeric_t , ndim = 2 ] values,
1625
1627
const intp_t[::1] labels ,
1626
1628
int ngroups ,
1627
1629
bint is_datetimelike ,
@@ -1646,8 +1648,8 @@ def group_cummin(
1646
1648
@cython.boundscheck(False )
1647
1649
@cython.wraparound(False )
1648
1650
def group_cummax(
1649
- iu_64_floating_t [:, ::1] out ,
1650
- ndarray[iu_64_floating_t , ndim = 2 ] values,
1651
+ numeric_t [:, ::1] out ,
1652
+ ndarray[numeric_t , ndim = 2 ] values,
1651
1653
const intp_t[::1] labels ,
1652
1654
int ngroups ,
1653
1655
bint is_datetimelike ,
0 commit comments