@@ -1317,6 +1317,7 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
1317
1317
const intp_t[:] labels ,
1318
1318
int ngroups ,
1319
1319
bint is_datetimelike ,
1320
+ bint skipna ,
1320
1321
bint compute_max ):
1321
1322
"""
1322
1323
Cumulative minimum/maximum of columns of `values`, in row groups `labels`.
@@ -1336,6 +1337,8 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
1336
1337
Number of groups, larger than all entries of `labels`.
1337
1338
is_datetimelike : bool
1338
1339
True if `values` contains datetime-like entries.
1340
+ skipna : bool
1341
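+ If True, ignore missing values (NaN, NaT or masked entries) in `values`; if False, every entry that follows a missing value within its group is missing as well.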
1339
1342
compute_max : bool
1340
1343
True if cumulative maximum should be computed, False
1341
1344
if cumulative minimum should be computed
@@ -1356,9 +1359,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
1356
1359
accum[:] = - np.inf if compute_max else np.inf
1357
1360
1358
1361
if mask is not None :
1359
- masked_cummin_max(out, values, mask, labels, accum, compute_max)
1362
+ masked_cummin_max(out, values, mask, labels, accum, skipna, compute_max)
1360
1363
else :
1361
- cummin_max(out, values, labels, accum, is_datetimelike, compute_max)
1364
+ cummin_max(out, values, labels, accum, skipna, is_datetimelike, compute_max)
1362
1365
1363
1366
1364
1367
@ cython.boundscheck (False )
@@ -1367,6 +1370,7 @@ cdef cummin_max(groupby_t[:, ::1] out,
1367
1370
ndarray[groupby_t, ndim= 2 ] values,
1368
1371
const intp_t[:] labels,
1369
1372
groupby_t[:, ::1 ] accum,
1373
+ bint skipna,
1370
1374
bint is_datetimelike,
1371
1375
bint compute_max):
1372
1376
"""
@@ -1375,8 +1379,24 @@ cdef cummin_max(groupby_t[:, ::1] out,
1375
1379
"""
1376
1380
cdef:
1377
1381
Py_ssize_t i, j, N, K
1378
- groupby_t val, mval
1382
+ groupby_t val, mval, na_val
1383
+ uint8_t[:, ::1 ] seen_na
1379
1384
intp_t lab
1385
+ bint na_possible
1386
+
1387
+ if groupby_t is float64_t or groupby_t is float32_t:
1388
+ na_val = NaN
1389
+ na_possible = True
1390
+ elif is_datetimelike:
1391
+ na_val = NPY_NAT
1392
+ na_possible = True
1393
+ # The na_val assigned in the else branch below is never read (na_possible is False there); it is set only to avoid an uninitialized-variable warning
1394
+ else :
1395
+ na_val = 0
1396
+ na_possible = False
1397
+
1398
+ if na_possible:
1399
+ seen_na = np.zeros((< object > accum).shape, dtype = np.uint8)
1380
1400
1381
1401
N, K = (< object > values).shape
1382
1402
with nogil:
@@ -1385,18 +1405,22 @@ cdef cummin_max(groupby_t[:, ::1] out,
1385
1405
if lab < 0 :
1386
1406
continue
1387
1407
for j in range (K):
1388
- val = values[i, j]
1389
- if not _treat_as_na(val, is_datetimelike):
1390
- mval = accum[lab, j]
1391
- if compute_max:
1392
- if val > mval:
1393
- accum[lab, j] = mval = val
1394
- else :
1395
- if val < mval:
1396
- accum[lab, j] = mval = val
1397
- out[i, j] = mval
1408
+ if not skipna and na_possible and seen_na[lab, j]:
1409
+ out[i, j] = na_val
1398
1410
else :
1399
- out[i, j] = val
1411
+ val = values[i, j]
1412
+ if not _treat_as_na(val, is_datetimelike):
1413
+ mval = accum[lab, j]
1414
+ if compute_max:
1415
+ if val > mval:
1416
+ accum[lab, j] = mval = val
1417
+ else :
1418
+ if val < mval:
1419
+ accum[lab, j] = mval = val
1420
+ out[i, j] = mval
1421
+ else :
1422
+ seen_na[lab, j] = 1
1423
+ out[i, j] = val
1400
1424
1401
1425
1402
1426
@ cython.boundscheck (False )
@@ -1406,6 +1430,7 @@ cdef masked_cummin_max(groupby_t[:, ::1] out,
1406
1430
uint8_t[:, ::1 ] mask,
1407
1431
const intp_t[:] labels,
1408
1432
groupby_t[:, ::1 ] accum,
1433
+ bint skipna,
1409
1434
bint compute_max):
1410
1435
"""
1411
1436
Compute the cumulative minimum/maximum of columns of `values`, in row groups
@@ -1414,25 +1439,32 @@ cdef masked_cummin_max(groupby_t[:, ::1] out,
1414
1439
cdef:
1415
1440
Py_ssize_t i, j, N, K
1416
1441
groupby_t val, mval
1442
+ uint8_t[:, ::1 ] seen_na
1417
1443
intp_t lab
1418
1444
1419
1445
N, K = (< object > values).shape
1446
+ seen_na = np.zeros((< object > accum).shape, dtype = np.uint8)
1420
1447
with nogil:
1421
1448
for i in range (N):
1422
1449
lab = labels[i]
1423
1450
if lab < 0 :
1424
1451
continue
1425
1452
for j in range (K):
1426
- if not mask[i, j]:
1427
- val = values[i, j]
1428
- mval = accum[lab, j]
1429
- if compute_max:
1430
- if val > mval:
1431
- accum[lab, j] = mval = val
1453
+ if not skipna and seen_na[lab, j]:
1454
+ mask[i, j] = 1
1455
+ else :
1456
+ if not mask[i, j]:
1457
+ val = values[i, j]
1458
+ mval = accum[lab, j]
1459
+ if compute_max:
1460
+ if val > mval:
1461
+ accum[lab, j] = mval = val
1462
+ else :
1463
+ if val < mval:
1464
+ accum[lab, j] = mval = val
1465
+ out[i, j] = mval
1432
1466
else :
1433
- if val < mval:
1434
- accum[lab, j] = mval = val
1435
- out[i, j] = mval
1467
+ seen_na[lab, j] = 1
1436
1468
1437
1469
1438
1470
@ cython.boundscheck (False )
@@ -1442,7 +1474,8 @@ def group_cummin(groupby_t[:, ::1] out,
1442
1474
const intp_t[:] labels ,
1443
1475
int ngroups ,
1444
1476
bint is_datetimelike ,
1445
- uint8_t[:, ::1] mask = None ) -> None:
1477
+ uint8_t[:, ::1] mask = None ,
1478
+ bint skipna = True ) -> None:
1446
1479
"""See group_cummin_max.__doc__"""
1447
1480
group_cummin_max(
1448
1481
out ,
@@ -1451,6 +1484,7 @@ def group_cummin(groupby_t[:, ::1] out,
1451
1484
labels ,
1452
1485
ngroups ,
1453
1486
is_datetimelike ,
1487
+ skipna ,
1454
1488
compute_max = False
1455
1489
)
1456
1490
@@ -1462,7 +1496,8 @@ def group_cummax(groupby_t[:, ::1] out,
1462
1496
const intp_t[:] labels ,
1463
1497
int ngroups ,
1464
1498
bint is_datetimelike ,
1465
- uint8_t[:, ::1] mask = None ) -> None:
1499
+ uint8_t[:, ::1] mask = None ,
1500
+ bint skipna = True ) -> None:
1466
1501
"""See group_cummin_max.__doc__"""
1467
1502
group_cummin_max(
1468
1503
out ,
@@ -1471,5 +1506,6 @@ def group_cummax(groupby_t[:, ::1] out,
1471
1506
labels ,
1472
1507
ngroups ,
1473
1508
is_datetimelike ,
1509
+ skipna ,
1474
1510
compute_max = True
1475
1511
)
0 commit comments