Skip to content

Commit d673e5a

Browse files
authored
PERF: support mask in group_last (#46107)
1 parent 150cf99 commit d673e5a

File tree

2 files changed

+33
-4
lines changed

2 files changed

+33
-4
lines changed

pandas/_libs/groupby.pyx

+20-3
Original file line number | Diff line number | Diff line change
@@ -957,6 +957,8 @@ def group_last(iu_64_floating_obj_t[:, ::1] out,
957957
int64_t[::1] counts,
958958
ndarray[iu_64_floating_obj_t, ndim=2] values,
959959
const intp_t[::1] labels,
960+
const uint8_t[:, :] mask,
961+
uint8_t[:, ::1] result_mask=None,
960962
Py_ssize_t min_count=-1) -> None:
961963
"""
962964
Only aggregates on axis=0
@@ -967,6 +969,8 @@ def group_last(iu_64_floating_obj_t[:, ::1] out,
967969
ndarray[iu_64_floating_obj_t, ndim=2] resx
968970
ndarray[int64_t, ndim=2] nobs
969971
bint runtime_error = False
972+
bint uses_mask = mask is not None
973+
bint isna_entry
970974

971975
# TODO(cython3):
972976
# Instead of `labels.shape[0]` use `len(labels)`
@@ -993,7 +997,12 @@ def group_last(iu_64_floating_obj_t[:, ::1] out,
993997
for j in range(K):
994998
val = values[i, j]
995999

996-
if not checknull(val):
1000+
if uses_mask:
1001+
isna_entry = mask[i, j]
1002+
else:
1003+
isna_entry = checknull(val)
1004+
1005+
if not isna_entry:
9971006
# NB: use _treat_as_na here once
9981007
# conditional-nogil is available.
9991008
nobs[lab, j] += 1
@@ -1016,15 +1025,23 @@ def group_last(iu_64_floating_obj_t[:, ::1] out,
10161025
for j in range(K):
10171026
val = values[i, j]
10181027

1019-
if not _treat_as_na(val, True):
1028+
if uses_mask:
1029+
isna_entry = mask[i, j]
1030+
else:
1031+
isna_entry = _treat_as_na(val, True)
10201032
# TODO: Sure we always want is_datetimelike=True?
1033+
1034+
if not isna_entry:
10211035
nobs[lab, j] += 1
10221036
resx[lab, j] = val
10231037

10241038
for i in range(ncounts):
10251039
for j in range(K):
10261040
if nobs[i, j] < min_count:
1027-
if iu_64_floating_obj_t is int64_t:
1041+
if uses_mask:
1042+
result_mask[i, j] = True
1043+
elif iu_64_floating_obj_t is int64_t:
1044+
# TODO: only if datetimelike?
10281045
out[i, j] = NPY_NAT
10291046
elif iu_64_floating_obj_t is uint64_t:
10301047
runtime_error = True

pandas/core/groupby/ops.py

+13-1
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,9 @@ def __init__(self, kind: str, how: str):
138138
},
139139
}
140140

141-
_MASKED_CYTHON_FUNCTIONS = {"cummin", "cummax", "min", "max"}
141+
# "group_any" and "group_all" also support masks, but don't go
142+
# through WrappedCythonOp
143+
_MASKED_CYTHON_FUNCTIONS = {"cummin", "cummax", "min", "max", "last"}
142144

143145
_cython_arity = {"ohlc": 4} # OHLC
144146

@@ -530,6 +532,16 @@ def _call_cython_op(
530532
result_mask=result_mask,
531533
is_datetimelike=is_datetimelike,
532534
)
535+
elif self.how in ["last"]:
536+
func(
537+
out=result,
538+
counts=counts,
539+
values=values,
540+
labels=comp_ids,
541+
min_count=min_count,
542+
mask=mask,
543+
result_mask=result_mask,
544+
)
533545
elif self.how in ["add"]:
534546
# We support datetimelike
535547
func(

0 commit comments

Comments
 (0)