Skip to content

REF: combine masked_cummin_max, cummin_max #46142

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 27, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 24 additions & 73 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1418,6 +1418,13 @@ cdef group_cummin_max(iu_64_floating_t[:, ::1] out,
"""
cdef:
iu_64_floating_t[:, ::1] accum
Py_ssize_t i, j, N, K
iu_64_floating_t val, mval, na_val
uint8_t[:, ::1] seen_na
intp_t lab
bint na_possible
bint uses_mask = mask is not None
bint isna_entry

accum = np.empty((ngroups, (<object>values).shape[1]), dtype=values.dtype)
if iu_64_floating_t is int64_t:
Expand All @@ -1427,40 +1434,18 @@ cdef group_cummin_max(iu_64_floating_t[:, ::1] out,
else:
accum[:] = -np.inf if compute_max else np.inf

if mask is not None:
masked_cummin_max(out, values, mask, labels, accum, skipna, compute_max)
else:
cummin_max(out, values, labels, accum, skipna, is_datetimelike, compute_max)


@cython.boundscheck(False)
@cython.wraparound(False)
cdef cummin_max(iu_64_floating_t[:, ::1] out,
ndarray[iu_64_floating_t, ndim=2] values,
const intp_t[::1] labels,
iu_64_floating_t[:, ::1] accum,
bint skipna,
bint is_datetimelike,
bint compute_max):
"""
Compute the cumulative minimum/maximum of columns of `values`, in row groups
`labels`.
"""
cdef:
Py_ssize_t i, j, N, K
iu_64_floating_t val, mval, na_val
uint8_t[:, ::1] seen_na
intp_t lab
bint na_possible

if iu_64_floating_t is float64_t or iu_64_floating_t is float32_t:
if uses_mask:
na_possible = True
# Will never be used, just to avoid uninitialized warning
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we just define `na_val = 0` in the `cdef` block above?

na_val = 0
elif iu_64_floating_t is float64_t or iu_64_floating_t is float32_t:
na_val = NaN
na_possible = True
elif is_datetimelike:
na_val = NPY_NAT
na_possible = True
# Will never be used, just to avoid uninitialized warning
else:
# Will never be used, just to avoid uninitialized warning
na_val = 0
na_possible = False

Expand All @@ -1474,56 +1459,21 @@ cdef cummin_max(iu_64_floating_t[:, ::1] out,
if lab < 0:
continue
for j in range(K):

if not skipna and na_possible and seen_na[lab, j]:
out[i, j] = na_val
if uses_mask:
mask[i, j] = 1 # FIXME: shouldn't alter inplace
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A follow-up is in the works to fix this in-place modification of `mask`.

else:
out[i, j] = na_val
else:
val = values[i, j]
if not _treat_as_na(val, is_datetimelike):
mval = accum[lab, j]
if compute_max:
if val > mval:
accum[lab, j] = mval = val
else:
if val < mval:
accum[lab, j] = mval = val
out[i, j] = mval
else:
seen_na[lab, j] = 1
out[i, j] = val


@cython.boundscheck(False)
@cython.wraparound(False)
cdef masked_cummin_max(iu_64_floating_t[:, ::1] out,
ndarray[iu_64_floating_t, ndim=2] values,
uint8_t[:, ::1] mask,
const intp_t[::1] labels,
iu_64_floating_t[:, ::1] accum,
bint skipna,
bint compute_max):
"""
Compute the cumulative minimum/maximum of columns of `values`, in row groups
`labels` with a masked algorithm.
"""
cdef:
Py_ssize_t i, j, N, K
iu_64_floating_t val, mval
uint8_t[:, ::1] seen_na
intp_t lab
if uses_mask:
isna_entry = mask[i, j]
else:
isna_entry = _treat_as_na(val, is_datetimelike)

N, K = (<object>values).shape
seen_na = np.zeros((<object>accum).shape, dtype=np.uint8)
with nogil:
for i in range(N):
lab = labels[i]
if lab < 0:
continue
for j in range(K):
if not skipna and seen_na[lab, j]:
mask[i, j] = 1
else:
if not mask[i, j]:
val = values[i, j]
if not isna_entry:
mval = accum[lab, j]
if compute_max:
if val > mval:
Expand All @@ -1534,6 +1484,7 @@ cdef masked_cummin_max(iu_64_floating_t[:, ::1] out,
out[i, j] = mval
else:
seen_na[lab, j] = 1
out[i, j] = val


@cython.boundscheck(False)
Expand Down