-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: enable skipna on groupby reduction ops #43671
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
ee4134e
e319655
23c84b4
440ad04
de8b9fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -487,7 +487,8 @@ def group_add(add_t[:, ::1] out, | |||||
int64_t[::1] counts, | ||||||
ndarray[add_t, ndim=2] values, | ||||||
const intp_t[::1] labels, | ||||||
Py_ssize_t min_count=0) -> None: | ||||||
Py_ssize_t min_count=0, | ||||||
bint skipna=True) -> None: | ||||||
""" | ||||||
Only aggregates on axis=0 using Kahan summation | ||||||
""" | ||||||
|
@@ -530,6 +531,9 @@ def group_add(add_t[:, ::1] out, | |||||
else: | ||||||
t = sumx[lab, j] + val | ||||||
sumx[lab, j] = t | ||||||
elif skipna == False: | ||||||
# NOTE: Does this case need to be considered? | ||||||
pass | ||||||
|
||||||
for i in range(ncounts): | ||||||
for j in range(K): | ||||||
|
@@ -555,6 +559,10 @@ def group_add(add_t[:, ::1] out, | |||||
t = sumx[lab, j] + y | ||||||
compensation[lab, j] = t - sumx[lab, j] - y | ||||||
sumx[lab, j] = t | ||||||
# don't skip nan | ||||||
elif skipna == False: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
sumx[lab, j] = NAN | ||||||
break | ||||||
|
||||||
for i in range(ncounts): | ||||||
for j in range(K): | ||||||
|
@@ -570,7 +578,8 @@ def group_prod(floating[:, ::1] out, | |||||
int64_t[::1] counts, | ||||||
ndarray[floating, ndim=2] values, | ||||||
const intp_t[::1] labels, | ||||||
Py_ssize_t min_count=0) -> None: | ||||||
Py_ssize_t min_count=0, | ||||||
bint skipna=True) -> None: | ||||||
""" | ||||||
Only aggregates on axis=0 | ||||||
""" | ||||||
|
@@ -603,6 +612,10 @@ def group_prod(floating[:, ::1] out, | |||||
if val == val: | ||||||
nobs[lab, j] += 1 | ||||||
prodx[lab, j] *= val | ||||||
# don't skip nan | ||||||
elif skipna == False: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
prodx[lab, j] = NAN | ||||||
break | ||||||
|
||||||
for i in range(ncounts): | ||||||
for j in range(K): | ||||||
|
@@ -620,6 +633,7 @@ def group_var(floating[:, ::1] out, | |||||
ndarray[floating, ndim=2] values, | ||||||
const intp_t[::1] labels, | ||||||
Py_ssize_t min_count=-1, | ||||||
bint skipna=True, | ||||||
int64_t ddof=1) -> None: | ||||||
cdef: | ||||||
Py_ssize_t i, j, N, K, lab, ncounts = len(counts) | ||||||
|
@@ -709,6 +723,11 @@ def group_mean(floating[:, ::1] out, | |||||
t = sumx[lab, j] + y | ||||||
compensation[lab, j] = t - sumx[lab, j] - y | ||||||
sumx[lab, j] = t | ||||||
# don't skip nan | ||||||
elif skipna == False: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also should be |
||||||
# NOTE: Unsure about this, should this loop break here? | ||||||
sumx[lab, j] = NAN | ||||||
break | ||||||
|
||||||
for i in range(ncounts): | ||||||
for j in range(K): | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. If skipna is False and
`not checknull(val)`
(L524 above), then `sumx[lab, j]`
needs to be incremented by `val`
(so it will either become NaN or raise).