Skip to content

Commit f00b10e

Browse files
Backport PR pandas-dev#44658: BUG: groupby.sum with timedelta64 and NaT
1 parent ec05c80 commit f00b10e

File tree

5 files changed

+46
-2
lines changed

5 files changed

+46
-2
lines changed

doc/source/whatsnew/v1.3.5.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Fixed regression in creating a :class:`DataFrame` from a timezone-aware :class:`Timestamp` scalar near a Daylight Saving Time transition (:issue:`42505`)
2020
- Fixed performance regression in :func:`read_csv` (:issue:`44106`)
2121
- Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`)
22+
- Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`)
2223
-
2324

2425
.. ---------------------------------------------------------------------------

pandas/_libs/groupby.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def group_add(
5353
values: np.ndarray, # ndarray[complexfloating_t, ndim=2]
5454
labels: np.ndarray, # const intp_t[:]
5555
min_count: int = ...,
56+
datetimelike: bool = ...,
5657
) -> None: ...
5758
def group_prod(
5859
out: np.ndarray, # floating[:, ::1]

pandas/_libs/groupby.pyx

+10-2
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,8 @@ def group_add(add_t[:, ::1] out,
483483
int64_t[::1] counts,
484484
ndarray[add_t, ndim=2] values,
485485
const intp_t[:] labels,
486-
Py_ssize_t min_count=0) -> None:
486+
Py_ssize_t min_count=0,
487+
bint datetimelike=False) -> None:
487488
"""
488489
Only aggregates on axis=0 using Kahan summation
489490
"""
@@ -545,7 +546,14 @@ def group_add(add_t[:, ::1] out,
545546
val = values[i, j]
546547

547548
# not nan
548-
if val == val:
549+
# With dt64/td64 values, values have been cast to float64
550+
# instead of int64 for group_add, but the logic
551+
# is otherwise the same as in _treat_as_na
552+
if val == val and not (
553+
add_t is float64_t
554+
and datetimelike
555+
and val == <float64_t>NPY_NAT
556+
):
549557
nobs[lab, j] += 1
550558
y = val - compensation[lab, j]
551559
t = sumx[lab, j] + y

pandas/core/groupby/ops.py

+10
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,16 @@ def _call_cython_op(
574574
min_count,
575575
is_datetimelike=is_datetimelike,
576576
)
577+
elif self.how in ["add"]:
578+
# We support datetimelike
579+
func(
580+
result,
581+
counts,
582+
values,
583+
comp_ids,
584+
min_count,
585+
datetimelike=is_datetimelike,
586+
)
577587
else:
578588
func(result, counts, values, comp_ids, min_count)
579589
else:

pandas/tests/groupby/test_function.py

+24
Original file line numberDiff line numberDiff line change
@@ -1105,3 +1105,27 @@ def test_groupby_sum_below_mincount_nullable_integer():
11051105
result = grouped.sum(min_count=2)
11061106
expected = DataFrame({"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx)
11071107
tm.assert_frame_equal(result, expected)
1108+
1109+
1110+
def test_groupby_sum_timedelta_with_nat():
1111+
# GH#42659
1112+
df = DataFrame(
1113+
{
1114+
"a": [1, 1, 2, 2],
1115+
"b": [pd.Timedelta("1d"), pd.Timedelta("2d"), pd.Timedelta("3d"), pd.NaT],
1116+
}
1117+
)
1118+
td3 = pd.Timedelta(days=3)
1119+
1120+
gb = df.groupby("a")
1121+
1122+
res = gb.sum()
1123+
expected = DataFrame({"b": [td3, td3]}, index=Index([1, 2], name="a"))
1124+
tm.assert_frame_equal(res, expected)
1125+
1126+
res = gb["b"].sum()
1127+
tm.assert_series_equal(res, expected["b"])
1128+
1129+
res = gb["b"].sum(min_count=2)
1130+
expected = Series([td3, pd.NaT], dtype="m8[ns]", name="b", index=expected.index)
1131+
tm.assert_series_equal(res, expected)

0 commit comments

Comments
 (0)